diff --git a/shortcuts/doc/docs_fetch_im_markdown.go b/shortcuts/doc/docs_fetch_im_markdown.go
new file mode 100644
index 00000000..13b37002
--- /dev/null
+++ b/shortcuts/doc/docs_fetch_im_markdown.go
@@ -0,0 +1,861 @@
+// Copyright (c) 2026 Lark Technologies Pte. Ltd.
+// SPDX-License-Identifier: MIT
+
+package doc
+
+import (
+ "fmt"
+ "html"
+ "net/url"
+ "regexp"
+ "strings"
+ "unicode/utf8"
+)
+
+// imMarkdownContext carries the state shared by the im-markdown tag handlers
+// while one document is being converted.
+type imMarkdownContext struct {
+ // baseURL is the scheme://host prefix used when building doc and sheet links.
+ baseURL string
+ // blockquoteDepth is incremented by withBlockquote; a value above zero means
+ // the fragment being converted sits inside a blockquote.
+ blockquoteDepth int
+}
+
+// imMarkdownHandleFunc renders one matched element as im-markdown text.
+// segment is the matched source (just the tag for self-closing elements,
+// otherwise opening tag through closing tag), inner is the text between the
+// tags (empty for self-closing elements), attrs holds the opening tag's
+// attributes keyed by lower-cased name, and imCtx is the conversion context.
+type imMarkdownHandleFunc func(segment, inner string, attrs map[string]string, imCtx imMarkdownContext) string
+
+// imMarkdownTagHandler pairs a tag's render function with the regexp used to
+// locate that tag's matching end tag.
+type imMarkdownTagHandler struct {
+ // closeRE matches opening, closing and self-closing forms of the tag; its
+ // first capture group holds the closing slash when present.
+ closeRE *regexp.Regexp
+ // handle renders the matched element.
+ handle imMarkdownHandleFunc
+}
+
+// registerIMMarkdownHandler stores handle in the handler registry under tag,
+// together with a case-insensitive regexp matching the tag's opening, closing
+// and self-closing forms (capture group 1 is the optional closing slash).
+// The pattern is compiled with MustCompile, so an invalid pattern panics.
+func registerIMMarkdownHandler(tag string, handle imMarkdownHandleFunc) {
+	pattern := `(?is)<(/?)` + regexp.QuoteMeta(tag) + `(?:\s[^<>]*?)?\s*/?>`
+	entry := imMarkdownTagHandler{handle: handle}
+	entry.closeRE = regexp.MustCompile(pattern)
+	imMarkdownHandlers[tag] = entry
+}
+
+// Package-level regular expressions used by the im-markdown converter. They
+// are compiled once at start-up; every pattern must be a valid RE2 expression
+// because MustCompile panics otherwise.
+var (
+	// imMarkdownTagStartRE matches an opening or self-closing tag; group 1 is the tag name.
+	imMarkdownTagStartRE = regexp.MustCompile(`(?s)<([A-Za-z][A-Za-z0-9:_-]*)(?:\s[^<>]*?)?\s*/?>`)
+	// imMarkdownAttrRE matches one quoted attribute; group 1 is the name, group 2
+	// the double-quoted value and group 3 the single-quoted value.
+	imMarkdownAttrRE = regexp.MustCompile(`([A-Za-z_:][A-Za-z0-9_:.-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')`)
+	// imMarkdownRowTagRE matches <tr> tags; group 1 is the optional closing slash.
+	imMarkdownRowTagRE = regexp.MustCompile(`(?is)<(/?)tr\b[^>]*?\s*/?>`)
+	// imMarkdownCellTagRE matches <td> and <th> tags; group 1 is the optional closing slash.
+	imMarkdownCellTagRE = regexp.MustCompile(`(?is)<(/?)t[dh]\b[^>]*?\s*/?>`)
+	// imMarkdownCellBreakRE matches a line-break tag such as <br>, <br/> or <BR />.
+	imMarkdownCellBreakRE = regexp.MustCompile(`(?i)<br\s*/?>`)
+	// imMarkdownAnyTagRE matches any opening or closing tag; group 1 is the tag name.
+	imMarkdownAnyTagRE = regexp.MustCompile(`(?s)</?([A-Za-z][A-Za-z0-9:_-]*)(?:\s[^<>]*?)?>`)
+	// imMarkdownLinkRE matches an <a href=...>text</a> element; groups 1 and 2 are
+	// the double- and single-quoted href, group 3 is the link text.
+	imMarkdownLinkRE = regexp.MustCompile(`(?is)<a\b[^>]*\bhref=(?:"([^"]*)"|'([^']*)')[^>]*>(.*?)</a>`)
+	// imMarkdownCodeBlockRE matches content that consists solely of one <code>
+	// element; group 1 is the code text.
+	imMarkdownCodeBlockRE = regexp.MustCompile(`(?is)^\s*<code(?:\s[^<>]*?)?>(.*?)</code>\s*$`)
+	// imMarkdownLiOpenRE matches an opening <li> tag.
+	imMarkdownLiOpenRE = regexp.MustCompile(`(?is)<li(?:\s[^<>]*?)?>`)
+	// imMarkdownLiCloseRE matches any <li> tag; group 1 is the optional closing slash.
+	imMarkdownLiCloseRE = regexp.MustCompile(`(?is)<(/?)li(?:\s[^<>]*?)?\s*/?>`)
+)
+
+// imMarkdownHandlers is the tag registry, keyed by the tag name passed to
+// registerIMMarkdownHandler; convertToIMMarkdown looks tags up by their
+// lower-cased name.
+var imMarkdownHandlers = map[string]imMarkdownTagHandler{}
+
+// init fills the handler registry: h1 through h9 share a level-parameterised
+// heading handler and every other supported tag is listed in the table below.
+func init() {
+	for level := 1; level <= 9; level++ {
+		registerIMMarkdownHandler(fmt.Sprintf("h%d", level), handleIMMarkdownHeading(level))
+	}
+
+	registrations := []struct {
+		tag    string
+		handle imMarkdownHandleFunc
+	}{
+		{"title", handleIMMarkdownTitle},
+		{"p", handleIMMarkdownParagraph},
+		{"ul", handleIMMarkdownUnorderedList},
+		{"ol", handleIMMarkdownOrderedList},
+		{"li", handleIMMarkdownListItem},
+		{"callout", handleIMMarkdownCallout},
+		{"blockquote", handleIMMarkdownBlockquote},
+		{"grid", handleIMMarkdownPassthroughContainer},
+		{"column", handleIMMarkdownColumn},
+		{"table", handleIMMarkdownTable},
+		{"colgroup", handleIMMarkdownDiscard},
+		{"col", handleIMMarkdownDiscard},
+		{"pre", handleIMMarkdownPre},
+		{"code", handleIMMarkdownCode},
+		{"latex", handleIMMarkdownLatex},
+		{"hr", handleIMMarkdownHorizontalRule},
+		{"img", handleIMMarkdownImage},
+		{"figure", handleIMMarkdownDiscard},
+		{"source", handleIMMarkdownSource},
+		{"button", handleIMMarkdownDiscard},
+		{"time", handleIMMarkdownDiscard},
+		{"whiteboard", handleIMMarkdownInlineCode},
+		{"sheet", handleIMMarkdownSheet},
+		{"task", handleIMMarkdownConditionalResourceLabel("任务", "task-id", "guid", "token", "id")},
+		{"chat_card", handleIMMarkdownConditionalResourceLabel("群聊卡片", "chat-id", "chat_id", "id")},
+		{"bitable", handleIMMarkdownResourceLabel("多维表格")},
+		{"base_refer", handleIMMarkdownResourceLabel("多维表格")},
+		{"okr", handleIMMarkdownResourceLabel("OKR")},
+		{"poll", handleIMMarkdownDiscard},
+		{"agenda", handleIMMarkdownDiscard},
+		{"folder_manager", handleIMMarkdownDiscard},
+		{"wiki_catalog", handleIMMarkdownDiscard},
+		{"wiki_recent_update", handleIMMarkdownDiscard},
+		{"chart_refer_host_perm", handleIMMarkdownDiscard},
+		{"synced_reference", handleIMMarkdownDiscard},
+		{"synced-source", handleIMMarkdownDiscard},
+		{"mindnote", handleIMMarkdownDiscard},
+		{"bookmark", handleIMMarkdownBookmark},
+		{"cite", handleIMMarkdownCite},
+		{"b", handleIMMarkdownStrong},
+		{"em", handleIMMarkdownEmphasis},
+		{"del", handleIMMarkdownDelete},
+		{"u", handleIMMarkdownPlainInline},
+		{"span", handleIMMarkdownPlainInline},
+		{"a", handleIMMarkdownAnchor},
+	}
+	for _, r := range registrations {
+		registerIMMarkdownHandler(r.tag, r.handle)
+	}
+}
+
+// isIMMarkdownFetch reports whether the runtime's "doc-format" value, ignoring
+// surrounding whitespace, is "im-markdown".
+func isIMMarkdownFetch(runtime interface{ Str(string) string }) bool {
+	format := runtime.Str("doc-format")
+	return strings.TrimSpace(format) == "im-markdown"
+}
+
+// applyFetchIMMarkdown rewrites data["document"]["content"] in place with its
+// im-markdown rendering. data is left untouched when "document" is not a map
+// or "content" is not a string. docInput is used to derive the link base URL.
+func applyFetchIMMarkdown(data map[string]interface{}, docInput string) {
+	doc, isMap := data["document"].(map[string]interface{})
+	if !isMap {
+		return
+	}
+	if raw, isString := doc["content"].(string); isString {
+		doc["content"] = convertToIMMarkdown(raw, newIMMarkdownContext(docInput))
+	}
+}
+
+func newIMMarkdownContext(docInput string) imMarkdownContext {
+ base := "https://larkoffice.com"
+ raw := strings.TrimSpace(docInput)
+ if extracted, ok := imMarkdownBaseURLFromInput(raw); ok {
+ base = extracted
+ }
+ return imMarkdownContext{baseURL: base}
+}
+
+// withBlockquote returns a copy of c whose blockquote depth is one greater;
+// the receiver itself is not modified.
+func (c imMarkdownContext) withBlockquote() imMarkdownContext {
+	nested := c
+	nested.blockquoteDepth = c.blockquoteDepth + 1
+	return nested
+}
+
+// inBlockquote reports whether the context is nested in at least one blockquote.
+func (c imMarkdownContext) inBlockquote() bool {
+	return c.blockquoteDepth >= 1
+}
+
+// imMarkdownBaseURLFromInput extracts "scheme://host" from the --doc input so
+// generated doc/sheet links point at the same tenant. parseDocumentRef drops
+// the host on purpose, which is why this formatting path parses it separately.
+//
+// A full URL is used directly. Otherwise the text before the first /docx/,
+// /wiki/ or /doc/ marker is tried, first as-is and then with an https://
+// prefix (accepted only when the host contains a dot). The boolean result is
+// false when no host could be determined.
+func imMarkdownBaseURLFromInput(raw string) (string, bool) {
+	if raw == "" {
+		return "", false
+	}
+
+	// originOf returns scheme://host when candidate parses with both parts set.
+	originOf := func(candidate string) (string, bool) {
+		u, err := url.Parse(candidate)
+		if err != nil || u.Scheme == "" || u.Host == "" {
+			return "", false
+		}
+		return u.Scheme + "://" + u.Host, true
+	}
+
+	if origin, ok := originOf(raw); ok {
+		return origin, true
+	}
+	for _, marker := range [...]string{"/docx/", "/wiki/", "/doc/"} {
+		cut := strings.Index(raw, marker)
+		if cut <= 0 {
+			continue
+		}
+		candidate := strings.Trim(raw[:cut], "/")
+		if candidate == "" {
+			continue
+		}
+		if origin, ok := originOf(candidate); ok {
+			return origin, true
+		}
+		// Scheme-less input such as "tenant.example.com/docx/...".
+		u, err := url.Parse("https://" + candidate)
+		if err == nil && u.Host != "" && strings.Contains(u.Host, ".") {
+			return "https://" + u.Host, true
+		}
+	}
+	return "", false
+}
+
+// convertToIMMarkdown rewrites every element in content whose tag has a
+// registered handler and copies everything else through unchanged. Tag names
+// are looked up lower-cased. Self-closing tags are handled with an empty inner
+// string; other tags are paired with their matching end tag before the handler
+// is called. If a registered tag has no matching end tag, the rest of content
+// from that opening tag onward is emitted verbatim and scanning stops.
+func convertToIMMarkdown(content string, imCtx imMarkdownContext) string {
+ var out strings.Builder
+ for offset := 0; offset < len(content); {
+ // Scan only to the next XML-like opening tag. Plain Markdown text between
+ // registered tags is copied unchanged, so ordinary Markdown is not re-parsed.
+ loc := imMarkdownTagStartRE.FindStringSubmatchIndex(content[offset:])
+ if loc == nil {
+ out.WriteString(content[offset:])
+ break
+ }
+ start := offset + loc[0]
+ openEnd := offset + loc[1]
+ tag := strings.ToLower(content[offset+loc[2] : offset+loc[3]])
+ handler, ok := imMarkdownHandlers[tag]
+ if !ok {
+ // Unknown tags are left intact. im-markdown only downgrades tags with
+ // explicit handlers so future server output does not get guessed at.
+ out.WriteString(content[offset:openEnd])
+ offset = openEnd
+ continue
+ }
+
+ out.WriteString(content[offset:start])
+ opening := content[start:openEnd]
+ attrs := parseIMMarkdownAttrs(opening)
+ if isSelfClosingIMMarkdownTag(opening) {
+ out.WriteString(handler.handle(opening, "", attrs, imCtx))
+ offset = openEnd
+ continue
+ }
+
+ // Use the handler's precompiled close regexp to find the matching end tag.
+ // Depth tracking keeps nested same-name containers paired correctly.
+ closeStart, closeEnd, found := findIMMarkdownClosingTag(content, openEnd, handler)
+ if !found {
+ // Malformed or truncated fragments are preserved as-is from the opening
+ // tag onward; do not drop content when the XML-ish structure is incomplete.
+ out.WriteString(content[start:])
+ break
+ }
+ segment := content[start:closeEnd]
+ inner := content[openEnd:closeStart]
+ out.WriteString(handler.handle(segment, inner, attrs, imCtx))
+ offset = closeEnd
+ }
+ return out.String()
+}
+
+// findIMMarkdownClosingTag locates the end tag that balances an element whose
+// opening tag ends at byte offset from. It returns the start and end offsets
+// of that end tag within content, or found=false when the element is never
+// closed. The depth-tracking scan is shared with table rows and cells, so this
+// simply runs it with the handler's own tag regexp.
+func findIMMarkdownClosingTag(content string, from int, handler imMarkdownTagHandler) (int, int, bool) {
+	return findIMMarkdownElementClosingTag(content, from, handler.closeRE)
+}
+
+// parseIMMarkdownAttrs collects the quoted attributes of an opening tag into a
+// map keyed by lower-cased attribute name. Values are HTML-unescaped; when a
+// name repeats, the last occurrence wins. The result is never nil.
+func parseIMMarkdownAttrs(opening string) map[string]string {
+	matches := imMarkdownAttrRE.FindAllStringSubmatch(opening, -1)
+	attrs := make(map[string]string, len(matches))
+	for _, m := range matches {
+		name, raw := strings.ToLower(m[1]), m[2]
+		if raw == "" {
+			// Not double-quoted (or empty): use the single-quoted capture.
+			raw = m[3]
+		}
+		attrs[name] = html.UnescapeString(raw)
+	}
+	return attrs
+}
+
+// isSelfClosingIMMarkdownTag reports whether tag, ignoring surrounding
+// whitespace, ends with "/>".
+func isSelfClosingIMMarkdownTag(tag string) bool {
+	trimmed := strings.TrimSpace(tag)
+	return strings.HasSuffix(trimmed, "/>")
+}
+
+// handleIMMarkdownTitle renders the converted, trimmed title text as a
+// level-one heading, or returns "" when the title is empty.
+func handleIMMarkdownTitle(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+	if text := strings.TrimSpace(convertToIMMarkdown(inner, imCtx)); text != "" {
+		return "# " + text
+	}
+	return ""
+}
+
+// handleIMMarkdownHeading returns a handler that renders a heading of the
+// given level. Levels above 6 are emitted with six '#' characters; an empty
+// heading renders as "".
+func handleIMMarkdownHeading(level int) imMarkdownHandleFunc {
+	depth := level
+	if depth > 6 {
+		depth = 6
+	}
+	return func(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+		text := strings.TrimSpace(convertToIMMarkdown(inner, imCtx))
+		if text == "" {
+			return ""
+		}
+		return strings.Repeat("#", depth) + " " + text
+	}
+}
+
+// handleIMMarkdownParagraph renders a paragraph as its converted, trimmed
+// body. Inside a blockquote a trailing newline is appended; an empty paragraph
+// renders as "".
+func handleIMMarkdownParagraph(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+	body := strings.TrimSpace(convertToIMMarkdown(inner, imCtx))
+	switch {
+	case body == "":
+		return ""
+	case imCtx.inBlockquote():
+		return body + "\n"
+	default:
+		return body
+	}
+}
+
+func handleIMMarkdownUnorderedList(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+ return convertIMMarkdownListItems(inner, false, imCtx)
+}
+
+func handleIMMarkdownOrderedList(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+ return convertIMMarkdownListItems(inner, true, imCtx)
+}
+
+// handleIMMarkdownListItem renders a list item that is converted on its own
+// rather than through convertIMMarkdownListItems. The marker is "-" unless the
+// "seq" attribute carries an explicit value other than "auto", in which case
+// that value followed by "." is used. Empty items render as "".
+func handleIMMarkdownListItem(_ string, inner string, attrs map[string]string, imCtx imMarkdownContext) string {
+	body := strings.TrimSpace(convertToIMMarkdown(inner, imCtx))
+	if body == "" {
+		return ""
+	}
+	marker := "-"
+	seq := strings.TrimSpace(attrs["seq"])
+	if seq != "" && seq != "auto" {
+		marker = strings.TrimSuffix(seq, ".") + "."
+	}
+	return marker + " " + indentIMMarkdownListContinuation(body) + "\n"
+}
+
+// handleIMMarkdownCallout renders a callout as its body between two "---"
+// lines. A non-empty "emoji" attribute is placed before the body, separated
+// by a space. With neither emoji nor body the result is "---\n---".
+func handleIMMarkdownCallout(_ string, inner string, attrs map[string]string, imCtx imMarkdownContext) string {
+	body := strings.TrimSpace(convertToIMMarkdown(inner, imCtx))
+	emoji := strings.TrimSpace(attrs["emoji"])
+
+	parts := make([]string, 0, 2)
+	if emoji != "" {
+		parts = append(parts, emoji)
+	}
+	if body != "" {
+		parts = append(parts, body)
+	}
+	if len(parts) == 0 {
+		return "---\n---"
+	}
+	return "---\n" + strings.Join(parts, " ") + "\n---"
+}
+
+func handleIMMarkdownBlockquote(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+ body := strings.TrimSpace(convertToIMMarkdown(inner, imCtx.withBlockquote()))
+ if body == "" {
+ return ""
+ }
+ lines := strings.Split(body, "\n")
+ for i, line := range lines {
+ if strings.TrimSpace(line) == "" {
+ lines[i] = ">"
+ continue
+ }
+ lines[i] = "> " + line
+ }
+ return strings.Join(lines, "\n")
+}
+
+// handleIMMarkdownPassthroughContainer drops the container tags and returns
+// the converted children with surrounding whitespace trimmed.
+func handleIMMarkdownPassthroughContainer(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+	converted := convertToIMMarkdown(inner, imCtx)
+	return strings.TrimSpace(converted)
+}
+
+// handleIMMarkdownColumn renders a column as its converted, trimmed body
+// followed by a newline, or "" when the column is empty.
+func handleIMMarkdownColumn(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+	if body := strings.TrimSpace(convertToIMMarkdown(inner, imCtx)); body != "" {
+		return body + "\n"
+	}
+	return ""
+}
+
+// handleIMMarkdownDiscard removes the element, including its content, by
+// rendering it as the empty string.
+func handleIMMarkdownDiscard(_ string, _ string, _ map[string]string, _ imMarkdownContext) string {
+ return ""
+}
+
+// handleIMMarkdownInlineCode renders the element's full source text as an
+// inline code span.
+func handleIMMarkdownInlineCode(segment string, _ string, _ map[string]string, _ imMarkdownContext) string {
+ return imMarkdownInlineCode(segment)
+}
+
+// handleIMMarkdownPre renders a <pre> element as a fenced code block, using
+// the trimmed "lang" attribute as the info string. When the trimmed content
+// matches imMarkdownCodeBlockRE, only that pattern's first capture is kept.
+// HTML entities in the code are unescaped.
+func handleIMMarkdownPre(_ string, inner string, attrs map[string]string, _ imMarkdownContext) string {
+	code := strings.TrimSpace(inner)
+	if groups := imMarkdownCodeBlockRE.FindStringSubmatch(code); groups != nil {
+		code = groups[1]
+	}
+	language := strings.TrimSpace(attrs["lang"])
+	return imMarkdownFencedCode(html.UnescapeString(code), language)
+}
+
+// handleIMMarkdownCode renders the element's plain-text content as an inline
+// code span.
+func handleIMMarkdownCode(_ string, inner string, _ map[string]string, _ imMarkdownContext) string {
+	plain := markdownPlainText(inner)
+	return imMarkdownInlineCode(plain)
+}
+
+func handleIMMarkdownLatex(_ string, inner string, _ map[string]string, _ imMarkdownContext) string {
+ expr := strings.TrimSpace(markdownPlainText(inner))
+ if expr == "" {
+ return ""
+ }
+ return "$" + strings.ReplaceAll(expr, "$", `\$`) + "$"
+}
+
+// handleIMMarkdownHorizontalRule renders the element as the Markdown rule "---".
+func handleIMMarkdownHorizontalRule(_ string, _ string, _ map[string]string, _ imMarkdownContext) string {
+ return "---"
+}
+
+// handleIMMarkdownImage renders an image element as a Markdown image,
+// ![alt](destination). The destination is the first non-empty of the "href",
+// "src" and "url" attributes; without one the image is dropped. The alt text
+// is the first non-empty of "alt", "name" and "title" and may be empty.
+func handleIMMarkdownImage(_ string, _ string, attrs map[string]string, _ imMarkdownContext) string {
+	href := firstNonEmpty(attrs["href"], attrs["src"], attrs["url"])
+	if href == "" {
+		return ""
+	}
+	alt := firstNonEmpty(attrs["alt"], attrs["name"], attrs["title"])
+	// The format string must consume both arguments; an empty format would
+	// make fmt append a %!(EXTRA ...) diagnostic instead of the image.
+	return fmt.Sprintf("![%s](%s)", escapeMarkdownLinkText(alt), escapeMarkdownLinkDestination(href))
+}
+
+// handleIMMarkdownSource renders the trimmed "name" attribute as an inline
+// code span, or "" when the attribute is missing or blank.
+func handleIMMarkdownSource(_ string, _ string, attrs map[string]string, _ imMarkdownContext) string {
+	if name := strings.TrimSpace(attrs["name"]); name != "" {
+		return imMarkdownInlineCode(name)
+	}
+	return ""
+}
+
+// handleIMMarkdownResourceLabel returns a handler that replaces the element
+// with label rendered as an inline code span.
+func handleIMMarkdownResourceLabel(label string) imMarkdownHandleFunc {
+	rendered := imMarkdownInlineCode(label)
+	return func(string, string, map[string]string, imMarkdownContext) string {
+		return rendered
+	}
+}
+
+// handleIMMarkdownConditionalResourceLabel returns a handler that renders
+// label as an inline code span when at least one of attrNames has a non-blank
+// value on the element, and renders "" otherwise.
+func handleIMMarkdownConditionalResourceLabel(label string, attrNames ...string) imMarkdownHandleFunc {
+	rendered := imMarkdownInlineCode(label)
+	return func(_ string, _ string, attrs map[string]string, _ imMarkdownContext) string {
+		for i := range attrNames {
+			if strings.TrimSpace(attrs[attrNames[i]]) == "" {
+				continue
+			}
+			return rendered
+		}
+		return ""
+	}
+}
+
+// handleIMMarkdownSheet renders a sheet element as a link to
+// <baseURL>/sheets/<token>. The link text is "sheet", or "sheet <sheet-id>"
+// when the "sheet-id" attribute is set. Without a token the element's source
+// is shown as inline code.
+func handleIMMarkdownSheet(segment string, _ string, attrs map[string]string, imCtx imMarkdownContext) string {
+	token := strings.TrimSpace(attrs["token"])
+	if token == "" {
+		return imMarkdownInlineCode(segment)
+	}
+	target := strings.TrimRight(imCtx.baseURL, "/") + "/sheets/" + token
+	sheetID := strings.TrimSpace(attrs["sheet-id"])
+	if sheetID == "" {
+		return markdownLink("sheet", target)
+	}
+	return markdownLink("sheet "+sheetID, target)
+}
+
+// handleIMMarkdownBookmark renders a bookmark as a Markdown link to its
+// "href". The label is the first non-empty of the "name" attribute, the
+// "title" attribute, the converted inner text and the href itself. Without an
+// href only the label is returned.
+func handleIMMarkdownBookmark(segment string, inner string, attrs map[string]string, imCtx imMarkdownContext) string {
+	href := strings.TrimSpace(attrs["href"])
+	innerLabel := markdownLinkLabelText(convertToIMMarkdown(inner, imCtx))
+	name := firstNonEmpty(attrs["name"], attrs["title"], innerLabel, href)
+	if href != "" {
+		return markdownLink(name, href)
+	}
+	return name
+}
+
+func handleIMMarkdownStrong(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+ body := strings.TrimSpace(convertToIMMarkdown(inner, imCtx))
+ if body == "" {
+ return ""
+ }
+ return "**" + body + "**"
+}
+
+func handleIMMarkdownEmphasis(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+ body := strings.TrimSpace(convertToIMMarkdown(inner, imCtx))
+ if body == "" {
+ return ""
+ }
+ return "*" + body + "*"
+}
+
+func handleIMMarkdownDelete(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+ body := strings.TrimSpace(convertToIMMarkdown(inner, imCtx))
+ if body == "" {
+ return ""
+ }
+ return "~~" + body + "~~"
+}
+
+// handleIMMarkdownPlainInline drops the wrapping tag and returns the
+// converted children with surrounding whitespace trimmed.
+func handleIMMarkdownPlainInline(_ string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+	converted := convertToIMMarkdown(inner, imCtx)
+	return strings.TrimSpace(converted)
+}
+
+// handleIMMarkdownAnchor renders an anchor as a Markdown link to its "href".
+// The label is the first non-empty of the converted inner text, the "name"
+// attribute, the "title" attribute and the href itself. Without an href only
+// the label is returned.
+func handleIMMarkdownAnchor(_ string, inner string, attrs map[string]string, imCtx imMarkdownContext) string {
+	href := strings.TrimSpace(attrs["href"])
+	innerLabel := markdownLinkLabelText(convertToIMMarkdown(inner, imCtx))
+	text := firstNonEmpty(innerLabel, attrs["name"], attrs["title"], href)
+	if href != "" {
+		return markdownLink(text, href)
+	}
+	return text
+}
+
+// handleIMMarkdownCite renders a <cite> element according to its "type"
+// attribute (compared case-insensitively after trimming):
+//
+//   - "user": an <at> mention carrying the user ID and display name, or just
+//     the name when no ID attribute is present.
+//   - "doc": a Markdown link to the document. An explicit href/url wins;
+//     otherwise the link is built from the base URL, the file type (default
+//     "docx") and the doc ID. With no ID the source is shown as inline code.
+//   - "citation": a Markdown link taken from an inner <a> element or from the
+//     href/url attributes, falling back to the plain inner text.
+//   - anything else: the element's source as inline code.
+func handleIMMarkdownCite(segment string, inner string, attrs map[string]string, imCtx imMarkdownContext) string {
+	switch strings.ToLower(strings.TrimSpace(attrs["type"])) {
+	case "user":
+		userID := firstNonEmpty(attrs["user-id"], attrs["open-id"], attrs["id"])
+		name := firstNonEmpty(attrs["user-name"], attrs["name"], markdownPlainText(inner), userID)
+		if userID == "" {
+			return name
+		}
+		// Both values are HTML-escaped because they are embedded in tag markup.
+		// NOTE(review): the attribute name user_id is reconstructed — confirm
+		// against the IM mention syntax expected downstream.
+		return fmt.Sprintf(`<at user_id="%s">%s</at>`, html.EscapeString(userID), html.EscapeString(name))
+	case "doc":
+		title := firstNonEmpty(attrs["title"], attrs["name"], attrs["doc-id"], "document")
+		if href := firstNonEmpty(attrs["href"], attrs["url"]); href != "" {
+			return markdownLink(title, href)
+		}
+		docID := firstNonEmpty(attrs["doc-id"], attrs["token"])
+		if docID == "" {
+			return imMarkdownInlineCode(segment)
+		}
+		fileType := strings.Trim(strings.ToLower(firstNonEmpty(attrs["file-type"], "docx")), "/")
+		return markdownLink(title, strings.TrimRight(imCtx.baseURL, "/")+"/"+fileType+"/"+docID)
+	case "citation":
+		if text, href, ok := extractIMMarkdownInnerLink(inner); ok {
+			return markdownLink(text, href)
+		}
+		if href := firstNonEmpty(attrs["href"], attrs["url"]); href != "" {
+			return markdownLink(firstNonEmpty(attrs["title"], attrs["name"], href), href)
+		}
+		return markdownPlainText(convertToIMMarkdown(inner, imCtx))
+	default:
+		return imMarkdownInlineCode(segment)
+	}
+}
+
+// handleIMMarkdownTable renders a table element as a Markdown pipe table. The
+// first row becomes the header, followed by a "-" separator row; shorter rows
+// are padded with empty cells to the widest row. When no row or no cell can
+// be extracted, the element's source is shown as inline code.
+func handleIMMarkdownTable(segment string, inner string, _ map[string]string, imCtx imMarkdownContext) string {
+	// Rows and cells are paired by tag depth rather than by non-greedy regexp
+	// captures: a table nested inside a cell brings its own <tr> and <td>/<th>
+	// tags, and treating those as the outer row/cell boundary would corrupt
+	// the outer table.
+	rowBodies := extractIMMarkdownElementBodies(inner, imMarkdownRowTagRE)
+	if len(rowBodies) == 0 {
+		return imMarkdownInlineCode(segment)
+	}
+
+	rows := make([][]string, 0, len(rowBodies))
+	width := 0
+	for _, rowBody := range rowBodies {
+		cellBodies := extractIMMarkdownElementBodies(rowBody, imMarkdownCellTagRE)
+		if len(cellBodies) == 0 {
+			continue
+		}
+		cells := make([]string, len(cellBodies))
+		for i, cellBody := range cellBodies {
+			cells[i] = normalizeIMMarkdownTableCell(convertToIMMarkdown(cellBody, imCtx))
+		}
+		rows = append(rows, cells)
+		if len(cells) > width {
+			width = len(cells)
+		}
+	}
+	if len(rows) == 0 {
+		return imMarkdownInlineCode(segment)
+	}
+
+	separator := make([]string, width)
+	for i := range separator {
+		separator[i] = "-"
+	}
+
+	var out strings.Builder
+	for i, row := range rows {
+		writeIMMarkdownTableRow(&out, padIMMarkdownTableRow(row, width))
+		if i == 0 {
+			writeIMMarkdownTableRow(&out, separator)
+		}
+	}
+	return strings.TrimRight(out.String(), "\n")
+}
+
+// extractIMMarkdownElementBodies returns the inner content of every top-level
+// element in content that tagRE matches. tagRE must capture the optional
+// closing slash as group 1, as the row and cell regexps do. Stray closing
+// tags are skipped, self-closing tags contribute an empty body, and scanning
+// stops at the first element that has no matching end tag.
+func extractIMMarkdownElementBodies(content string, tagRE *regexp.Regexp) []string {
+	var bodies []string
+	cursor := 0
+	for cursor < len(content) {
+		loc := tagRE.FindStringSubmatchIndex(content[cursor:])
+		if loc == nil {
+			return bodies
+		}
+		tagStart, tagEnd := cursor+loc[0], cursor+loc[1]
+		isClosing := loc[2] >= 0 && content[cursor+loc[2]:cursor+loc[3]] == "/"
+		switch {
+		case isClosing:
+			cursor = tagEnd
+		case isSelfClosingIMMarkdownTag(content[tagStart:tagEnd]):
+			bodies = append(bodies, "")
+			cursor = tagEnd
+		default:
+			bodyEnd, next, ok := findIMMarkdownElementClosingTag(content, tagEnd, tagRE)
+			if !ok {
+				return bodies
+			}
+			bodies = append(bodies, content[tagEnd:bodyEnd])
+			cursor = next
+		}
+	}
+	return bodies
+}
+
+// findIMMarkdownElementClosingTag scans content from byte offset from (just
+// past an opening tag) for the end tag that balances it. Every further
+// non-self-closing opening tag matched by tagRE raises the nesting level and
+// every closing tag lowers it. The start and end offsets of the balancing end
+// tag are returned, or found=false when none exists. tagRE must capture the
+// optional closing slash as group 1.
+func findIMMarkdownElementClosingTag(content string, from int, tagRE *regexp.Regexp) (int, int, bool) {
+	open := 1
+	for _, loc := range tagRE.FindAllStringSubmatchIndex(content[from:], -1) {
+		tagStart, tagEnd := from+loc[0], from+loc[1]
+		closing := loc[2] >= 0 && content[from+loc[2]:from+loc[3]] == "/"
+		switch {
+		case closing:
+			open--
+			if open == 0 {
+				return tagStart, tagEnd, true
+			}
+		case !isSelfClosingIMMarkdownTag(content[tagStart:tagEnd]):
+			open++
+		}
+	}
+	return 0, 0, false
+}
+
+// normalizeIMMarkdownTableCell flattens converted cell content so it fits on
+// one table row: tags other than "at" are removed, HTML entities are
+// unescaped, newlines are replaced, pipes are backslash-escaped and runs of
+// whitespace collapse to single spaces. An all-whitespace cell becomes "".
+func normalizeIMMarkdownTableCell(cell string) string {
+ // The placeholder shields line breaks from the tag stripping and entity
+ // unescaping below.
+ const brPlaceholder = "\x00BR\x00"
+ cell = imMarkdownCellBreakRE.ReplaceAllString(cell, brPlaceholder)
+ cell = imMarkdownAnyTagRE.ReplaceAllStringFunc(cell, func(tag string) string {
+ name := strings.ToLower(strings.TrimPrefix(imMarkdownAnyTagRE.FindStringSubmatch(tag)[1], "/"))
+ if name == "at" {
+ return tag
+ }
+ return ""
+ })
+ cell = html.UnescapeString(cell)
+ // NOTE(review): the replacement literals on the next three lines are plain
+ // spaces here, which makes the placeholder round-trip redundant; they may
+ // originally have been a line-break tag — confirm against upstream.
+ cell = strings.ReplaceAll(cell, brPlaceholder, " ")
+ cell = strings.ReplaceAll(cell, " \n", " ")
+ cell = strings.ReplaceAll(cell, "\n", " ")
+ cell = strings.ReplaceAll(cell, "|", `\|`)
+ lines := strings.Fields(cell)
+ if len(lines) == 0 {
+ return ""
+ }
+ return strings.Join(lines, " ")
+}
+
+// writeIMMarkdownTableRow appends row to out as one pipe-table line,
+// "| a | b |", terminated by a newline.
+func writeIMMarkdownTableRow(out *strings.Builder, row []string) {
+	out.WriteString("| " + strings.Join(row, " | ") + " |\n")
+}
+
+// padIMMarkdownTableRow returns row extended with empty cells up to cols
+// entries. A row that already has at least cols cells is returned unchanged;
+// otherwise a new slice is allocated.
+func padIMMarkdownTableRow(row []string, cols int) []string {
+	missing := cols - len(row)
+	if missing <= 0 {
+		return row
+	}
+	padded := append(make([]string, 0, cols), row...)
+	return append(padded, make([]string, missing)...)
+}
+
+// convertIMMarkdownListItems renders the <li> children of a list, one item
+// per line. Unordered items get a "-" marker. Ordered items use their "seq"
+// attribute (unless blank or "auto") and otherwise a running counter that
+// advances once per non-empty ordered item. Items with an empty body are
+// skipped, and scanning stops at the first <li> without a matching end tag.
+func convertIMMarkdownListItems(inner string, ordered bool, imCtx imMarkdownContext) string {
+	var items []string
+	cursor, counter := 0, 1
+	for cursor < len(inner) {
+		loc := imMarkdownLiOpenRE.FindStringIndex(inner[cursor:])
+		if loc == nil {
+			break
+		}
+		tagStart, tagEnd := cursor+loc[0], cursor+loc[1]
+		bodyEnd, next, ok := findIMMarkdownListItemClosingTag(inner, tagEnd)
+		if !ok {
+			break
+		}
+		cursor = next
+
+		body := strings.TrimSpace(convertToIMMarkdown(inner[tagEnd:bodyEnd], imCtx))
+		if body == "" {
+			continue
+		}
+		marker := "-"
+		if ordered {
+			seq := strings.TrimSpace(parseIMMarkdownAttrs(inner[tagStart:tagEnd])["seq"])
+			if seq != "" && seq != "auto" {
+				marker = strings.TrimSuffix(seq, ".") + "."
+			} else {
+				marker = fmt.Sprintf("%d.", counter)
+			}
+			counter++
+		}
+		items = append(items, marker+" "+indentIMMarkdownListContinuation(body))
+	}
+	return strings.Join(items, "\n")
+}
+
+// findIMMarkdownListItemClosingTag locates the </li> that balances a list
+// item whose opening tag ends at byte offset from, so nested lists keep their
+// items paired correctly. It returns the start and end offsets of that end
+// tag, or found=false when the item is never closed. The depth-tracking scan
+// is shared with table rows and cells.
+func findIMMarkdownListItemClosingTag(content string, from int) (int, int, bool) {
+	return findIMMarkdownElementClosingTag(content, from, imMarkdownLiCloseRE)
+}
+
+// indentIMMarkdownListContinuation indents every line of body after the
+// first, so multi-line item content stays attached to its list marker.
+func indentIMMarkdownListContinuation(body string) string {
+	lines := strings.Split(body, "\n")
+	return strings.Join(lines, "\n ")
+}
+
+// extractIMMarkdownInnerLink returns the label and destination of the first
+// link matched by imMarkdownLinkRE in inner. The label is the link's plain
+// text, or the raw href when that text is empty; the returned destination is
+// HTML-unescaped. ok is false when inner contains no such link.
+func extractIMMarkdownInnerLink(inner string) (string, string, bool) {
+	groups := imMarkdownLinkRE.FindStringSubmatch(inner)
+	if groups == nil {
+		return "", "", false
+	}
+	rawHref := groups[1]
+	if rawHref == "" {
+		rawHref = groups[2]
+	}
+	label := strings.TrimSpace(markdownPlainText(groups[3]))
+	if label == "" {
+		label = rawHref
+	}
+	return label, html.UnescapeString(rawHref), true
+}
+
+// markdownPlainText reduces markup to plain text: matches of
+// imMarkdownCellBreakRE become newlines, all other tags are removed, HTML
+// entities are unescaped and surrounding whitespace is trimmed.
+func markdownPlainText(s string) string {
+	withBreaks := imMarkdownCellBreakRE.ReplaceAllString(s, "\n")
+	stripped := imMarkdownAnyTagRE.ReplaceAllString(withBreaks, "")
+	return strings.TrimSpace(html.UnescapeString(stripped))
+}
+
+// markdownLinkLabelText returns the plain text of s for use as a link label,
+// dropping any line that consists solely of "---" and trimming the result.
+func markdownLinkLabelText(s string) string {
+	text := markdownPlainText(s)
+	if !strings.Contains(text, "---") {
+		return text
+	}
+	lines := strings.Split(text, "\n")
+	kept := make([]string, 0, len(lines))
+	for _, line := range lines {
+		if strings.TrimSpace(line) != "---" {
+			kept = append(kept, line)
+		}
+	}
+	return strings.TrimSpace(strings.Join(kept, "\n"))
+}
+
+// markdownLink formats a Markdown link "[text](href)". href is trimmed and
+// used as the label when text is blank; the label and destination are escaped
+// so they cannot break out of the link syntax.
+func markdownLink(text, href string) string {
+	dest := strings.TrimSpace(href)
+	label := firstNonEmpty(text, dest)
+	return "[" + escapeMarkdownLinkText(label) + "](" + escapeMarkdownLinkDestination(dest) + ")"
+}
+
+func escapeMarkdownLinkText(text string) string {
+ text = strings.ReplaceAll(text, `\`, `\\`)
+ text = strings.ReplaceAll(text, `[`, `\[`)
+ text = strings.ReplaceAll(text, `]`, `\]`)
+ return text
+}
+
+// escapeMarkdownLinkDestination percent-encodes the parts of href that can
+// terminate or split a Markdown link destination. Lark/Feishu IM Markdown
+// does not reliably parse raw spaces or parentheses inside (...), so those
+// are encoded while URL delimiters such as :/?#&= are left alone.
+//
+// Existing %XX escapes are copied through, a '%' that does not start a valid
+// escape is encoded as %25, and every byte outside ASCII is encoded.
+func escapeMarkdownLinkDestination(href string) string {
+	var out strings.Builder
+	out.Grow(len(href))
+	for i := 0; i < len(href); i++ {
+		b := href[i]
+		switch {
+		case b == '%' && i+2 < len(href) && isHexDigit(href[i+1]) && isHexDigit(href[i+2]):
+			// Already a percent escape: keep all three bytes as-is.
+			out.WriteString(href[i : i+3])
+			i += 2
+		case b == '%', b >= utf8.RuneSelf, shouldPercentEncodeIMMarkdownURLByte(b):
+			writePercentEncodedByte(&out, b)
+		default:
+			out.WriteByte(b)
+		}
+	}
+	return out.String()
+}
+
+// shouldPercentEncodeIMMarkdownURLByte reports whether b must be
+// percent-encoded in a link destination: space and control bytes, DEL and
+// above, and the punctuation ( ) < > " \ ^ ` { | }.
+func shouldPercentEncodeIMMarkdownURLByte(b byte) bool {
+	if b <= ' ' || b >= 0x7f {
+		return true
+	}
+	return strings.IndexByte("()<>\"\\^`{|}", b) >= 0
+}
+
+// writePercentEncodedByte appends b to out as "%XX" with upper-case hex digits.
+func writePercentEncodedByte(out *strings.Builder, b byte) {
+	const digits = "0123456789ABCDEF"
+	encoded := [3]byte{'%', digits[b>>4], digits[b&0x0f]}
+	out.Write(encoded[:])
+}
+
+// isHexDigit reports whether b is an ASCII hexadecimal digit in either case.
+func isHexDigit(b byte) bool {
+	switch {
+	case b >= '0' && b <= '9':
+		return true
+	case b >= 'a' && b <= 'f':
+		return true
+	case b >= 'A' && b <= 'F':
+		return true
+	default:
+		return false
+	}
+}
+
+// imMarkdownInlineCode wraps s in an inline code span. The delimiter is one
+// backtick longer than the longest backtick run inside s, and a space is
+// added on both sides when s starts or ends with a backtick.
+func imMarkdownInlineCode(s string) string {
+	longest, current := 0, 0
+	for i := 0; i < len(s); i++ {
+		if s[i] != '`' {
+			current = 0
+			continue
+		}
+		current++
+		if current > longest {
+			longest = current
+		}
+	}
+	fence := strings.Repeat("`", longest+1)
+	pad := ""
+	if strings.HasPrefix(s, "`") || strings.HasSuffix(s, "`") {
+		pad = " "
+	}
+	return fence + pad + s + pad + fence
+}
+
+func imMarkdownFencedCode(code, lang string) string {
+ maxRun := 0
+ for _, line := range strings.Split(code, "\n") {
+ if run := leadingBacktickRun(line); run > maxRun {
+ maxRun = run
+ }
+ }
+ fenceLen := maxRun + 1
+ if fenceLen < 3 {
+ fenceLen = 3
+ }
+ fence := strings.Repeat("`", fenceLen)
+ return fence + strings.TrimSpace(lang) + "\n" + strings.Trim(code, "\n") + "\n" + fence
+}
+
+// leadingBacktickRun returns how many consecutive backticks s starts with.
+func leadingBacktickRun(s string) int {
+	return len(s) - len(strings.TrimLeft(s, "`"))
+}
+
+// firstNonEmpty returns the first value that is not blank, with surrounding
+// whitespace trimmed, or "" when every value is blank.
+func firstNonEmpty(values ...string) string {
+	for i := range values {
+		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
+			return trimmed
+		}
+	}
+	return ""
+}
diff --git a/shortcuts/doc/docs_fetch_im_markdown_test.go b/shortcuts/doc/docs_fetch_im_markdown_test.go
new file mode 100644
index 00000000..971b4878
--- /dev/null
+++ b/shortcuts/doc/docs_fetch_im_markdown_test.go
@@ -0,0 +1,1305 @@
+// Copyright (c) 2026 Lark Technologies Pte. Ltd.
+// SPDX-License-Identifier: MIT
+
+package doc
+
+import (
+ "reflect"
+ "strings"
+ "testing"
+)
+
+func TestApplyFetchIMMarkdown(t *testing.T) { // Checks that applyFetchIMMarkdown rewrites data in place only when document.content is a string.
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ data map[string]interface{}
+ docInput string
+ want map[string]interface{}
+ }{
+ {
+ name: "missing document leaves data unchanged",
+ data: map[string]interface{}{
+ "content": `Roadmap `,
+ },
+ docInput: "https://tenant.example.com/docx/doc_token",
+ want: map[string]interface{}{
+ "content": `Roadmap `,
+ },
+ },
+ {
+ name: "non string content leaves data unchanged",
+ data: map[string]interface{}{
+ "document": map[string]interface{}{
+ "content": 123,
+ },
+ },
+ docInput: "https://tenant.example.com/docx/doc_token",
+ want: map[string]interface{}{
+ "document": map[string]interface{}{
+ "content": 123,
+ },
+ },
+ },
+ {
+ name: "converts content with tenant base url",
+ data: map[string]interface{}{
+ "document": map[string]interface{}{
+ "content": `Roadmap ` + "\n" + ` `,
+ },
+ },
+ docInput: "https://tenant.example.com/docx/doc_token",
+ want: map[string]interface{}{
+ "document": map[string]interface{}{
+ "content": "# Roadmap\n[sheet S1](https://tenant.example.com/sheets/sht_token)",
+ },
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ applyFetchIMMarkdown(tt.data, tt.docInput)
+ if !reflect.DeepEqual(tt.data, tt.want) {
+ t.Fatalf("data = %#v, want %#v", tt.data, tt.want)
+ }
+ })
+ }
+}
+
+func TestConvertToIMMarkdownTitle(t *testing.T) { // Title cases: plain, trimmed, inner markup, empty, trailing text, uppercase tag, and a missing closing tag.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "plain title",
+ input: `Roadmap `,
+ want: "# Roadmap",
+ },
+ {
+ name: "trim title whitespace",
+ input: "\n Roadmap \n ",
+ want: "# Roadmap",
+ },
+ {
+ name: "convert title inner markup",
+ input: `Bold Title `,
+ want: "# **Bold** Title",
+ },
+ {
+ name: "empty title",
+ input: ` `,
+ want: "",
+ },
+ {
+ name: "title followed by text",
+ input: `Roadmap tail`,
+ want: "# Roadmaptail",
+ },
+ {
+ name: "uppercase title is handled case-insensitively",
+ input: `Roadmap `,
+ want: "# Roadmap",
+ },
+ {
+ name: "missing closing title is preserved",
+ input: `beforeRoadmap`,
+ want: `beforeRoadmap`,
+ },
+ })
+}
+
+func TestConvertToIMMarkdownCallout(t *testing.T) { // Callout cases: emoji/body combinations, nesting, nested registered tags, and a missing closing tag.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "emoji and body",
+ input: `Read **this**. `,
+ want: "---\n💡 Read **this**.\n---",
+ },
+ {
+ name: "body without emoji",
+ input: `Plain body `,
+ want: "---\nPlain body\n---",
+ },
+ {
+ name: "emoji only",
+ input: ` `,
+ want: "---\n✅\n---",
+ },
+ {
+ name: "empty callout",
+ input: ` `,
+ want: "---\n---",
+ },
+ {
+ name: "nested callout",
+ input: `Outer Inner `,
+ want: "---\n✅ Outer ---\n💡 Inner\n---\n---",
+ },
+ {
+ name: "callout contains registered tags",
+ input: ` `,
+ want: "---\n📝 [Spec](https://example.com)\n---",
+ },
+ {
+ name: "callout contains grid and cite",
+ input: ` `,
+ want: "---\n📣 Alice \n[Spec](https://example.com)\n---",
+ },
+ {
+ name: "same-name nested callout with trailing text",
+ input: `ab c d`,
+ want: "---\n1 a---\n2 b\n---c\n---d",
+ },
+ {
+ name: "missing closing callout is preserved",
+ input: `beforebody`,
+ want: `beforebody`,
+ },
+ })
+}
+
+func TestConvertToIMMarkdownBlockquote(t *testing.T) { // Blockquote cases: "> " line prefixes, nested markers, blank lines, an empty quote, and paragraphs outside a quote.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "single paragraph",
+ input: `quote link
`,
+ want: "> quote [link](https://example.com)",
+ },
+ {
+ name: "multiple paragraphs keep line breaks",
+ input: `first
second
`,
+ want: "> first\n> **second**",
+ },
+ {
+ name: "nested blockquote keeps nested markers",
+ input: `outer
inner
`,
+ want: "> outer\n> > inner",
+ },
+ {
+ name: "blank line keeps quote marker",
+ input: "first\n\nsecond ",
+ want: "> first\n>\n> second",
+ },
+ {
+ name: "empty blockquote",
+ input: ` `,
+ want: "",
+ },
+ {
+ name: "plain adjacent paragraphs outside blockquote stay compact",
+ input: `first
second
`,
+ want: "firstsecond",
+ },
+ })
+}
+
+func TestConvertToIMMarkdownParagraphHeadingAndListItemEdges(t *testing.T) { // Edge cases for empty headings and paragraphs, top-level list items, and list item scanning.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "empty heading",
+ input: ` `,
+ want: "",
+ },
+ {
+ name: "empty paragraph",
+ input: `
`,
+ want: "",
+ },
+ {
+ name: "top level list item uses seq",
+ input: "first\nsecond ",
+ want: "7. first\n second\n",
+ },
+ {
+ name: "top level empty list item",
+ input: ` `,
+ want: "",
+ },
+ {
+ name: "unordered list skips non item text and empty items",
+ input: ``,
+ want: "- first\n- second",
+ },
+ {
+ name: "unclosed list item stops list scan",
+ input: ``,
+ want: "- first",
+ },
+ })
+}
+
+func TestConvertToIMMarkdownGridAndColumn(t *testing.T) { // Grid/column cases: column joining, nesting, grids inside callouts, and a missing closing tag.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "two columns",
+ input: `Left Right `,
+ want: "Left\nRight",
+ },
+ {
+ name: "column converts nested registered tags",
+ input: ` `,
+ want: "[Spec](https://example.com)\n",
+ },
+ {
+ name: "empty column",
+ input: ` `,
+ want: "",
+ },
+ {
+ name: "nested grid",
+ input: `A B C `,
+ want: "A\nB\nC",
+ },
+ {
+ name: "grid inside callout",
+ input: `A B `,
+ want: "---\n📌 A\nB\n---",
+ },
+ {
+ name: "adjacent grids do not merge",
+ input: `A B `,
+ want: "AB",
+ },
+ {
+ name: "column with nested callout keeps recursive output",
+ input: `Tip `,
+ want: "---\n💡 Tip\n---\n",
+ },
+ {
+ name: "missing closing grid is preserved",
+ input: `A `,
+ want: `A `,
+ },
+ })
+}
+
+func TestConvertToIMMarkdownTable(t *testing.T) { // Table cases: header and separator rows, pipe escaping, padding, nested content, and inline-code fallbacks.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "basic table",
+ input: ``,
+ want: "| A | B |\n| - | - |\n| 1 | 2 |",
+ },
+ {
+ name: "table strips attrs and preserves cell line break",
+ input: ``,
+ want: "| A | B |\n| - | - |\n| 1 | **two** lines |",
+ },
+ {
+ name: "table escapes pipe",
+ input: ``,
+ want: "| A\\|B |\n| - |\n| x\\|y |",
+ },
+ {
+ name: "table pads ragged rows",
+ input: ``,
+ want: "| A | B |\n| - | - |\n| 1 | |",
+ },
+ {
+ name: "table converts nested cite",
+ input: ``,
+ want: "| User |\n| - |\n| Alice |",
+ },
+ {
+ name: "table converts nested bookmark and sheet",
+ input: ``,
+ want: "| Link | Sheet |\n| - | - |\n| [Spec](https://example.com) | [sheet S1](https://larkoffice.com/sheets/sht_1) |",
+ },
+ {
+ name: "table strips nested unknown html but preserves text",
+ input: ``,
+ want: "| A |\n| - |\n| red under |",
+ },
+ {
+ name: "table normalizes markdown hard breaks",
+ input: "",
+ want: "| A |\n| - |\n| line1 line2 |",
+ },
+ {
+ name: "table cell keeps nested table whole",
+ input: ``,
+ want: "| Outer |\n| - |\n| before \\| Inner \\| \\| - \\| \\| x \\| after |",
+ },
+ {
+ name: "table with only data row treats first row as header",
+ input: ``,
+ want: "| A | B |\n| - | - |",
+ },
+ {
+ name: "table without rows falls back to inline code",
+ input: ``,
+ want: "``",
+ },
+ {
+ name: "table row without cells falls back to inline code",
+ input: ``,
+ want: "``",
+ },
+ {
+ name: "table self closing row falls back to inline code",
+ input: ``,
+ want: "``",
+ },
+ {
+ name: "table empty cell stays empty",
+ input: ``,
+ want: "| |\n| - |",
+ },
+ {
+ name: "missing closing table is preserved",
+ input: `beforeA `,
+ want: `beforeA `,
+ },
+ })
+}
+
+func TestIMMarkdownElementExtractionEdges(t *testing.T) { // Calls the element-body and closing-tag finders directly, including unclosed and nested inputs.
+ t.Parallel()
+
+ bodies := extractIMMarkdownElementBodies(` x open`, imMarkdownRowTagRE)
+ if want := []string{"", "x "}; !reflect.DeepEqual(bodies, want) {
+ t.Fatalf("extractIMMarkdownElementBodies() = %#v, want %#v", bodies, want)
+ }
+
+ if _, _, ok := findIMMarkdownElementClosingTag(` x`, len(" "), imMarkdownRowTagRE); ok {
+ t.Fatal("findIMMarkdownElementClosingTag() found closing tag, want false")
+ }
+
+ start, end, ok := findIMMarkdownListItemClosingTag(`outer tail`, len(""))
+ if !ok {
+ t.Fatal("findIMMarkdownListItemClosingTag() did not find closing tag")
+ }
+ if got, want := ` outer tail`[start:end], ""; got != want {
+ t.Fatalf("closing tag = %q, want %q", got, want)
+ }
+
+ if _, _, ok := findIMMarkdownListItemClosingTag(`open`, len(" ")); ok {
+ t.Fatal("findIMMarkdownListItemClosingTag() found closing tag, want false")
+ }
+
+ start, end, ok = findIMMarkdownListItemClosingTag(` outer inner tail`, len(""))
+ if !ok {
+ t.Fatal("findIMMarkdownListItemClosingTag() did not find nested closing tag")
+ }
+ if got, want := ` outer inner tail`[start:end], ""; got != want {
+ t.Fatalf("nested closing tag = %q, want %q", got, want)
+ }
+
+ if got := convertIMMarkdownListItems("plain text", false, imMarkdownContext{}); got != "" {
+ t.Fatalf("convertIMMarkdownListItems() = %q, want empty", got)
+ }
+}
+
+func TestNormalizeIMMarkdownTableCellStripsUnknownTags(t *testing.T) { // Expects normalizeIMMarkdownTableCell to reduce its input to the bare text "red".
+ t.Parallel()
+
+ got := normalizeIMMarkdownTableCell(`red `)
+ if want := "red"; got != want {
+ t.Fatalf("normalizeIMMarkdownTableCell() = %q, want %q", got, want)
+ }
+}
+
+func TestConvertToIMMarkdownDiscardTags(t *testing.T) { // Cases for tags dropped from the output, plus a source name kept as inline code and an unclosed tag.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "figure discarded",
+ input: `beforehidden after`,
+ want: "beforeafter",
+ },
+ {
+ name: "figure with source discarded",
+ input: ` `,
+ want: "",
+ },
+ {
+ name: "self-closing source discarded",
+ input: `a b`,
+ want: "ab",
+ },
+ {
+ name: "source name becomes inline code",
+ input: "ab",
+ want: "a``report`v1`.pdf``b",
+ },
+ {
+ name: "button discarded",
+ input: `aClick b`,
+ want: "ab",
+ },
+ {
+ name: "time discarded",
+ input: `a b`,
+ want: "ab",
+ },
+ {
+ name: "colgroup discarded",
+ input: `a b`,
+ want: "ab",
+ },
+ {
+ name: "col discarded",
+ input: `a b`,
+ want: "ab",
+ },
+ {
+ name: "self-closing button discarded",
+ input: `a b`,
+ want: "ab",
+ },
+ {
+ name: "missing closing discard tag is preserved",
+ input: `ahidden`,
+ want: `ahidden`,
+ },
+ })
+}
+
+func TestConvertToIMMarkdownWhiteboard(t *testing.T) { // Whiteboard cases: paired, self-closing, backticks, opaque inner text, and a missing closing tag.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "paired whiteboard",
+ input: ` `,
+ want: "` `",
+ },
+ {
+ name: "self-closing whiteboard",
+ input: ` `,
+ want: "``",
+ },
+ {
+ name: "whiteboard with backticks",
+ input: " ",
+ want: "`` ``",
+ },
+ {
+ name: "whiteboard preserves inner text as opaque",
+ input: `not exported `,
+ want: "`not exported `",
+ },
+ {
+ name: "missing closing whiteboard is preserved",
+ input: ``,
+ want: ``,
+ },
+ })
+}
+
+func TestConvertToIMMarkdownSheet(t *testing.T) { // Sheet cases: links of the form baseURL/sheets/token, with an inline-code fallback when the token is missing.
+ t.Parallel()
+
+ assertIMMarkdownCasesWithContext(t, imMarkdownContext{baseURL: "https://bytedance.larkoffice.com"}, []imMarkdownCase{
+ {
+ name: "sheet with sheet id",
+ input: ` `,
+ want: "[sheet S1](https://bytedance.larkoffice.com/sheets/sht_token)",
+ },
+ {
+ name: "sheet without sheet id",
+ input: ` `,
+ want: "[sheet](https://bytedance.larkoffice.com/sheets/sht_token)",
+ },
+ {
+ name: "sheet without token falls back to inline code",
+ input: ` `,
+ want: "` `",
+ },
+ {
+ name: "self-closing sheet",
+ input: ` `,
+ want: "[sheet S1](https://bytedance.larkoffice.com/sheets/sht_token)",
+ },
+ {
+ name: "sheet token is trimmed",
+ input: ` `,
+ want: "[sheet S1](https://bytedance.larkoffice.com/sheets/sht_token)",
+ },
+ {
+ name: "sheet inside text",
+ input: `before after`,
+ want: "before [sheet](https://bytedance.larkoffice.com/sheets/sht_token) after",
+ },
+ })
+}
+
+func TestConvertToIMMarkdownBookmark(t *testing.T) { // Bookmark cases: label fallbacks (name, title, inner text, href), label escaping, and href percent-encoding.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "name and href",
+ input: ` `,
+ want: "[Example](https://example.com)",
+ },
+ {
+ name: "title fallback",
+ input: ` `,
+ want: "[Example](https://example.com)",
+ },
+ {
+ name: "inner text fallback",
+ input: `Example `,
+ want: "[Example](https://example.com)",
+ },
+ {
+ name: "missing href returns label",
+ input: ` `,
+ want: "Example",
+ },
+ {
+ name: "escaped link label",
+ input: ` `,
+ want: "[A \\[B\\]](https://example.com)",
+ },
+ {
+ name: "href is percent encoded",
+ input: ` `,
+ want: "[Spec](https://example.com/wiki/A%20B%20%28draft%29?q=x%20y#frag%281%29)",
+ },
+ {
+ name: "href keeps existing percent escapes",
+ input: ` `,
+ want: "[Spec](https://example.com/wiki/A%20B)",
+ },
+ {
+ name: "href escapes invalid percent and unicode",
+ input: ` `,
+ want: "[Spec](https://example.com/wiki/%E7%A0%94%E5%8F%91%25zz?x=1%25)",
+ },
+ {
+ name: "href escapes markdown delimiter bytes",
+ input: " ",
+ want: "[Spec](https://example.com/a%3Cb%3E%7Cc%60d)",
+ },
+ {
+ name: "inner registered tag fallback",
+ input: ` `,
+ want: "[Alice](https://example.com)",
+ },
+ {
+ name: "href fallback as label",
+ input: ` `,
+ want: "[https://example.com](https://example.com)",
+ },
+ {
+ name: "self-closing bookmark without href",
+ input: ` `,
+ want: "Example",
+ },
+ })
+}
+
+func TestConvertToIMMarkdownInlineEdges(t *testing.T) { // Inline edge cases: empty emphasis tags, anchors, latex, images, fenced code, and inline code.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "empty strong emphasis and delete",
+ input: ` `,
+ want: "",
+ },
+ {
+ name: "anchor without href returns text",
+ input: `plain text `,
+ want: "plain **text**",
+ },
+ {
+ name: "anchor without text falls back to href",
+ input: ` `,
+ want: "[https://example.com/a b](https://example.com/a%20b)",
+ },
+ {
+ name: "latex escapes dollars",
+ input: `price=$5 `,
+ want: "$price=\\$5$",
+ },
+ {
+ name: "empty latex",
+ input: ` `,
+ want: "",
+ },
+ {
+ name: "image missing href",
+ input: ` `,
+ want: "",
+ },
+ {
+ name: "image uses src and title fallback",
+ input: ` `,
+ want: "![A \\[img\\]](https://example.com/i%201.png)",
+ },
+ {
+ name: "plain fenced code",
+ input: `plain `,
+ want: "```\nplain\n```",
+ },
+ {
+ name: "code inline trims nested markup",
+ input: `x `,
+ want: "`x`",
+ },
+ })
+}
+
+func TestConvertToIMMarkdownCiteUser(t *testing.T) { // User cite cases: id and name fallbacks, escaping, and the self-closing form.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "user id and name",
+ input: ` `,
+ want: `Alice `,
+ },
+ {
+ name: "open id fallback",
+ input: ` `,
+ want: `Bob `,
+ },
+ {
+ name: "name falls back to user id",
+ input: ` `,
+ want: `ou_abc `,
+ },
+ {
+ name: "missing user id returns name",
+ input: ` `,
+ want: "Alice",
+ },
+ {
+ name: "escape at XML",
+ input: ` `,
+ want: `A&B `,
+ },
+ {
+ name: "inner text fallback when attrs missing name",
+ input: `Alice `,
+ want: `Alice `,
+ },
+ {
+ name: "self-closing user cite",
+ input: ` `,
+ want: `Alice `,
+ },
+ })
+}
+
+func TestConvertToIMMarkdownCiteDoc(t *testing.T) { // Doc cite cases: links built from baseURL and doc id, href precedence, defaults, wiki type, and title escaping.
+ t.Parallel()
+
+ assertIMMarkdownCasesWithContext(t, imMarkdownContext{baseURL: "https://bytedance.larkoffice.com"}, []imMarkdownCase{
+ {
+ name: "doc id to link",
+ input: ` `,
+ want: "[Spec](https://bytedance.larkoffice.com/docx/doc_token)",
+ },
+ {
+ name: "href wins",
+ input: ` `,
+ want: "[Spec](https://example.com/doc%20%28draft%29)",
+ },
+ {
+ name: "default title and file type",
+ input: ` `,
+ want: "[document](https://bytedance.larkoffice.com/docx/doc_token)",
+ },
+ {
+ name: "missing doc id falls back to inline code",
+ input: ` `,
+ want: "` `",
+ },
+ {
+ name: "wiki file type link",
+ input: ` `,
+ want: "[Wiki](https://bytedance.larkoffice.com/wiki/wiki_token)",
+ },
+ {
+ name: "doc title is escaped",
+ input: ` `,
+ want: "[A \\[B\\]](https://bytedance.larkoffice.com/docx/doc_token)",
+ },
+ })
+}
+
+func TestConvertToIMMarkdownCiteCitation(t *testing.T) { // Citation cite cases: inner anchors, href attributes, and plain-text fallback.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "inner anchor",
+ input: `Ref `,
+ want: "[Ref](https://example.com/ref)",
+ },
+ {
+ name: "href attr",
+ input: ` `,
+ want: "[Ref](https://example.com/ref)",
+ },
+ {
+ name: "plain inner fallback",
+ input: `Plain Ref `,
+ want: "Plain Ref",
+ },
+ {
+ name: "inner anchor text strips markup",
+ input: `Ref `,
+ want: "[Ref](https://example.com/ref)",
+ },
+ {
+ name: "single quoted inner anchor falls back to href text",
+ input: ` `,
+ want: "[https://example.com/ref](https://example.com/ref)",
+ },
+ {
+ name: "href attr falls back to href label",
+ input: ` `,
+ want: "[https://example.com/ref](https://example.com/ref)",
+ },
+ })
+}
+
+func TestEscapeMarkdownLinkDestinationInvalidUTF8(t *testing.T) { // An invalid UTF-8 byte (0xff) between ASCII letters must come out as %FF.
+ t.Parallel()
+
+ got := escapeMarkdownLinkDestination(string([]byte{'a', 0xff, 'b'}))
+ if want := "a%FFb"; got != want {
+ t.Fatalf("escapeMarkdownLinkDestination() = %q, want %q", got, want)
+ }
+}
+
+func TestConvertToIMMarkdownCiteUnknown(t *testing.T) { // Unknown cite cases (paired and self-closing) expect an inline-code result.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "unknown paired cite",
+ input: `x `,
+ want: "`x `",
+ },
+ {
+ name: "unknown self-closing cite",
+ input: ` `,
+ want: "``",
+ },
+ })
+}
+
+func TestConvertToIMMarkdownScannerBoundaries(t *testing.T) { // Scanner boundary cases: unknown tags, single-quoted attributes, leading text, comments, br, and malformed attributes.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "unknown tag preserved with known child untouched",
+ input: ` `,
+ want: `[Spec](https://example.com) `,
+ },
+ {
+ name: "registered tag attributes single quotes",
+ input: ` `,
+ want: "[Spec](https://example.com)",
+ },
+ {
+ name: "registered tag name with leading text",
+ input: `alphaBeta gamma`,
+ want: "alpha# Betagamma",
+ },
+ {
+ name: "xml comment is preserved",
+ input: `aT `,
+ want: "a# T",
+ },
+ {
+ name: "br is preserved",
+ input: `a b`,
+ want: "a b",
+ },
+ {
+ name: "malformed attribute still allows handler",
+ input: `Inner `,
+ want: "[Inner](https://example.com)",
+ },
+ })
+}
+
+func TestConvertToIMMarkdownCompositeNesting(t *testing.T) { // Nested combinations: callout/grid/table/resources, grid or table inside a table cell, and callout inside a bookmark.
+ t.Parallel()
+
+ assertIMMarkdownCasesWithContext(t, imMarkdownContext{baseURL: "https://tenant.example.com"}, []imMarkdownCase{
+ {
+ name: "callout grid table and resources",
+ input: ` `,
+ want: "---\n📌 | Owner | Doc |\n| - | - |\n| Alice | [Spec](https://tenant.example.com/docx/doc_1) |\n[sheet S1](https://tenant.example.com/sheets/sht_1)\n---",
+ },
+ {
+ name: "grid inside table cell",
+ input: ``,
+ want: "| Outer |\n| - |\n| A B |",
+ },
+ {
+ name: "table inside table cell",
+ input: ``,
+ want: "| Outer | Tail |\n| - | - |\n| \\| Inner \\| \\| - \\| \\| x \\| | done |",
+ },
+ {
+ name: "bookmark wraps callout fallback text",
+ input: `Tip `,
+ want: "[💡 Tip](https://example.com)",
+ },
+ })
+}
+
+func TestConvertToIMMarkdownUnclosedFragments(t *testing.T) { // Each case is an unclosed tag whose expected output equals its input.
+ t.Parallel()
+
+ assertIMMarkdownCases(t, []imMarkdownCase{
+ {
+ name: "unclosed title preserves nested registered tag",
+ input: `before `,
+ want: `before `,
+ },
+ {
+ name: "unclosed callout preserves nested registered tag",
+ input: `before `,
+ want: `before `,
+ },
+ {
+ name: "unclosed grid preserves closed child",
+ input: `beforeA `,
+ want: `beforeA `,
+ },
+ {
+ name: "unclosed column preserves nested registered tag",
+ input: `before `,
+ want: `before `,
+ },
+ {
+ name: "unclosed table preserves nested cite",
+ input: `before `,
+ want: `before `,
+ },
+ {
+ name: "unclosed figure preserves nested source",
+ input: `before `,
+ want: `before `,
+ },
+ {
+ name: "unclosed whiteboard preserves nested registered tag",
+ input: `before `,
+ want: `before `,
+ },
+ {
+ name: "unclosed sheet preserves nested registered tag",
+ input: `before `,
+ want: `before `,
+ },
+ {
+ name: "unclosed bookmark preserves nested cite",
+ input: `before `,
+ want: `before `,
+ },
+ {
+ name: "unclosed cite preserves inner anchor",
+ input: `beforeRef `,
+ want: `beforeRef `,
+ },
+ })
+}
+
+func TestConvertToIMMarkdownDeepRegisteredContainers(t *testing.T) { // Wraps "leaf" 32 and 16 levels deep and checks the leaf (and 16 callout emoji) survive conversion.
+ t.Parallel()
+
+ deepGrid := "leaf"
+ for i := 0; i < 32; i++ {
+ deepGrid = "" + deepGrid + " "
+ }
+ if got := convertToIMMarkdown(deepGrid, imMarkdownContext{}); got != "leaf" {
+ t.Fatalf("deep grid conversion = %q, want %q", got, "leaf")
+ }
+
+ deepCallout := "leaf"
+ for i := 0; i < 16; i++ {
+ deepCallout = `` + deepCallout + ` `
+ }
+ got := convertToIMMarkdown(deepCallout, imMarkdownContext{})
+ if !strings.Contains(got, "leaf") {
+ t.Fatalf("deep callout conversion missing leaf:\n%s", got)
+ }
+ if count := strings.Count(got, "💡"); count != 16 {
+ t.Fatalf("deep callout emoji count = %d, want 16\n%s", count, got)
+ }
+}
+
+func TestConvertToIMMarkdownDocumentExpectedTagsAndEscaping(t *testing.T) { // Converts a newline-joined multi-line document and compares the whole result with the expected Markdown.
+ t.Parallel()
+
+ imCtx := imMarkdownContext{baseURL: "https://bytedance.larkoffice.com"}
+ input := strings.Join([]string{
+ `Roadmap Q1 `,
+ `Deep Heading `,
+ `plain next Bold Italic Gone Under Plain A [B]
`,
+ `quote Card
`,
+ ``,
+ `one three `,
+ `fmt.Println("hi")` + "\n```" + ` `,
+ `` + "`edge`" + ` E=mc^2
`,
+ ` `,
+ ` `,
+ ` `,
+ }, "\n")
+
+ want := strings.Join([]string{
+ `# Roadmap Q1`,
+ `###### Deep Heading`,
+ `plain next **Bold** *Italic* ~~Gone~~ Under Plain [A \[B\]](https://example.com/a%28b%29)`,
+ `> quote [Card](https://example.com/card)`,
+ `- first`,
+ `- **second**`,
+ `1. one`,
+ `3. three`,
+ "````Go\nfmt.Println(\"hi\")\n```\n````",
+ "`` `edge` `` $E=mc^2$ --- ![A \\[img\\]](https://example.com/i%281%29.png)",
+ "``report`v1`.pdf``",
+ "`任务``群聊卡片`",
+ "`多维表格``多维表格``OKR`",
+ }, "\n")
+
+ if got := convertToIMMarkdown(input, imCtx); got != want {
+ t.Fatalf("convertToIMMarkdown() = %q, want %q", got, want)
+ }
+}
+
+func TestConvertToIMMarkdownMixedDocumentSmoke(t *testing.T) {
+ t.Parallel()
+
+ imCtx := imMarkdownContext{baseURL: "https://bytedance.larkoffice.com"}
+ input := strings.Join([]string{
+ `Roadmap `,
+ `### Left Right `,
+ ``,
+ ` `,
+ ` `,
+ `Ref `,
+ ` `,
+ ` `,
+ }, "\n")
+
+ got := convertToIMMarkdown(input, imCtx)
+
+ for _, want := range []string{
+ "# Roadmap",
+ "### Left",
+ "Right",
+ "| A | B |\n| - | - |\n| 1 | **two** lines |",
+ `Alice `,
+ "[Spec](https://bytedance.larkoffice.com/docx/doc_token)",
+ "[Ref](https://example.com/ref)",
+ "[sheet S1](https://bytedance.larkoffice.com/sheets/sht_token)",
+ } {
+ if !strings.Contains(got, want) {
+ t.Fatalf("converted content missing %q:\n%s", want, got)
+ }
+ }
+ for _, dropped := range []string{" first\n>\n> second",
+ },
+ {
+ name: "empty latex",
+ got: handleIMMarkdownLatex("", " ", nil, ctx),
+ want: "",
+ },
+ {
+ name: "image without URL",
+ got: handleIMMarkdownImage("", "", map[string]string{"alt": "A"}, ctx),
+ want: "",
+ },
+ {
+ name: "empty strong",
+ got: handleIMMarkdownStrong("", " ", nil, ctx),
+ want: "",
+ },
+ {
+ name: "empty emphasis",
+ got: handleIMMarkdownEmphasis("", " ", nil, ctx),
+ want: "",
+ },
+ {
+ name: "empty delete",
+ got: handleIMMarkdownDelete("", " ", nil, ctx),
+ want: "",
+ },
+ {
+ name: "anchor without href",
+ got: handleIMMarkdownAnchor("", "plain ", nil, ctx),
+ want: "**plain**",
+ },
+ {
+ name: "table skips rows without cells",
+ got: handleIMMarkdownTable("", " ", nil, ctx),
+ want: "``",
+ },
+ {
+ name: "empty normalized table cell",
+ got: normalizeIMMarkdownTableCell(" "),
+ want: "",
+ },
+ {
+ name: "plain fenced code uses minimum fence",
+ got: imMarkdownFencedCode("plain", ""),
+ want: "```\nplain\n```",
+ },
+ }
+
+ for _, tt := range cases {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ if tt.got != tt.want {
+ t.Fatalf("got %q, want %q", tt.got, tt.want)
+ }
+ })
+ }
+}
+
+func TestIMMarkdownExtractionAndListBreakBranches(t *testing.T) { // Covers early-exit branches of the extraction and list helpers on empty, unclosed, and unbalanced input.
+ t.Parallel()
+
+ rowBodies := extractIMMarkdownElementBodies(`open`, imMarkdownRowTagRE)
+ if want := []string{""}; !reflect.DeepEqual(rowBodies, want) {
+ t.Fatalf("extractIMMarkdownElementBodies() = %#v, want %#v", rowBodies, want)
+ }
+
+ if _, _, ok := findIMMarkdownElementClosingTag(` open `, len(""), imMarkdownRowTagRE); ok {
+ t.Fatal("findIMMarkdownElementClosingTag() found closing tag, want false")
+ }
+
+ if got := convertIMMarkdownListItems("", false, imMarkdownContext{}); got != "" {
+ t.Fatalf("empty list conversion = %q, want empty", got)
+ }
+ if got := convertIMMarkdownListItems("open", false, imMarkdownContext{}); got != "" {
+ t.Fatalf("unclosed list conversion = %q, want empty", got)
+ }
+ if _, _, ok := findIMMarkdownListItemClosingTag(` outer inner `, len("")); ok {
+ t.Fatal("findIMMarkdownListItemClosingTag() found closing tag for unbalanced nested item")
+ }
+}
+
+func TestIMMarkdownLinkAndEncodingFallbackBranches(t *testing.T) { // Covers the inner-link href fallback and percent-encoding of invalid escapes, Unicode, and invalid UTF-8.
+ t.Parallel()
+
+ text, href, ok := extractIMMarkdownInnerLink(` `)
+ if !ok {
+ t.Fatal("extractIMMarkdownInnerLink() ok = false, want true")
+ }
+ if text != "https://example.com/ref" || href != "https://example.com/ref" {
+ t.Fatalf("inner link = (%q, %q), want href fallback", text, href)
+ }
+
+ if got := escapeMarkdownLinkDestination("a%zz%"); got != "a%25zz%25" {
+ t.Fatalf("escaped invalid percent = %q, want %q", got, "a%25zz%25")
+ }
+ if got := escapeMarkdownLinkDestination("研发"); got != "%E7%A0%94%E5%8F%91" {
+ t.Fatalf("escaped unicode = %q, want encoded UTF-8 bytes", got)
+ }
+ if got := escapeMarkdownLinkDestination(string([]byte{'a', 0xff, 'b'})); got != "a%FFb" {
+ t.Fatalf("escaped invalid UTF-8 = %q, want %q", got, "a%FFb")
+ }
+}
+
+type imMarkdownCase struct { // One table-driven conversion case consumed by the assertIMMarkdownCases helpers.
+ name string // subtest name passed to t.Run
+ input string // text handed to convertToIMMarkdown
+ want string // exact expected conversion result
+}
+
+// assertIMMarkdownCases runs cases through assertIMMarkdownCasesWithContext
+// using a context whose baseURL is "https://larkoffice.com".
+func assertIMMarkdownCases(t *testing.T, cases []imMarkdownCase) {
+	t.Helper()
+
+	defaultCtx := imMarkdownContext{baseURL: "https://larkoffice.com"}
+	assertIMMarkdownCasesWithContext(t, defaultCtx, cases)
+}
+
+// assertIMMarkdownCasesWithContext runs every case as a parallel subtest named
+// after the case. Each subtest converts the case input with
+// convertToIMMarkdown under imCtx and fails unless the result equals the
+// case's want string exactly.
+func assertIMMarkdownCasesWithContext(t *testing.T, imCtx imMarkdownContext, cases []imMarkdownCase) {
+	t.Helper()
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			got := convertToIMMarkdown(tc.input, imCtx)
+			if got == tc.want {
+				return
+			}
+			t.Fatalf("convertToIMMarkdown() = %q, want %q", got, tc.want)
+		})
+	}
+}
diff --git a/shortcuts/doc/docs_fetch_v2.go b/shortcuts/doc/docs_fetch_v2.go
index b3e4f2fc..16ca133f 100644
--- a/shortcuts/doc/docs_fetch_v2.go
+++ b/shortcuts/doc/docs_fetch_v2.go
@@ -17,7 +17,7 @@ import (
// v2FetchFlags returns the flag definitions for the v2 (OpenAPI) fetch path.
func v2FetchFlags() []common.Flag {
return []common.Flag{
- {Name: "doc-format", Desc: "output content format; xml keeps DocxXML structure and optional block ids, markdown is plain export", Default: "xml", Enum: []string{"xml", "markdown"}},
+ {Name: "doc-format", Desc: "output content format; xml keeps DocxXML structure and optional block ids, markdown is plain export, im-markdown downgrades residual DocxXML fragments for IM messages", Default: "xml", Enum: []string{"xml", "markdown", "im-markdown"}},
{Name: "detail", Desc: "detail level; simple for reading, with-ids for block references, full for styles and edit metadata", Default: "simple", Enum: []string{"simple", "with-ids", "full"}},
{Name: "lang", Desc: "user cite display language, e.g. en-US, zh-CN, ja-JP"},
{Name: "revision-id", Desc: "document revision id; -1 means latest", Type: "int", Default: "-1"},
@@ -72,6 +72,9 @@ func executeFetchV2(_ context.Context, runtime *common.RuntimeContext) error {
if warning := addFetchDetailDowngradeWarning(runtime, data); warning != "" && runtime.Format == "pretty" {
fmt.Fprintf(runtime.IO().ErrOut, "warning: %s\n", warning)
}
+ if isIMMarkdownFetch(runtime) {
+ applyFetchIMMarkdown(data, runtime.Str("doc"))
+ }
runtime.OutFormatRaw(data, nil, func(w io.Writer) {
if doc, ok := data["document"].(map[string]interface{}); ok {
@@ -85,7 +88,7 @@ func executeFetchV2(_ context.Context, runtime *common.RuntimeContext) error {
func buildFetchBody(runtime *common.RuntimeContext) map[string]interface{} {
body := map[string]interface{}{
- "format": runtime.Str("doc-format"),
+ "format": effectiveFetchFormat(runtime),
}
if v := runtime.Int("revision-id"); v > 0 {
body["revision_id"] = v
@@ -122,6 +125,14 @@ func buildFetchBody(runtime *common.RuntimeContext) map[string]interface{} {
return body
}
+// effectiveFetchFormat returns the format value that buildFetchBody sends in
+// the request body. It is the trimmed "doc-format" flag, except that
+// "im-markdown" is sent as "markdown".
+func effectiveFetchFormat(runtime *common.RuntimeContext) string {
+	switch format := strings.TrimSpace(runtime.Str("doc-format")); format {
+	case "im-markdown":
+		return "markdown"
+	default:
+		return format
+	}
+}
+
func resolveFetchLang(runtime *common.RuntimeContext) string {
if runtime.Changed("lang") {
return strings.TrimSpace(runtime.Str("lang"))
diff --git a/shortcuts/doc/docs_fetch_v2_test.go b/shortcuts/doc/docs_fetch_v2_test.go
index 1210f0ac..ace683c2 100644
--- a/shortcuts/doc/docs_fetch_v2_test.go
+++ b/shortcuts/doc/docs_fetch_v2_test.go
@@ -6,9 +6,12 @@ package doc
import (
"context"
"encoding/json"
+ "errors"
+ "reflect"
"strings"
"testing"
+ "github.com/larksuite/cli/errs"
"github.com/larksuite/cli/internal/cmdutil"
"github.com/larksuite/cli/internal/core"
"github.com/larksuite/cli/internal/httpmock"
@@ -104,6 +107,369 @@ func TestBuildFetchBodyExplicitBlankLangOmitsLang(t *testing.T) {
}
}
+func TestBuildFetchBodyIncludesRevisionAndFullDetail(t *testing.T) {
+ t.Parallel()
+
+ runtime := newFetchBodyTestRuntime(context.Background())
+ mustSetFetchFlag(t, runtime, "revision-id", "42")
+ mustSetFetchFlag(t, runtime, "detail", "full")
+
+ body := buildFetchBody(runtime)
+ if got := body["revision_id"]; got != 42 {
+ t.Fatalf("revision_id = %#v, want 42", got)
+ }
+ exportOption, _ := body["export_option"].(map[string]interface{})
+ want := map[string]interface{}{
+ "export_block_id": true,
+ "export_style_attrs": true,
+ "export_cite_extra_data": true,
+ }
+ if !reflect.DeepEqual(exportOption, want) {
+ t.Fatalf("export_option = %#v, want %#v", exportOption, want)
+ }
+}
+
+func TestBuildFetchBodyIncludesWithIDsDetail(t *testing.T) {
+ t.Parallel()
+
+ runtime := newFetchBodyTestRuntime(context.Background())
+ mustSetFetchFlag(t, runtime, "detail", "with-ids")
+
+ body := buildFetchBody(runtime)
+ exportOption, _ := body["export_option"].(map[string]interface{})
+ want := map[string]interface{}{
+ "export_block_id": true,
+ }
+ if !reflect.DeepEqual(exportOption, want) {
+ t.Fatalf("export_option = %#v, want %#v", exportOption, want)
+ }
+}
+
+func TestBuildFetchBodyIncludesReadOption(t *testing.T) {
+ t.Parallel()
+
+ runtime := newFetchBodyTestRuntime(context.Background())
+ mustSetFetchFlag(t, runtime, "scope", "section")
+ mustSetFetchFlag(t, runtime, "start-block-id", "blk_heading")
+
+ body := buildFetchBody(runtime)
+ want := map[string]interface{}{
+ "read_mode": "section",
+ "start_block_id": "blk_heading",
+ }
+ if got := body["read_option"]; !reflect.DeepEqual(got, want) {
+ t.Fatalf("read_option = %#v, want %#v", got, want)
+ }
+}
+
+func TestBuildReadOptionModes(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ setFlags map[string]string
+ want map[string]interface{}
+ }{
+ {
+ name: "full omits read option",
+ setFlags: map[string]string{
+ "scope": "full",
+ },
+ want: nil,
+ },
+ {
+ name: "outline with max depth",
+ setFlags: map[string]string{
+ "scope": "outline",
+ "max-depth": "3",
+ },
+ want: map[string]interface{}{
+ "read_mode": "outline",
+ "max_depth": "3",
+ },
+ },
+ {
+ name: "range with block ids and context",
+ setFlags: map[string]string{
+ "scope": "range",
+ "start-block-id": "blk_start",
+ "end-block-id": "blk_end",
+ "context-before": "2",
+ "context-after": "1",
+ "max-depth": "0",
+ },
+ want: map[string]interface{}{
+ "read_mode": "range",
+ "start_block_id": "blk_start",
+ "end_block_id": "blk_end",
+ "context_before": "2",
+ "context_after": "1",
+ "max_depth": "0",
+ },
+ },
+ {
+ name: "keyword with query",
+ setFlags: map[string]string{
+ "scope": "keyword",
+ "keyword": "foo|bar",
+ "context-before": "1",
+ },
+ want: map[string]interface{}{
+ "read_mode": "keyword",
+ "keyword": "foo|bar",
+ "context_before": "1",
+ },
+ },
+ {
+ name: "section keeps unlimited depth omitted",
+ setFlags: map[string]string{
+ "scope": "section",
+ "start-block-id": "blk_heading",
+ "max-depth": "-1",
+ },
+ want: map[string]interface{}{
+ "read_mode": "section",
+ "start_block_id": "blk_heading",
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ runtime := newFetchBodyTestRuntime(context.Background())
+ for name, value := range tt.setFlags {
+ mustSetFetchFlag(t, runtime, name, value)
+ }
+
+ if got := buildReadOption(runtime); !reflect.DeepEqual(got, tt.want) {
+ t.Fatalf("buildReadOption() = %#v, want %#v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestValidateReadModeFlagsRejectsInvalidScopeOptions(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ setFlags map[string]string
+ wantParam string
+ wantParams []string
+ }{
+ {
+ name: "negative context before",
+ setFlags: map[string]string{
+ "scope": "range",
+ "start-block-id": "blk_start",
+ "context-before": "-1",
+ },
+ wantParam: "--context-before",
+ },
+ {
+ name: "negative context after",
+ setFlags: map[string]string{
+ "scope": "range",
+ "start-block-id": "blk_start",
+ "context-after": "-1",
+ },
+ wantParam: "--context-after",
+ },
+ {
+ name: "max depth below unlimited sentinel",
+ setFlags: map[string]string{
+ "scope": "range",
+ "start-block-id": "blk_start",
+ "max-depth": "-2",
+ },
+ wantParam: "--max-depth",
+ },
+ {
+ name: "range needs boundary",
+ setFlags: map[string]string{
+ "scope": "range",
+ },
+ wantParams: []string{
+ "--start-block-id",
+ "--end-block-id",
+ },
+ },
+ {
+ name: "keyword needs keyword",
+ setFlags: map[string]string{
+ "scope": "keyword",
+ },
+ wantParam: "--keyword",
+ },
+ {
+ name: "section needs start block",
+ setFlags: map[string]string{
+ "scope": "section",
+ },
+ wantParam: "--start-block-id",
+ },
+ {
+ name: "unknown scope",
+ setFlags: map[string]string{
+ "scope": "bad",
+ },
+ wantParam: "--scope",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ runtime := newFetchBodyTestRuntime(context.Background())
+ for name, value := range tt.setFlags {
+ mustSetFetchFlag(t, runtime, name, value)
+ }
+
+ err := validateReadModeFlags(runtime)
+ if err == nil {
+ t.Fatal("validateReadModeFlags() succeeded, want error")
+ }
+ assertValidationContract(t, err, errs.SubtypeInvalidArgument, tt.wantParam, tt.wantParams...)
+ })
+ }
+}
+
+func TestValidateReadModeFlagsAcceptsValidScopeOptions(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ setFlags map[string]string
+ }{
+ {
+ name: "outline",
+ setFlags: map[string]string{
+ "scope": "outline",
+ },
+ },
+ {
+ name: "range with end block",
+ setFlags: map[string]string{
+ "scope": "range",
+ "end-block-id": "blk_end",
+ },
+ },
+ {
+ name: "keyword with keyword",
+ setFlags: map[string]string{
+ "scope": "keyword",
+ "keyword": "bug|缺陷",
+ },
+ },
+ {
+ name: "section with start block",
+ setFlags: map[string]string{
+ "scope": "section",
+ "start-block-id": "blk_heading",
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ runtime := newFetchBodyTestRuntime(context.Background())
+ for name, value := range tt.setFlags {
+ mustSetFetchFlag(t, runtime, name, value)
+ }
+
+ if err := validateReadModeFlags(runtime); err != nil {
+ t.Fatalf("validateReadModeFlags() error = %v", err)
+ }
+ })
+ }
+}
+
+func TestValidateFetchV2RejectsInvalidDocAndScope(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ setFlags map[string]string
+ wantParam string
+ }{
+ {
+ name: "invalid doc",
+ setFlags: map[string]string{
+ "doc": "https://example.com/sheets/sht_token",
+ },
+ wantParam: "--doc",
+ },
+ {
+ name: "invalid scope",
+ setFlags: map[string]string{
+ "scope": "bad",
+ },
+ wantParam: "--scope",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ runtime := newFetchShortcutTestRuntime(t, "", tt.setFlags)
+ err := validateFetchV2(context.Background(), runtime)
+ if err == nil {
+ t.Fatal("validateFetchV2() succeeded, want error")
+ }
+ assertValidationContract(t, err, errs.SubtypeInvalidArgument, tt.wantParam)
+ })
+ }
+}
+
+func TestAddFetchDetailDowngradeWarningNoops(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ setFlags map[string]string
+ }{
+ {
+ name: "xml format",
+ setFlags: map[string]string{
+ "doc-format": "xml",
+ "detail": "full",
+ },
+ },
+ {
+ name: "markdown simple detail",
+ setFlags: map[string]string{
+ "doc-format": "markdown",
+ "detail": "simple",
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ runtime := newFetchBodyTestRuntime(context.Background())
+ for name, value := range tt.setFlags {
+ mustSetFetchFlag(t, runtime, name, value)
+ }
+
+ data := map[string]interface{}{}
+ if got := addFetchDetailDowngradeWarning(runtime, data); got != "" {
+ t.Fatalf("warning = %q, want empty", got)
+ }
+ if _, ok := data["warnings"]; ok {
+ t.Fatalf("unexpected warnings: %#v", data["warnings"])
+ }
+ })
+ }
+}
+
func TestDocsFetchDryRunDefaultsToV2Endpoint(t *testing.T) {
t.Parallel()
@@ -141,36 +507,54 @@ func TestDocsFetchAPIVersionV1StillUsesV2Endpoint(t *testing.T) {
}
}
+func TestDocsFetchIMMarkdownRequestsMarkdownFromAPI(t *testing.T) {
+ t.Parallel()
+
+ runtime := newFetchShortcutTestRuntime(t, "", map[string]string{
+ "doc-format": "im-markdown",
+ })
+ if err := validateFetchV2(context.Background(), runtime); err != nil {
+ t.Fatalf("validateFetchV2() error = %v", err)
+ }
+
+ dry := decodeDocDryRun(t, DocsFetch.DryRun(context.Background(), runtime))
+ if got, want := dry.API[0].Body["format"], "markdown"; got != want {
+ t.Fatalf("dry-run format = %#v, want %q", got, want)
+ }
+}
+
func TestDocsFetchMarkdownDetailDowngradesToSimple(t *testing.T) {
t.Parallel()
- for _, detail := range []string{"with-ids", "full"} {
- t.Run(detail, func(t *testing.T) {
- t.Parallel()
+ for _, format := range []string{"markdown", "im-markdown"} {
+ for _, detail := range []string{"with-ids", "full"} {
+ t.Run(format+"/"+detail, func(t *testing.T) {
+ t.Parallel()
- runtime := newFetchShortcutTestRuntime(t, "", map[string]string{
- "doc-format": "markdown",
- "detail": detail,
+ runtime := newFetchShortcutTestRuntime(t, "", map[string]string{
+ "doc-format": format,
+ "detail": detail,
+ })
+ if err := validateFetchV2(context.Background(), runtime); err != nil {
+ t.Fatalf("validateFetchV2() error = %v", err)
+ }
+
+ dry := decodeDocDryRun(t, DocsFetch.DryRun(context.Background(), runtime))
+ exportOption, _ := dry.API[0].Body["export_option"].(map[string]interface{})
+ if exportOption == nil {
+ t.Fatalf("missing export_option: %#v", dry.API[0].Body)
+ }
+ if got := exportOption["export_block_id"]; got != false {
+ t.Fatalf("export_block_id = %#v, want false after markdown detail downgrade", got)
+ }
+ if got := exportOption["export_style_attrs"]; got != false {
+ t.Fatalf("export_style_attrs = %#v, want false after markdown detail downgrade", got)
+ }
+ if got := exportOption["export_cite_extra_data"]; got != false {
+ t.Fatalf("export_cite_extra_data = %#v, want false after markdown detail downgrade", got)
+ }
})
- if err := validateFetchV2(context.Background(), runtime); err != nil {
- t.Fatalf("validateFetchV2() error = %v", err)
- }
-
- dry := decodeDocDryRun(t, DocsFetch.DryRun(context.Background(), runtime))
- exportOption, _ := dry.API[0].Body["export_option"].(map[string]interface{})
- if exportOption == nil {
- t.Fatalf("missing export_option: %#v", dry.API[0].Body)
- }
- if got := exportOption["export_block_id"]; got != false {
- t.Fatalf("export_block_id = %#v, want false after markdown detail downgrade", got)
- }
- if got := exportOption["export_style_attrs"]; got != false {
- t.Fatalf("export_style_attrs = %#v, want false after markdown detail downgrade", got)
- }
- if got := exportOption["export_cite_extra_data"]; got != false {
- t.Fatalf("export_cite_extra_data = %#v, want false after markdown detail downgrade", got)
- }
- })
+ }
}
}
@@ -261,6 +645,107 @@ func TestDocsFetchMarkdownDetailDowngradeWarnsInPrettyOutput(t *testing.T) {
}
}
+func TestDocsFetchV2ReturnsAPIError(t *testing.T) {
+ t.Setenv("LARKSUITE_CLI_CONFIG_DIR", t.TempDir())
+
+ f, stdout, _, reg := cmdutil.TestFactory(t, docsTestConfigWithAppID("docs-fetch-api-error"))
+ reg.Register(&httpmock.Stub{
+ Method: "POST",
+ URL: "/open-apis/docs_ai/v1/documents/doxcnFetchAPIError/fetch",
+ Body: map[string]interface{}{
+ "code": 999999,
+ "msg": "fetch failed",
+ },
+ })
+
+ err := mountAndRunDocs(t, DocsFetch, []string{
+ "+fetch",
+ "--doc", "doxcnFetchAPIError",
+ "--as", "bot",
+ }, f, stdout)
+ if err == nil {
+ t.Fatal("mountAndRunDocs() succeeded, want API error")
+ }
+ var apiErr *errs.APIError
+ if !errors.As(err, &apiErr) {
+ t.Fatalf("error type = %T, want *errs.APIError (%v)", err, err)
+ }
+ p, ok := errs.ProblemOf(err)
+ if !ok {
+ t.Fatalf("ProblemOf() ok = false for %T (%v)", err, err)
+ }
+ if p.Category != errs.CategoryAPI {
+ t.Errorf("category = %q, want %q", p.Category, errs.CategoryAPI)
+ }
+ if p.Subtype != errs.SubtypeUnknown {
+ t.Errorf("subtype = %q, want %q", p.Subtype, errs.SubtypeUnknown)
+ }
+ if p.Code != 999999 {
+ t.Errorf("code = %d, want 999999", p.Code)
+ }
+ if p.Message != "fetch failed" {
+ t.Errorf("message = %q, want %q", p.Message, "fetch failed")
+ }
+ if cause := errors.Unwrap(err); cause != nil {
+ t.Fatalf("unexpected wrapped cause for API response error: %T %v", cause, cause)
+ }
+}
+
+func TestDocsFetchIMMarkdownConvertsContentInJSONOutput(t *testing.T) {
+ t.Setenv("LARKSUITE_CLI_CONFIG_DIR", t.TempDir())
+
+ f, stdout, _, reg := cmdutil.TestFactory(t, docsTestConfigWithAppID("docs-fetch-im-markdown"))
+ reg.Register(&httpmock.Stub{
+ Method: "POST",
+ URL: "/open-apis/docs_ai/v1/documents/doxcnFetchIMMarkdown/fetch",
+ Body: map[string]interface{}{
+ "code": 0,
+ "msg": "ok",
+ "data": map[string]interface{}{
+ "document": map[string]interface{}{
+ "document_id": "doxcnFetchIMMarkdown",
+ "revision_id": float64(1),
+ "content": strings.Join([]string{
+ `Doc Title `,
+ `Read **this**. `,
+ ` `,
+ }, "\n\n"),
+ },
+ },
+ },
+ })
+
+ err := mountAndRunDocs(t, DocsFetch, []string{
+ "+fetch",
+ "--doc", "doxcnFetchIMMarkdown",
+ "--doc-format", "im-markdown",
+ "--as", "bot",
+ }, f, stdout)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+
+ var envelope map[string]interface{}
+ if err := json.Unmarshal(stdout.Bytes(), &envelope); err != nil {
+ t.Fatalf("decode output: %v\nraw=%s", err, stdout.String())
+ }
+ data, _ := envelope["data"].(map[string]interface{})
+ doc, _ := data["document"].(map[string]interface{})
+ content, _ := doc["content"].(string)
+ for _, want := range []string{
+ "# Doc Title",
+ "---\n💡 Read **this**.\n---",
+ "[Example](https://example.com)",
+ } {
+ if !strings.Contains(content, want) {
+ t.Fatalf("converted content missing %q:\n%s", want, content)
+ }
+ }
+ if strings.Contains(content, "") || strings.Contains(content, " [flags]`)
| Shortcut | 说明 |
|----------|------|
| [`+create`](references/lark-doc-create.md) | Create a Lark document (XML / Markdown) |
-| [`+fetch`](references/lark-doc-fetch.md) | Fetch Lark document content (XML / Markdown) |
+| [`+fetch`](references/lark-doc-fetch.md) | Fetch Lark document content (XML / Markdown / im-markdown; use `im-markdown` only when the fetched content will be sent via `lark-im`) |
| [`+update`](references/lark-doc-update.md) | Update a Lark document (str_replace / block_insert_after / block_replace / ...) |
| [`+media-insert`](references/lark-doc-media-insert.md) | Insert a local image or file at the end of a Lark document (4-step orchestration + auto-rollback). Prefer `--from-clipboard` when the image is already on the system clipboard (screenshots, copy from Feishu/browser); use `--file` only for on-disk sources. |
| [`+media-download`](references/lark-doc-media-download.md) | Download document media or whiteboard thumbnail (auto-detects extension) |
diff --git a/skills/lark-doc/references/lark-doc-fetch.md b/skills/lark-doc/references/lark-doc-fetch.md
index 911a390a..b558b8dc 100644
--- a/skills/lark-doc/references/lark-doc-fetch.md
+++ b/skills/lark-doc/references/lark-doc-fetch.md
@@ -91,7 +91,7 @@ lark-cli docs +fetch --api-version v2 --doc Z1Fj...tnAc \
}
```
-`content` 的格式由 `--doc-format` 决定。设置 `--scope` 时会被 `` 包裹,详见上文"局部读取的输出结构"。
+`content` 的格式由 `--doc-format` 决定;`im-markdown` 仅用于获取内容后在 `lark-im` 场景下使用。设置 `--scope` 时会被 `` 包裹,详见上文"局部读取的输出结构"。
## 参数
@@ -99,7 +99,7 @@ lark-cli docs +fetch --api-version v2 --doc Z1Fj...tnAc \
|------|------|------|
| `--api-version` | 是 | 固定传 `v2` |
| `--doc` | 是 | 文档 URL 或 token(支持 `/docx/` 和 `/wiki/`) |
-| `--doc-format` | 否 | `xml`(默认)\| `markdown` \| `text` |
+| `--doc-format` | 否 | `xml`(默认)\| `markdown` \| `text` \| `im-markdown`(仅用于获取内容后在 `lark-im` 场景下使用) |
| `--detail` | 否 | `simple`(默认)\| `with-ids` \| `full` |
| `--revision-id` | 否 | 文档版本号,`-1` = 最新(默认) |
| `--scope` | 否 | `outline` \| `range` \| `keyword` \| `section`(省略 = 读整篇) |
diff --git a/skills/lark-doc/references/lark-doc-md.md b/skills/lark-doc/references/lark-doc-md.md
index 4d267e7b..42748481 100644
--- a/skills/lark-doc/references/lark-doc-md.md
+++ b/skills/lark-doc/references/lark-doc-md.md
@@ -1,6 +1,6 @@
# Markdown 格式参考
-`docs +fetch --api-version v2` / `docs +create --api-version v2` / `docs +update --api-version v2` 使用 `--doc-format markdown` 时适用。
+`docs +fetch --api-version v2` / `docs +create --api-version v2` / `docs +update --api-version v2` 使用 `--doc-format markdown` 时适用;fetch 的 `--doc-format im-markdown` 仅用于获取内容后在 `lark-im` 场景下使用,不作为 create/update 写入格式。
## 创建文档标题
diff --git a/skills/lark-im/SKILL.md b/skills/lark-im/SKILL.md
index 983c66d2..507cf87f 100644
--- a/skills/lark-im/SKILL.md
+++ b/skills/lark-im/SKILL.md
@@ -67,6 +67,10 @@ Card messages (`interactive` type) are not yet supported for compact conversion
`--audio` sends a voice message and supports only Opus audio files, for example `.opus` files or Ogg Opus (`.ogg`) files. For `mp3`, `wav`, or other non-Opus audio, either convert to `.opus` first and keep using `--audio`, or send the original file as an attachment with `--file`.
+### Sending Doc Content as a Message
+
+To send content fetched from a Lark doc as a message, fetch the doc with `--doc-format im-markdown`, then send the result using `--markdown`. The fetched content is already Markdown, so in any content-forwarding scenario keep the fetched text unchanged and send it using `--markdown`. Note: if the doc contains a `cite` tag with `type="user"`, keep the tag as-is; do not strip it.
+
### Flag Types
Flags support two layers: