feat: resolve markdown blank-line formatting inconsistency in post messages (#1216)

Simplifies the markdown-to-post rendering pipeline in the IM shortcut. The previous
implementation split markdown at blank-line boundaries into multiple post paragraphs,
using zero-width space (\u200B) sentinel characters to preserve visual spacing.
While well-intentioned, this approach introduced fragility around edge cases such as
blank lines inside fenced code blocks, messages with only blank lines, and interactions
with the heading-normalization pass. This change consolidates rendering back into a
single {"tag":"md"} segment, making the output more predictable, the code significantly
easier to follow, and the test surface easier to maintain.
Change-Id: Ic2870ecbcb31ae7d36121f120102f2ff964f5169
This commit is contained in:
91-enjoy
2026-06-02 17:49:45 +08:00
committed by GitHub
parent e57d97f341
commit 0aa9e96d18
3 changed files with 33 additions and 267 deletions

View File

@@ -102,9 +102,6 @@ func TestResolveMarkdownAsPost(t *testing.T) {
if !strings.Contains(got, `"tag":"md"`) {
t.Fatalf("resolveMarkdownAsPost() = %q, want post payload", got)
}
if !strings.Contains(got, `"tag":"text"`) {
t.Fatalf("resolveMarkdownAsPost() = %q, want segmented blank-line text paragraph", got)
}
if !strings.Contains(got, `#### Title`) || !strings.Contains(got, `##### Subtitle`) {
t.Fatalf("resolveMarkdownAsPost() = %q, want optimized heading levels", got)
}

View File

@@ -817,49 +817,25 @@ func readMp4Duration(f fileio.File, fileSize int64) int64 {
// 5. Compress excess blank lines
// 6. Strip invalid image references (keep only img_xxx keys)
var (
reH2toH6 = regexp.MustCompile(`(?m)^#{2,6} (.+)$`)
reH1 = regexp.MustCompile(`(?m)^# (.+)$`)
reHasH1toH3 = regexp.MustCompile(`(?m)^#{1,3} `)
reConsecH = regexp.MustCompile(`(?m)^(#{4,5} .+)\n{1,2}(#{4,5} )`)
reTableNoGap = regexp.MustCompile(`(?m)^([^|\n].*)\n(\|.+\|)`)
reTableAfter = regexp.MustCompile(`(?m)((?:^\|.+\|[^\S\n]*\n?)+)`)
reExcessNL = regexp.MustCompile(`\n{3,}`)
reInvalidImg = regexp.MustCompile(`!\[[^\]]*\]\(([^)\s]+)\)`)
reCodeBlock = regexp.MustCompile("```[\\s\\S]*?```")
reBlankLineSeparator = regexp.MustCompile(`\n(?:[ \t]*\n)+`)
reH2toH6 = regexp.MustCompile(`(?m)^#{2,6} (.+)$`)
reH1 = regexp.MustCompile(`(?m)^# (.+)$`)
reHasH1toH3 = regexp.MustCompile(`(?m)^#{1,3} `)
reConsecH = regexp.MustCompile(`(?m)^(#{4,5} .+)\n{1,2}(#{4,5} )`)
reTableNoGap = regexp.MustCompile(`(?m)^([^|\n].*)\n(\|.+\|)`)
reTableAfter = regexp.MustCompile(`(?m)((?:^\|.+\|[^\S\n]*\n?)+)`)
reExcessNL = regexp.MustCompile(`\n{3,}`)
reInvalidImg = regexp.MustCompile(`!\[[^\]]*\]\(([^)\s]+)\)`)
reCodeBlock = regexp.MustCompile("```[\\s\\S]*?```")
)
const (
markdownCodeBlockPlaceholder = "___CB_"
postBlankLinePlaceholder = "\u200B"
)
type markdownPart struct {
text string
newlineCount int
isSeparator bool
}
func protectMarkdownCodeBlocks(text string) (string, []string) {
var codeBlocks []string
protected := reCodeBlock.ReplaceAllStringFunc(text, func(m string) string {
idx := len(codeBlocks)
codeBlocks = append(codeBlocks, m)
return fmt.Sprintf("%s%d___", markdownCodeBlockPlaceholder, idx)
})
return protected, codeBlocks
}
func restoreMarkdownCodeBlocks(text string, codeBlocks []string) string {
restored := text
for i, block := range codeBlocks {
restored = strings.Replace(restored, fmt.Sprintf("%s%d___", markdownCodeBlockPlaceholder, i), block, 1)
}
return restored
}
func optimizeMarkdownStyle(text string) string {
r, codeBlocks := protectMarkdownCodeBlocks(text)
const mark = "___CB_"
var codeBlocks []string
r := reCodeBlock.ReplaceAllStringFunc(text, func(m string) string {
idx := len(codeBlocks)
codeBlocks = append(codeBlocks, m)
return fmt.Sprintf("%s%d___", mark, idx)
})
// Only downgrade when original text has H1~H3; order matters (H2~H6 first).
if reHasH1toH3.MatchString(text) {
@@ -872,7 +848,9 @@ func optimizeMarkdownStyle(text string) string {
r = reTableNoGap.ReplaceAllString(r, "$1\n\n$2")
r = reTableAfter.ReplaceAllString(r, "$1\n")
r = restoreMarkdownCodeBlocks(r, codeBlocks)
for i, block := range codeBlocks {
r = strings.Replace(r, fmt.Sprintf("%s%d___", mark, i), block, 1)
}
r = reExcessNL.ReplaceAllString(r, "\n\n")
@@ -891,109 +869,12 @@ func optimizeMarkdownStyle(text string) string {
return r
}
func shouldUseSegmentedPost(markdown string) bool {
protected, _ := protectMarkdownCodeBlocks(markdown)
return reBlankLineSeparator.MatchString(protected)
}
func splitMarkdownByBlankLines(markdown string) []markdownPart {
protected, codeBlocks := protectMarkdownCodeBlocks(markdown)
locs := reBlankLineSeparator.FindAllStringIndex(protected, -1)
if len(locs) == 0 {
return []markdownPart{{text: markdown}}
}
parts := make([]markdownPart, 0, len(locs)*2+1)
last := 0
for _, loc := range locs {
if loc[0] > last {
content := restoreMarkdownCodeBlocks(protected[last:loc[0]], codeBlocks)
if content != "" {
parts = append(parts, markdownPart{text: content})
}
}
separator := protected[loc[0]:loc[1]]
parts = append(parts, markdownPart{
isSeparator: true,
newlineCount: strings.Count(separator, "\n"),
})
last = loc[1]
}
if last < len(protected) {
content := restoreMarkdownCodeBlocks(protected[last:], codeBlocks)
if content != "" {
parts = append(parts, markdownPart{text: content})
}
}
if len(parts) == 0 {
return []markdownPart{{text: markdown}}
}
return parts
}
func marshalMarkdownPostContent(content [][]map[string]interface{}) string {
payload := map[string]interface{}{
"zh_cn": map[string]interface{}{
"content": content,
},
}
data, _ := json.Marshal(payload)
return string(data)
}
func buildSingleMDPost(markdown string) string {
return marshalMarkdownPostContent([][]map[string]interface{}{
{{
"tag": "md",
"text": optimizeMarkdownStyle(markdown),
}},
})
}
func buildSegmentedPost(markdown string) string {
parts := splitMarkdownByBlankLines(markdown)
content := make([][]map[string]interface{}, 0, len(parts))
for _, part := range parts {
if part.isSeparator {
for i := 1; i < part.newlineCount; i++ {
content = append(content, []map[string]interface{}{{
"tag": "text",
"text": postBlankLinePlaceholder,
}})
}
continue
}
if part.text == "" {
continue
}
optimized := strings.Trim(optimizeMarkdownStyle(part.text), "\n")
if optimized == "" {
continue
}
content = append(content, []map[string]interface{}{{
"tag": "md",
"text": optimized,
}})
}
if len(content) == 0 {
return buildSingleMDPost(markdown)
}
return marshalMarkdownPostContent(content)
}
func buildMarkdownPostContent(markdown string) string {
if shouldUseSegmentedPost(markdown) {
return buildSegmentedPost(markdown)
}
return buildSingleMDPost(markdown)
}
// wrapMarkdownAsPost wraps markdown text into Feishu post format JSON (no network).
// Used by DryRun. Output may include md/text paragraphs when blank-line separators are present.
// Used by DryRun. Output: {"zh_cn":{"content":[[{"tag":"md","text":"..."}]]}}
func wrapMarkdownAsPost(markdown string) string {
return buildMarkdownPostContent(markdown)
optimized := optimizeMarkdownStyle(markdown)
inner, _ := json.Marshal(optimized)
return `{"zh_cn":{"content":[[{"tag":"md","text":` + string(inner) + `}]]}}`
}
var reMarkdownImage = regexp.MustCompile(`!\[[^\]]*\]\((https?://[^)\s]+)\)`)
@@ -1028,7 +909,9 @@ func wrapMarkdownAsPostForDryRun(markdown string) (content, desc string) {
// and wraps as post format JSON. Used by Execute (makes network calls).
func resolveMarkdownAsPost(ctx context.Context, runtime *common.RuntimeContext, markdown string) string {
resolved := resolveMarkdownImageURLs(ctx, runtime, markdown)
return buildMarkdownPostContent(resolved)
optimized := optimizeMarkdownStyle(resolved)
inner, _ := json.Marshal(optimized)
return `{"zh_cn":{"content":[[{"tag":"md","text":` + string(inner) + `}]]}}`
}
// resolveMarkdownImageURLs finds ![alt](https://...) in markdown, downloads each URL,

View File

@@ -6,7 +6,6 @@ package im
import (
"context"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"net/http"
@@ -17,36 +16,6 @@ import (
"github.com/larksuite/cli/shortcuts/common"
)
func decodePostContentForTest(t *testing.T, raw string) []interface{} {
t.Helper()
var payload map[string]interface{}
if err := json.Unmarshal([]byte(raw), &payload); err != nil {
t.Fatalf("json.Unmarshal() error = %v, raw=%s", err, raw)
}
locale, _ := payload["zh_cn"].(map[string]interface{})
content, _ := locale["content"].([]interface{})
if content == nil {
t.Fatalf("post content missing: %#v", payload)
}
return content
}
func decodePostParagraphForTest(t *testing.T, raw string, idx int) map[string]interface{} {
t.Helper()
content := decodePostContentForTest(t, raw)
if idx >= len(content) {
t.Fatalf("paragraph index %d out of range, len=%d, raw=%s", idx, len(content), raw)
}
paragraph, _ := content[idx].([]interface{})
if len(paragraph) != 1 {
t.Fatalf("paragraph %d = %#v, want single node", idx, paragraph)
}
node, _ := paragraph[0].(map[string]interface{})
return node
}
func TestNormalizeAtMentions(t *testing.T) {
input := `<at id=ou_alpha/> hi <at open_id="ou_beta"> and <at user_id=ou_gamma /> and <at email="x@example.com"/>`
got := normalizeAtMentions(input)
@@ -171,16 +140,6 @@ func TestWrapMarkdownAsPostForDryRun(t *testing.T) {
}
}
func TestWrapMarkdownAsPostForDryRun_SegmentedBlankLines(t *testing.T) {
content, _ := wrapMarkdownAsPostForDryRun("hello\n\n![alt](https://example.com/a.png)")
if !strings.Contains(content, `![alt](img_dryrun_1)`) {
t.Fatalf("wrapMarkdownAsPostForDryRun(segmented) content = %q, want placeholder img key", content)
}
if !strings.Contains(content, `"tag":"text"`) {
t.Fatalf("wrapMarkdownAsPostForDryRun(segmented) content = %q, want blank-line text paragraph", content)
}
}
func TestResolveMediaContentWithoutUploads(t *testing.T) {
tests := []struct {
name string
@@ -375,88 +334,15 @@ func TestOptimizeMarkdownStyle(t *testing.T) {
func TestWrapMarkdownAsPost(t *testing.T) {
got := wrapMarkdownAsPost("hello **world**")
content := decodePostContentForTest(t, got)
if len(content) != 1 {
t.Fatalf("wrapMarkdownAsPost() content len = %d, want 1", len(content))
// Should produce valid JSON with post structure
if !strings.Contains(got, `"tag":"md"`) {
t.Fatalf("wrapMarkdownAsPost() missing md tag: %s", got)
}
node := decodePostParagraphForTest(t, got, 0)
if node["tag"] != "md" {
t.Fatalf("wrapMarkdownAsPost() tag = %#v, want md", node["tag"])
if !strings.Contains(got, `"zh_cn"`) {
t.Fatalf("wrapMarkdownAsPost() missing zh_cn: %s", got)
}
if node["text"] != "hello **world**" {
t.Fatalf("wrapMarkdownAsPost() text = %#v, want %q", node["text"], "hello **world**")
}
}
func TestShouldUseSegmentedPost(t *testing.T) {
tests := []struct {
name string
markdown string
want bool
}{
{name: "single newline", markdown: "a\nb", want: false},
{name: "blank line", markdown: "a\n\nb", want: true},
{name: "blank line with spaces", markdown: "a\n \nb", want: true},
{name: "multiple blank lines", markdown: "a\n \n \n b", want: true},
{name: "blank lines inside code block only", markdown: "```go\n\n\nfmt.Println(1)\n```\nnext", want: false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := shouldUseSegmentedPost(tt.markdown); got != tt.want {
t.Fatalf("shouldUseSegmentedPost(%q) = %v, want %v", tt.markdown, got, tt.want)
}
})
}
}
func TestWrapMarkdownAsPost_SegmentedBlankLines(t *testing.T) {
got := wrapMarkdownAsPost("a\n\nb")
content := decodePostContentForTest(t, got)
if len(content) != 3 {
t.Fatalf("wrapMarkdownAsPost(a\\n\\nb) content len = %d, want 3", len(content))
}
first := decodePostParagraphForTest(t, got, 0)
if first["tag"] != "md" || first["text"] != "a" {
t.Fatalf("first paragraph = %#v, want md/a", first)
}
second := decodePostParagraphForTest(t, got, 1)
if second["tag"] != "text" || second["text"] != postBlankLinePlaceholder {
t.Fatalf("second paragraph = %#v, want blank text placeholder", second)
}
third := decodePostParagraphForTest(t, got, 2)
if third["tag"] != "md" || third["text"] != "b" {
t.Fatalf("third paragraph = %#v, want md/b", third)
}
}
func TestWrapMarkdownAsPost_SegmentedMultipleBlankLines(t *testing.T) {
got := wrapMarkdownAsPost("a\n\n\nb")
content := decodePostContentForTest(t, got)
if len(content) != 4 {
t.Fatalf("wrapMarkdownAsPost(a\\n\\n\\nb) content len = %d, want 4", len(content))
}
for i := 1; i <= 2; i++ {
node := decodePostParagraphForTest(t, got, i)
if node["tag"] != "text" || node["text"] != postBlankLinePlaceholder {
t.Fatalf("blank paragraph %d = %#v, want blank text placeholder", i, node)
}
}
}
func TestWrapMarkdownAsPost_SegmentedBlankLinesWithSpaces(t *testing.T) {
got := wrapMarkdownAsPost("a\n \nb")
content := decodePostContentForTest(t, got)
if len(content) != 3 {
t.Fatalf("wrapMarkdownAsPost(a\\n \\nb) content len = %d, want 3", len(content))
}
node := decodePostParagraphForTest(t, got, 1)
if node["tag"] != "text" || node["text"] != postBlankLinePlaceholder {
t.Fatalf("middle paragraph = %#v, want blank text placeholder", node)
if !strings.Contains(got, "hello **world**") {
t.Fatalf("wrapMarkdownAsPost() missing content: %s", got)
}
}