Files
larksuite-cli/cmd/cmdexample_parse_test.go
sang-neo03 a4a4bd6ee0 feat: check shortcut example commands against the live CLI tree (#1244)
Validate the example commands embedded in shortcut definitions (the
"Example: lark-cli ..." lines in each shortcut's Tips, shown in --help)
against the real command tree built by cmd.Build. Implemented entirely as
test-only code in cmd/ (package cmd_test), so it ships in no binary and is
not importable by product code; the truth source is cmd.Build, the same
tree the binary uses, so the check cannot drift. It runs in the standard
unit-test CI job (go test ./cmd/...); a renamed command or unaccepted flag
in an example fails that job.
2026-06-05 10:59:55 +08:00

223 lines
7.4 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package cmd_test
import (
"regexp"
"strings"
)
// ref is one lark-cli command reference extracted from a shortcut example.
type ref struct {
line int // 1-based line number (the line where the command starts)
raw string // reconstructed command text, for error display
words []string // command words before the first flag (subcommand candidates)
flags []string // flag tokens used, e.g. "--query", "-q"
}
const cliToken = "lark-cli"
// subcommandStart guards against false positives from prose: a real command's
// first word is ASCII (a service name or a +shortcut). A token starting with
// CJK / punctuation is treated as narration, not a command.
var subcommandStart = regexp.MustCompile(`^[A-Za-z+]`)
// shellStops are standalone tokens that terminate a command (pipes, redirects,
// separators). Separators glued to a token (`get;`, `foo|`) are handled inline.
var shellStops = map[string]bool{
"|": true, "||": true, "&&": true, "&": true, ";": true,
">": true, ">>": true, "<": true, "2>": true, "2>&1": true,
}
// wordTrailPunct is sentence / CJK punctuation that can cling to a command word
// in prose ("auth login." / "auth login"); stripped so the word still resolves
// instead of being dropped as an unknown command or non-ASCII narration.
const wordTrailPunct = `.,;:!?"')]},。、;:!?)】」』`
// parseRefs extracts every lark-cli command reference from text (a shortcut's
// Tips line, which may embed an "Example: lark-cli ..." command). It is
// deliberately format-agnostic: it keys on the "lark-cli" token whether it sits
// in a ```bash fence, an inline `code` span, or bare prose. Backslash
// line-continuations are joined first so a multi-line invocation is parsed as
// one command; inline-code backticks and trailing # comments terminate it.
func parseRefs(content string) []ref {
var refs []ref
lines := strings.Split(content, "\n")
for i := 0; i < len(lines); i++ {
lineNo := i + 1
logical := lines[i]
// Shell line continuation: a trailing backslash joins the next physical
// line. Without this, flags on the continuation lines of a multi-line
// `lark-cli ... \` example are never seen by the checker.
for endsWithBackslash(logical) && i+1 < len(lines) {
logical = strings.TrimRight(logical, " \t")
logical = logical[:len(logical)-1] // drop the trailing backslash
i++
logical += " " + lines[i]
}
refs = append(refs, parseLine(logical, lineNo)...)
}
return refs
}
func endsWithBackslash(s string) bool {
return strings.HasSuffix(strings.TrimRight(s, " \t"), `\`)
}
func parseLine(line string, lineNo int) []ref {
var refs []ref
rest := line
for {
idx := strings.Index(rest, cliToken)
if idx < 0 {
break
}
after := rest[idx+len(cliToken):]
beforeOK := idx == 0 || isBoundary(rest[idx-1])
afterOK := after == "" || isBoundary(after[0])
if beforeOK && afterOK {
if words, flags, raw, ok := parseCmd(after); ok {
refs = append(refs, ref{line: lineNo, raw: cliToken + raw, words: words, flags: flags})
}
}
rest = after
}
return refs
}
// parseCmd tokenizes the text following "lark-cli" into leading command words
// (the subcommand path, up to the first flag) and flag tokens. It stops at a
// shell separator (standalone or glued), an inline-code backtick, a comment, or
// a placeholder/prose word. ok=false filters out non-commands.
func parseCmd(after string) (words, flags []string, raw string, ok bool) {
// An inline code span ends at the next backtick; a command never spans one.
if i := strings.IndexByte(after, '`'); i >= 0 {
after = after[:i]
}
// Drop $(...) command substitutions so flags belonging to the inner command
// (e.g. `--data "$(jq -n --arg x ...)"`) are not mistaken for lark-cli flags.
after = stripCmdSubst(after)
var kept []string
inFlags := false
for _, orig := range strings.Fields(after) {
tok := orig
if shellStops[tok] || strings.HasPrefix(tok, "#") {
break
}
// A shell separator glued to a token ends the command mid-token
// ("get;", "foo|next"): keep the part before it, handle it, then stop.
stop := false
if i := strings.IndexAny(tok, ";|"); i >= 0 {
tok, stop = tok[:i], true
}
switch {
case tok == "" || tok == "-":
// empty (after a glued separator) or a bare stdin marker — skip
case strings.HasPrefix(tok, "-"):
if f := normalizeFlag(tok); f != "" {
inFlags = true
flags = append(flags, f)
kept = append(kept, tok)
}
case inFlags:
// positional / flag value after the first flag — not a command word
kept = append(kept, tok)
default:
// Command-path word. ASCII placeholder markers (<x>, [x], {x|y},
// +<verb>, ...) end the command — checked on the RAW token so the
// trailing-punct stripping below cannot erase a "..." ellipsis
// ("base +..." must stay a placeholder, not become "+").
if strings.ContainsAny(tok, "<>[]{}|") || strings.Contains(tok, "...") {
stop = true
break
}
// Strip trailing sentence/CJK punctuation so "login." / "login"
// resolve to "login"; non-ASCII narration ends the command.
w := strings.TrimRight(tok, wordTrailPunct)
if w == "" || hasNonASCII(w) {
stop = true
break
}
words = append(words, w)
kept = append(kept, tok)
}
if stop {
break
}
}
if len(kept) > 0 {
raw = " " + strings.Join(kept, " ")
}
// Keep root-only refs ("lark-cli --help") and refs whose first word looks
// like a subcommand; drop prose ("lark-cli 就能搞定 ...").
if len(words) == 0 {
return words, flags, raw, len(flags) > 0
}
if !subcommandStart.MatchString(words[0]) {
return nil, nil, "", false
}
return words, flags, raw, true
}
// stripCmdSubst removes $(...) command substitutions (including nested ones)
// from s, leaving the surrounding text intact. Backtick substitutions are
// already handled upstream (a command never spans a backtick).
func stripCmdSubst(s string) string {
var b strings.Builder
depth := 0
for i := 0; i < len(s); i++ {
if depth == 0 && i+1 < len(s) && s[i] == '$' && s[i+1] == '(' {
depth = 1
i++ // skip '('
continue
}
if depth > 0 {
switch s[i] {
case '(':
depth++
case ')':
depth--
}
continue
}
b.WriteByte(s[i])
}
return b.String()
}
// isPlaceholderOrProse reports whether a command word is a doc placeholder
// (<resource>, [flags], {a|b}, +<verb>, ...) or narration (CJK / other
// non-ASCII), rather than a literal command token.
func isPlaceholderOrProse(w string) bool {
if hasNonASCII(w) {
return true
}
return strings.ContainsAny(w, "<>[]{}|") || strings.Contains(w, "...")
}
func hasNonASCII(s string) bool {
return strings.IndexFunc(s, func(r rune) bool { return r > 127 }) >= 0
}
// flagShape matches the leading flag token, stripping any trailing junk such as
// a "=value" suffix or punctuation that bled in from the surrounding markdown
// ("--help\"", "--help;", "--params={}"). The underscore is allowed because
// real flags use it ("--input_format", "--output_as"). Returns "" for non-flags.
var flagShape = regexp.MustCompile(`^--?[A-Za-z][A-Za-z0-9_-]*`)
// normalizeFlag extracts the canonical flag token from tok, or "" if tok is not
// a real flag (e.g. a shell-string fragment like "-草稿'").
func normalizeFlag(tok string) string {
return flagShape.FindString(tok)
}
func isBoundary(b byte) bool {
switch b {
case ' ', '\t', '`', '(', ')', '\'', '"', '*':
return true
}
return false
}