From e93e2a98e187c14afe2319808b0ed0394fb41ff1 Mon Sep 17 00:00:00 2001 From: raistlin042 Date: Mon, 25 May 2026 23:24:40 +0800 Subject: [PATCH] feat(apps): replace +html-publish cwd hard-reject with credential-file scan (#1072) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(apps): replace +html-publish cwd hard-reject with credential-file scan The previous --path == "." block was a coarse heuristic: it caught the common foot-gun of publishing a repo root, but also rejected legitimate clean cwds, and let a ./dist with a forgotten .env ship the secret through anyway (the sensitive-paths scanner was advisory and never ran on the Execute path). Move the gate from path shape to path content: - Validate now walks --path candidates and rejects publishes that include well-known credential files (.env / .env.* / .npmrc / .netrc / .git-credentials / .aws/credentials / .gcloud/credentials* / .docker/config.json / .kube/config). Living in Validate (not DryRun) means dry-run returns non-zero on hit too, so the dry-run preview matches Execute. - Narrow the credential pattern set. .git/, SSH private keys, *.pem and *.key are out of scope -- they're not env-token files and the false-positive rate (public certs, docs about key formats) is high. - Add --allow-sensitive as the escape hatch for legitimate cases (e.g. a docs site shipping .env.example on purpose). DryRun surfaces the waived list in sensitive_waived so the caller can relay it. - Drop the cwd defense-in-depth in runHTMLPublish. A clean cwd is now a valid publish target. The lark-apps skill and the html-publish reference are updated to describe the new gate, the override flag, and the patterns now explicitly out of scope. * feat(apps): drop .gcloud/* from credential-file scan The .gcloud/credentials pattern matched a non-existent path: gcloud's actual config dir is ~/.config/gcloud/ (XDG-based), and the real credential files there are credentials.db / access_tokens.db / application_default_credentials.json -- none of which would land under a .gcloud/ segment in a publish payload. Drop the rule rather than fix it: the realistic gcloud foot-gun would require recognizing the .config/gcloud/* tree by file basename, which is a broader change than the targeted env/cred scan in this PR. The remaining 7 patterns (.env / .env.* / .npmrc / .netrc / .git-credentials / .aws/credentials / .docker/config.json / .kube/config) cover the common Node/Python/CLI-tooling foot-guns. * fix(apps): close credential-scan bypass when --path is the parent dir itself isSensitiveRelPath anchors cloud-SDK matchers on adjacent parent/file segments (.aws/credentials, .docker/config.json, .kube/config), but walker strips that parent via filepath.Rel when --path is the conventional parent dir (e.g. ./.aws), yielding a bare RelPath="credentials" that slipped through silently. Same bypass for the single-file form --path ./.aws/credentials (walker sets RelPath = Base(rootPath)). Wrap the scan in isSensitiveCandidate: keep the fast RelPath scan, and on miss fall back to filepath.Abs(AbsPath) so the parent segment is visible again. isSensitiveRelPath itself is unchanged; existing tests still pin its pure-function contract. * fix(apps): drop filepath.Abs from sensitive scan to satisfy forbidigo lint The previous fix called filepath.Abs(c.AbsPath) — banned by the repo's forbidigo rule because shortcuts must not reach into the filesystem for path resolution. Reframe the same fix without fs access: re-prepend the root's basename (or, for the single-file form, the parent dir's basename of rootPath) to RelPath and re-scan only the parent-anchored credential pairs (.aws/credentials, .docker/config.json, .kube/config). Leaf matchers (.env / .npmrc / ...) stay scoped to RelPath — incidentally closing a latent false-positive where --path /home/alice/.env/dist would have flagged every file under it just because .env appeared in the absolute path. --- shortcuts/apps/apps_html_publish.go | 84 ++++-- shortcuts/apps/apps_html_publish_test.go | 262 ++++++++++++++++-- shortcuts/apps/sensitive_paths.go | 124 +++++++-- shortcuts/apps/sensitive_paths_test.go | 52 ++-- skills/lark-apps/SKILL.md | 4 +- .../references/lark-apps-html-publish.md | 38 ++- .../apps/apps_html_publish_dryrun_test.go | 61 ++-- 7 files changed, 503 insertions(+), 122 deletions(-) diff --git a/shortcuts/apps/apps_html_publish.go b/shortcuts/apps/apps_html_publish.go index 823ab527..18ac8752 100644 --- a/shortcuts/apps/apps_html_publish.go +++ b/shortcuts/apps/apps_html_publish.go @@ -7,7 +7,6 @@ import ( "context" "fmt" "io" - "path/filepath" "strings" "github.com/larksuite/cli/extension/fileio" @@ -28,6 +27,7 @@ var AppsHTMLPublish = common.Shortcut{ Flags: []common.Flag{ {Name: "app-id", Desc: "Miaoda app ID", Required: true}, {Name: "path", Desc: "path to HTML file or directory", Required: true}, + {Name: "allow-sensitive", Type: "bool", Desc: "skip the credential-file scan (allow .env / .npmrc / .aws/credentials / etc. in the publish payload)"}, }, Validate: func(ctx context.Context, rctx *common.RuntimeContext) error { if strings.TrimSpace(rctx.Str("app-id")) == "" { @@ -37,14 +37,27 @@ var AppsHTMLPublish = common.Shortcut{ if path == "" { return output.ErrValidation("--path is required") } - // Reject --path equal to the current working directory. Publishing - // cwd recursively packs .git/ / .env / node_modules / .aws/credentials - // alongside the intended HTML, and combined with --scope public puts - // those on an internet-reachable URL. - if filepath.Clean(path) == "." { - return output.ErrWithHint(output.ExitValidation, "validation", - "--path 不能指向当前工作目录(避免误把整个工程一并发布出去)", - "改成具体的子目录或文件,如 './dist' / './public' / './index.html'") + // Block well-known credential files in the publish payload unless the + // caller explicitly opts in. Lives in Validate (not DryRun) so that + // `--dry-run` returns non-zero on hit — the framework runs Validate + // before branching to DryRun/Execute, so both paths share this gate. + if rctx.Bool("allow-sensitive") { + return nil + } + candidates, err := walkHTMLPublishCandidates(rctx.FileIO(), path) + if err != nil { + // Don't fail Validate on walk errors (bad --path, etc.) — let + // DryRun/Execute surface them in their own (richer) envelopes. + return nil + } + var hits []string + for _, c := range candidates { + if isSensitiveCandidate(path, c) { + hits = append(hits, c.RelPath) + } + } + if len(hits) > 0 { + return sensitiveCandidatesError(hits) } return nil }, @@ -76,19 +89,20 @@ var AppsHTMLPublish = common.Shortcut{ } dry.Set("total_size_bytes", totalSize) dry.Set("files", names) - // Advisory scan: surface paths matching well-known secret / credential - // patterns so the caller can review before going public. Dry-run still - // exits 0; this is non-blocking by design (legit doc sites may ship - // example .env files). - var warnings []string - for _, c := range candidates { - if isSensitiveRelPath(c.RelPath) { - warnings = append(warnings, c.RelPath) + // Sensitive-file rejection lives in Validate (so dry-run exits non-zero + // on hit). When --allow-sensitive is set, still surface the list here + // as an info field so the caller sees what was waived. + if rctx.Bool("allow-sensitive") { + var waived []string + for _, c := range candidates { + if isSensitiveCandidate(path, c) { + waived = append(waived, c.RelPath) + } + } + if len(waived) > 0 { + dry.Set("sensitive_waived", waived) + dry.Set("sensitive_waived_summary", fmt.Sprintf("%d credential file(s) included because --allow-sensitive is set", len(waived))) } - } - if len(warnings) > 0 { - dry.Set("warnings", warnings) - dry.Set("warning_summary", fmt.Sprintf("manifest contains %d sensitive path(s); review before publishing", len(warnings))) } return dry }, @@ -116,6 +130,27 @@ type appsHTMLPublishSpec struct { Path string } +// maxSensitiveListInError caps how many credential-file matches we list inline +// in the validation error, so the message stays readable when a misconfigured +// payload has many hits (e.g. a directory tree accidentally containing +// per-environment .env.* files for every stage). +const maxSensitiveListInError = 5 + +// sensitiveCandidatesError builds the Validate-time rejection when --path +// contains credential files and --allow-sensitive was not set. +func sensitiveCandidatesError(hits []string) error { + var sample string + if len(hits) <= maxSensitiveListInError { + sample = strings.Join(hits, ", ") + } else { + sample = strings.Join(hits[:maxSensitiveListInError], ", ") + + fmt.Sprintf(" (and %d more)", len(hits)-maxSensitiveListInError) + } + return output.ErrWithHint(output.ExitValidation, "validation", + fmt.Sprintf("--path contains %d credential file(s) that should not be published: %s", len(hits), sample), + "remove these files from the publish payload, OR pass --allow-sensitive if shipping them is intentional (e.g. a docs site demoing credential-file formats)") +} + // maxHTMLPublishTarballBytes 是 client 端 tar.gz 包体上限,对齐 OAPI 设计 20MB 约束。 // 用 var 而非 const,便于单测调小覆盖拦截路径。 var maxHTMLPublishTarballBytes int64 = 20 * 1024 * 1024 @@ -145,13 +180,6 @@ func ensureIndexHTML(candidates []htmlPublishCandidate) error { } func runHTMLPublish(ctx context.Context, fio fileio.FileIO, client appsHTMLPublishClient, spec appsHTMLPublishSpec) (map[string]interface{}, error) { - // Defense in depth: callers reaching runHTMLPublish bypass the shortcut's - // Validate closure. Re-check that --path is not cwd before walking. - if filepath.Clean(spec.Path) == "." { - return nil, output.ErrWithHint(output.ExitValidation, "validation", - "--path 不能指向当前工作目录(避免误把整个工程一并发布出去)", - "改成具体的子目录或文件,如 './dist' / './public' / './index.html'") - } candidates, err := walkHTMLPublishCandidates(fio, spec.Path) if err != nil { return nil, output.Errorf(output.ExitAPI, "io", "scan --path %s: %v", spec.Path, err) diff --git a/shortcuts/apps/apps_html_publish_test.go b/shortcuts/apps/apps_html_publish_test.go index 59240284..1d8e942f 100644 --- a/shortcuts/apps/apps_html_publish_test.go +++ b/shortcuts/apps/apps_html_publish_test.go @@ -247,7 +247,6 @@ func TestAppsHTMLPublish_RequiresPath(t *testing.T) { func TestAppsHTMLPublish_DryRunPrintsManifest(t *testing.T) { // 这个用例走真实 shortcut → 真实 LocalFileIO(cwd-bounded)。 // 必须 chdir 进 tmp 用相对路径,否则 SafeInputPath 会拒绝绝对 --path。 - // --path "." 被 Validate 拒绝,因此改为在 tmp 下建 dist 子目录并传 ./dist。 dir := t.TempDir() cwd, err := os.Getwd() if err != nil { @@ -279,6 +278,243 @@ func TestAppsHTMLPublish_DryRunPrintsManifest(t *testing.T) { } } +// TestAppsHTMLPublish_CleanCwdIsAllowed pins the post-PR behavior change: +// --path "." is no longer hard-rejected by Validate. A clean cwd (no +// credential files) is a valid publish target. +func TestAppsHTMLPublish_CleanCwdIsAllowed(t *testing.T) { + dir := t.TempDir() + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + if err := os.Chdir(dir); err != nil { + t.Fatalf("chdir: %v", err) + } + t.Cleanup(func() { _ = os.Chdir(cwd) }) + if err := os.WriteFile(filepath.Join(dir, "index.html"), []byte(""), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + + factory, stdout, _ := newAppsExecuteFactory(t) + if err := runAppsShortcut(t, AppsHTMLPublish, + []string{"+html-publish", "--app-id", "app_x", "--path", ".", "--dry-run", "--as", "user"}, + factory, stdout); err != nil { + t.Fatalf("dry-run with --path . should pass when cwd is clean, got err=%v", err) + } +} + +// TestAppsHTMLPublish_SensitiveBlocksValidate pins the new behavior: a credential +// file under --path causes Validate to reject before either DryRun or Execute +// runs, so dry-run also returns non-zero (unlike the previous advisory-warning +// model). +func TestAppsHTMLPublish_SensitiveBlocksValidate(t *testing.T) { + dir := t.TempDir() + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + if err := os.Chdir(dir); err != nil { + t.Fatalf("chdir: %v", err) + } + t.Cleanup(func() { _ = os.Chdir(cwd) }) + if err := os.MkdirAll(filepath.Join(dir, "dist"), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, "dist", "index.html"), []byte(""), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, "dist", ".env"), []byte("API_KEY=secret"), 0o644); err != nil { + t.Fatalf("write .env: %v", err) + } + + // Dry-run path: must also fail (this is the whole point of moving the + // check into Validate — dry-run can no longer say "OK" when Execute would + // reject). + factory, stdout, _ := newAppsExecuteFactory(t) + err = runAppsShortcut(t, AppsHTMLPublish, + []string{"+html-publish", "--app-id", "app_x", "--path", "./dist", "--dry-run", "--as", "user"}, + factory, stdout) + if err == nil { + t.Fatalf("dry-run with sensitive file should fail") + } + var exitErr *output.ExitError + if !errors.As(err, &exitErr) || exitErr.Detail == nil { + t.Fatalf("expected ExitError with detail, got %v", err) + } + if exitErr.Detail.Type != "validation" { + t.Fatalf("error.type = %q, want validation", exitErr.Detail.Type) + } + if !strings.Contains(exitErr.Detail.Message, ".env") { + t.Fatalf("error message should list the offending file, got %q", exitErr.Detail.Message) + } + if !strings.Contains(exitErr.Detail.Hint, "--allow-sensitive") { + t.Fatalf("error hint should mention --allow-sensitive escape hatch, got %q", exitErr.Detail.Hint) + } +} + +// TestAppsHTMLPublish_AllowSensitiveOverride pins that --allow-sensitive +// bypasses the credential-file check (legitimate cases like a docs site +// shipping an example .env on purpose). +func TestAppsHTMLPublish_AllowSensitiveOverride(t *testing.T) { + dir := t.TempDir() + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + if err := os.Chdir(dir); err != nil { + t.Fatalf("chdir: %v", err) + } + t.Cleanup(func() { _ = os.Chdir(cwd) }) + if err := os.MkdirAll(filepath.Join(dir, "dist"), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, "dist", "index.html"), []byte(""), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, "dist", ".env.example"), []byte("API_KEY=replace-me"), 0o644); err != nil { + t.Fatalf("write .env.example: %v", err) + } + + factory, stdout, _ := newAppsExecuteFactory(t) + if err := runAppsShortcut(t, AppsHTMLPublish, + []string{"+html-publish", "--app-id", "app_x", "--path", "./dist", "--dry-run", "--allow-sensitive", "--as", "user"}, + factory, stdout); err != nil { + t.Fatalf("--allow-sensitive should bypass the credential scan, got err=%v", err) + } + got := stdout.String() + // Dry-run output surfaces the waived list so the caller still sees what + // was let through. + if !strings.Contains(got, "sensitive_waived") { + t.Fatalf("dry-run output should record the waived credential file under --allow-sensitive, got: %s", got) + } + if !strings.Contains(got, ".env.example") { + t.Fatalf("waived list should name the file, got: %s", got) + } +} + +// TestAppsHTMLPublish_SensitiveBlocksWhenPathIsCredentialParentDir pins that +// the credential-file scan still rejects when --path itself is the +// conventional parent dir (e.g. ./.aws, ./.docker, ./.kube). Without joining +// the candidate back to its absolute path, walker would strip the parent +// segment via filepath.Rel and the cloud-SDK matchers — which anchor on +// parent/file pairs — would silently pass. +func TestAppsHTMLPublish_SensitiveBlocksWhenPathIsCredentialParentDir(t *testing.T) { + cases := []struct { + name string + parent string + fileName string + wantSubstr string + }{ + {"aws_credentials", ".aws", "credentials", "credentials"}, + {"docker_config_json", ".docker", "config.json", "config.json"}, + {"kube_config", ".kube", "config", "config"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + dir := t.TempDir() + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + if err := os.Chdir(dir); err != nil { + t.Fatalf("chdir: %v", err) + } + t.Cleanup(func() { _ = os.Chdir(cwd) }) + root := filepath.Join(dir, tc.parent) + if err := os.MkdirAll(root, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(filepath.Join(root, tc.fileName), []byte("fake credential"), 0o600); err != nil { + t.Fatalf("write: %v", err) + } + if err := os.WriteFile(filepath.Join(root, "index.html"), []byte(""), 0o644); err != nil { + t.Fatalf("write index: %v", err) + } + + factory, stdout, _ := newAppsExecuteFactory(t) + err = runAppsShortcut(t, AppsHTMLPublish, + []string{"+html-publish", "--app-id", "app_x", "--path", "./" + tc.parent, "--dry-run", "--as", "user"}, + factory, stdout) + if err == nil { + t.Fatalf("expected rejection when --path is %s/ (would leak %s), got success", tc.parent, tc.fileName) + } + var exitErr *output.ExitError + if !errors.As(err, &exitErr) || exitErr.Detail == nil { + t.Fatalf("expected ExitError with detail, got %v", err) + } + if exitErr.Detail.Type != "validation" { + t.Fatalf("error.type = %q, want validation", exitErr.Detail.Type) + } + if !strings.Contains(exitErr.Detail.Message, tc.wantSubstr) { + t.Fatalf("error message should name the leaked file, got %q", exitErr.Detail.Message) + } + }) + } +} + +// TestAppsHTMLPublish_SensitiveBlocksWhenPathIsCredentialFileItself pins the +// single-file form: --path pointing directly at a credential file (e.g. +// ./.aws/credentials) must also reject. Walker's single-file branch sets +// RelPath = filepath.Base(rootPath), so the .aws segment is lost the same way. +func TestAppsHTMLPublish_SensitiveBlocksWhenPathIsCredentialFileItself(t *testing.T) { + dir := t.TempDir() + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + if err := os.Chdir(dir); err != nil { + t.Fatalf("chdir: %v", err) + } + t.Cleanup(func() { _ = os.Chdir(cwd) }) + if err := os.MkdirAll(filepath.Join(dir, ".aws"), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, ".aws", "credentials"), []byte("fake credential"), 0o600); err != nil { + t.Fatalf("write: %v", err) + } + + factory, stdout, _ := newAppsExecuteFactory(t) + err = runAppsShortcut(t, AppsHTMLPublish, + []string{"+html-publish", "--app-id", "app_x", "--path", "./.aws/credentials", "--dry-run", "--as", "user"}, + factory, stdout) + if err == nil { + t.Fatalf("expected rejection when --path points directly at .aws/credentials, got success") + } + var exitErr *output.ExitError + if !errors.As(err, &exitErr) || exitErr.Detail == nil { + t.Fatalf("expected ExitError with detail, got %v", err) + } + if exitErr.Detail.Type != "validation" { + t.Fatalf("error.type = %q, want validation", exitErr.Detail.Type) + } + if !strings.Contains(exitErr.Detail.Message, "credentials") { + t.Fatalf("error message should name the leaked file, got %q", exitErr.Detail.Message) + } +} + +// TestSensitiveCandidatesError_Truncation pins the inline-list truncation so a +// payload with many credential files (e.g. an accidentally-copied tree of +// per-stage .env.* files) produces a readable, length-bounded error. +func TestSensitiveCandidatesError_Truncation(t *testing.T) { + hits := []string{"a.env", "b.env", "c.env", "d.env", "e.env", "f.env", "g.env"} + err := sensitiveCandidatesError(hits) + var exitErr *output.ExitError + if !errors.As(err, &exitErr) || exitErr.Detail == nil { + t.Fatalf("expected ExitError with detail, got %v", err) + } + msg := exitErr.Detail.Message + if !strings.Contains(msg, "7 credential file(s)") { + t.Fatalf("message should report the full count, got %q", msg) + } + if !strings.Contains(msg, "and 2 more") { + t.Fatalf("message should truncate beyond %d entries, got %q", maxSensitiveListInError, msg) + } + // Pin: the truncated tail is NOT spelled out. + if strings.Contains(msg, "g.env") { + t.Fatalf("message should not list entries past the truncation, got %q", msg) + } +} + func TestRunHTMLPublish_RejectsOversizeRawCandidates(t *testing.T) { orig := maxHTMLPublishRawBytes maxHTMLPublishRawBytes = 100 @@ -312,27 +548,3 @@ func TestRunHTMLPublish_RejectsOversizeRawCandidates(t *testing.T) { t.Fatalf("client must not be called when raw cap hit") } } - -func TestRunHTMLPublish_RejectsCurrentDirectoryPath(t *testing.T) { - // Publishing the entire current working directory is the canonical - // secrets-exfiltration footgun (.git/.env/node_modules all end up in the - // tarball). Reject --path "." (and Clean equivalents) at runHTMLPublish - // entry so any direct caller cannot accidentally trigger it. (Validate - // also rejects at flag layer; this is defense in depth.) - fake := &fakeAppsHTMLPublishClient{} - _, err := runHTMLPublish(context.Background(), newTestFIO(), fake, - appsHTMLPublishSpec{AppID: "app_x", Path: "."}) - if err == nil { - t.Fatalf("expected --path '.' to be rejected") - } - var exitErr *output.ExitError - if !errors.As(err, &exitErr) || exitErr.Detail == nil || exitErr.Detail.Type != "validation" { - t.Fatalf("expected ExitError type=validation, got %v", err) - } - if !strings.Contains(exitErr.Detail.Message, "当前工作目录") { - t.Fatalf("error message should explain cwd is forbidden, got %q", exitErr.Detail.Message) - } - if len(fake.calls) != 0 { - t.Fatalf("client must not be called when --path is cwd") - } -} diff --git a/shortcuts/apps/sensitive_paths.go b/shortcuts/apps/sensitive_paths.go index f95469b9..aead3a2c 100644 --- a/shortcuts/apps/sensitive_paths.go +++ b/shortcuts/apps/sensitive_paths.go @@ -3,18 +3,25 @@ package apps -import "strings" +import ( + "path/filepath" + "strings" +) // isSensitiveRelPath reports whether a relative path inside the candidate -// manifest looks like something that should not ship to a public-internet -// share URL — secrets, credentials, SCM internals, SSH keys. The check is -// path-element-wise (each "/"-delimited segment is inspected) so secrets -// nested under arbitrary subdirectories are still caught. +// manifest is a well-known env / credential file that should not ship to a +// public-internet share URL. The check is path-element-wise (each +// "/"-delimited segment is inspected) so credential files nested under +// arbitrary subdirectories are still caught. // -// Used by +html-publish dry-run to populate a "warnings" field; the -// caller still proceeds (this is advisory, not a hard block) so legit -// edge cases (e.g. a documentation site that has a .env example file -// on purpose) are not gated, but the user/agent sees the list. +// Used by +html-publish: dry-run AND Execute both block by default when any +// candidate matches. Pass --allow-sensitive to override (legitimate cases: +// a documentation site shipping example credential files on purpose). +// +// Scope is intentionally narrow — only files that conventionally hold API +// tokens or service credentials, not the broader "anything cryptographic" +// surface. SSH private keys, generic *.pem / *.key, and SCM internals are +// out of scope here; if they leak it's a separate problem to address. func isSensitiveRelPath(rel string) bool { if rel == "" { return false @@ -22,24 +29,95 @@ func isSensitiveRelPath(rel string) bool { parts := strings.Split(rel, "/") for i, p := range parts { switch { - case p == ".git": - return true case p == ".env" || strings.HasPrefix(p, ".env."): return true - case p == ".npmrc" || p == ".netrc": + case p == ".npmrc": return true - case p == "credentials" || p == "config": - if i > 0 { - parent := parts[i-1] - if parent == ".aws" || parent == ".docker" || parent == ".gcloud" || parent == ".kube" { - return true - } + case p == ".netrc": + return true + case p == ".git-credentials": + return true + } + if i == 0 { + continue + } + parent := parts[i-1] + switch parent { + case ".aws": + if p == "credentials" { + return true } - case strings.HasPrefix(p, "id_rsa") || strings.HasPrefix(p, "id_ed25519") || strings.HasPrefix(p, "id_ecdsa") || strings.HasPrefix(p, "id_dsa"): - return true - case strings.HasSuffix(p, ".pem") || strings.HasSuffix(p, ".key"): - return true - case strings.HasSuffix(p, ".json") && p == "config.json" && i > 0 && parts[i-1] == ".docker": + case ".docker": + if p == "config.json" { + return true + } + case ".kube": + if p == "config" { + return true + } + } + } + return false +} + +// hasParentAnchoredCredentialPair scans a "/"-delimited path for the +// cloud-SDK matchers that depend on a conventional parent dir: +// .aws/credentials, .docker/config.json, .kube/config. The leaf-name +// matchers (.env / .npmrc / ...) intentionally do NOT run here, so callers +// can probe a path that includes surrounding root context without risking +// a leaf-rule false-positive on the context segment itself (e.g. a literal +// ".env" directory somewhere in --path's ancestry). +func hasParentAnchoredCredentialPair(path string) bool { + parts := strings.Split(path, "/") + for i := 1; i < len(parts); i++ { + switch parts[i-1] { + case ".aws": + if parts[i] == "credentials" { + return true + } + case ".docker": + if parts[i] == "config.json" { + return true + } + case ".kube": + if parts[i] == "config" { + return true + } + } + } + return false +} + +// isSensitiveCandidate is the call-site wrapper used by +html-publish. +// +// Two passes: +// +// 1. Scan RelPath with the full matcher (isSensitiveRelPath). Handles the +// common in-tree case (e.g. ./site/.env, ./dist/.docker/config.json). +// 2. Re-probe at the boundary between rootPath and the candidate, using +// ONLY hasParentAnchoredCredentialPair. walker strips the root segment +// via filepath.Rel, so when --path is itself the conventional parent +// dir (e.g. ./.aws) RelPath comes back as a bare "credentials" and +// step 1 has no parent to anchor on. Re-prepending the root's basename +// — or, for the single-file form, the parent dir's basename of +// rootPath — exposes the missing segment. Leaf matchers are NOT re-run +// in this pass, so an ancestor like /home/alice/.env/dist can't +// false-positive every file beneath it just because ".env" appears in +// the root context. +// +// Pure string-level reasoning over rootPath — no filesystem access, no +// reliance on cwd — so it composes with the project's fileio sandbox and +// stays inside the shortcuts-layer constraint against direct fs lookups. +func isSensitiveCandidate(rootPath string, c htmlPublishCandidate) bool { + if isSensitiveRelPath(c.RelPath) { + return true + } + for _, ctx := range []string{filepath.Base(rootPath), filepath.Base(filepath.Dir(rootPath))} { + switch ctx { + case "", ".", "..", "/": + continue + } + if hasParentAnchoredCredentialPair(filepath.ToSlash(filepath.Join(ctx, c.RelPath))) { return true } } diff --git a/shortcuts/apps/sensitive_paths_test.go b/shortcuts/apps/sensitive_paths_test.go index 7afeb69a..3ba9ec7f 100644 --- a/shortcuts/apps/sensitive_paths_test.go +++ b/shortcuts/apps/sensitive_paths_test.go @@ -10,31 +10,51 @@ func TestIsSensitiveRelPath(t *testing.T) { rel string want bool }{ - // dotfiles and well-known secret stores + // .env family (token-bearing env files) {".env", true}, {".env.local", true}, {".env.production", true}, {"backend/.env", true}, + {"src/config/.env.staging", true}, + + // HTTP auth tokens {".npmrc", true}, {"sub/.npmrc", true}, {".netrc", true}, - // .git tree - {".git/config", true}, - {".git/HEAD", true}, - {"subdir/.git/config", true}, - {".gitignore", false}, // NOT sensitive (intended to be committed) - // SSH keys - {".ssh/id_rsa", true}, - {".ssh/id_ed25519", true}, - {"backup/id_rsa.pub", true}, // pub also flagged (often near private) - // Cloud creds + {"home/.netrc", true}, + + // Git HTTPS credentials store + {".git-credentials", true}, + {"backup/.git-credentials", true}, + + // Cloud SDK credentials (require conventional parent dir) {".aws/credentials", true}, - {".aws/config", true}, + {"home/.aws/credentials", true}, {".docker/config.json", true}, - // Generic crypto - {"server.pem", true}, - {"certs/private.key", true}, - {"path/to/whatever.pem", true}, + {"backup/.docker/config.json", true}, + {".kube/config", true}, + {"home/.kube/config", true}, + + // Out of scope (intentionally NOT blocked anymore) + {".gitignore", false}, // intentionally committed + {".git/config", false}, // SCM history, not tokens + {".git/HEAD", false}, // same + {".ssh/id_rsa", false}, // SSH key — different threat model + {".ssh/id_ed25519", false}, // same + {"backup/id_rsa.pub", false}, // same + {".aws/config", false}, // just region/profile, no token + {"server.pem", false}, // too broad — could be a public cert + {"certs/private.key", false}, // too broad — could be a sample + {"path/to/whatever.pem", false}, + + // Lookalikes that should NOT match + {".envrc", false}, // direnv config, no tokens + {"environment.yml", false}, // conda env, not .env + {"my.env.file.txt", false}, // segment doesn't start with .env + {".kube/configmap.yaml", false}, // segment is configmap.yaml not config + {".docker/config", false}, // .docker/config (not .json) doesn't carry token + {"aws/credentials", false}, // missing leading dot on aws + // Benign {"index.html", false}, {"dist/main.js", false}, diff --git a/skills/lark-apps/SKILL.md b/skills/lark-apps/SKILL.md index aa395308..4aaa3a3f 100644 --- a/skills/lark-apps/SKILL.md +++ b/skills/lark-apps/SKILL.md @@ -51,7 +51,7 @@ lark-cli auth login --domain apps ## 写 HTML 前的硬约束(避免 publish 阶段被拒) - **入口文件必须叫 `index.html`** — 妙搭以 `index.html` 作为应用入口;目录形态时根目录下要有 `index.html`,单文件形态时文件名就是 `index.html`。命名成 `app.html` / `demo.html` 等会被 `+html-publish` 直接拒绝 -- **`--path` 不能等于当前工作目录(`.` / cwd)** — 源码硬拒,避免误把 `.git` / `.env` / `node_modules` 一并打包并通过 share URL 公开。HTML 产物放进具体子目录(如 `./dist`、`./public`、`.//`)或单文件路径 +- **`--path` 内不能含已知凭据文件** — Validate 阶段会扫描 `.env` / `.env.*` / `.npmrc` / `.netrc` / `.git-credentials` / `.aws/credentials` / `.docker/config.json` / `.kube/config`,命中就 exit 非 0 拒绝(dry-run 也一样拦)。要么从产物目录里清掉这些文件,要么明确传 `--allow-sensitive` 跳过这道检查(例如教程站故意 shipping `.env.example` 作为示例素材)。`--path .` 本身不再硬拒,cwd 干净就能发 ## 端到端流程(HTML / PPT / 静态网站发布) @@ -67,7 +67,7 @@ lark-cli auth login --domain apps | 步骤 | 命令 | 说明 | |------|------|------| | 1. 新建应用 | `apps +create --name "<根据内容主题起的应用名>" --app-type HTML` → 从响应里拿 `app_id` | 默认都走新建(**不要尝试搜索 / 枚举已有应用**)。用户明确要复用现有应用时让他提供 **妙搭应用链接** 或 **app_id 字符串**(详见下方"快速决策");`--app-type` 必填,当前只支持 `HTML`(区分大小写),未来扩展 | -| 1.5 预检 | `apps +html-publish --app-id --path --dry-run` 看 `warnings` 字段 | 命中 `.git` / `.env*` / `*.pem` / `*.key` 等敏感文件时**停下来**,把 warnings 列给用户看,确认要继续才走 step 2;用户没确认前不要去掉 `--dry-run` 真发 | +| 1.5 预检(可选) | `apps +html-publish --app-id --path --dry-run` 看 manifest | 主要用来看 `files` / `total_size_bytes`。**凭据文件已经在 Validate 阶段直接 exit 非 0**(不再是 advisory warning),所以预检通过就说明走真发也通过;预检报 `.env` 等命中时,先清产物或加 `--allow-sensitive` 再 publish | | 2. 发布 HTML | `apps +html-publish --app-id --path <文件或目录>` | 必走 | | 3. 设置可用范围(可选) | `apps +access-scope-set --app-id --scope tenant\|public\|specific ...` | 用户说"公开 / 全员可见 / 让 Alice 看 / 互联网可分享"等 | diff --git a/skills/lark-apps/references/lark-apps-html-publish.md b/skills/lark-apps/references/lark-apps-html-publish.md index 04ebbd3a..d43d2886 100644 --- a/skills/lark-apps/references/lark-apps-html-publish.md +++ b/skills/lark-apps/references/lark-apps-html-publish.md @@ -23,6 +23,7 @@ lark-cli apps +html-publish --app-id app_xxx --path ./dist --dry-run |---|---|---| | `--app-id ` | ✅ | 应用 ID。从 `apps +create` 响应里拿;或者从用户给的妙搭应用链接 `https://miaoda.feishu.cn/app/app_xxx` 的 `/app/` 后面提取(详见 `../SKILL.md` "用户没给 app_id" 一节) | | `--path ` | ✅ | 本地文件或目录路径;目录会递归打包成 tar.gz。**必须含 `index.html`**:目录形态时根目录下,单文件形态时文件名必须就是 `index.html`(妙搭统一以 `index.html` 作为应用入口) | +| `--allow-sensitive` | ❌ | 跳过 Validate 的凭据文件扫描(详见下面"凭据文件拦截"一节)。默认不传;仅在用户明示要发布凭据示例文件(如教程站的 `.env.example`)时才加 | ## 返回值 @@ -118,24 +119,41 @@ lark-cli apps +html-publish --app-id "$APP" --path ./dist > 服务暂时不可用,建议稍后重试。 -## 敏感文件警告 +## 凭据文件拦截 -dry-run 输出会扫描 manifest 里的相对路径,命中以下任一模式时把它们列入 envelope 的 `warnings` 字段(advisory,不阻断 dry-run): +Validate 阶段会扫描 `--path` 下所有候选文件,命中以下任一模式 **直接 exit 非 0**(dry-run 和真发都拦,不再是 advisory warning): -- `.git/`(任意 SCM 内部文件) -- `.env` 或 `.env.*`(环境变量 / API key) +- `.env` / `.env.*`(环境变量 / API key) - `.npmrc` / `.netrc`(HTTP 凭据) -- `.ssh/id_rsa*` / `.ssh/id_ed25519*` / `.ssh/id_ecdsa*` / `.ssh/id_dsa*` -- `.aws/credentials` / `.aws/config` / `.docker/config.json` / `.gcloud/...` / `.kube/...` -- `*.pem` / `*.key`(私钥) +- `.git-credentials`(Git over HTTPS 凭据) +- `.aws/credentials`、`.docker/config.json`、`.kube/config`(云 SDK 凭据) -**Agent 行为契约**:dry-run 看到 `warnings` 非空,**必须停下来向用户报告并询问是否继续**;用户确认后才能调真实的 `apps +html-publish`(去掉 `--dry-run`)。 +报错形态: + +```json +{ + "ok": false, + "error": { + "type": "validation", + "message": "--path contains 1 credential file(s) that should not be published: dist/.env", + "hint": "remove these files from the publish payload, OR pass --allow-sensitive if shipping them is intentional (e.g. a docs site demoing credential-file formats)" + } +} +``` + +**Agent 行为契约**: + +- 默认必须从产物里清掉命中的文件后再 publish +- 只有当用户**明确**意图是 shipping 凭据示例(文档 / 教程站等)时,才追加 `--allow-sensitive` 旁路;旁路时 dry-run 会在 `sensitive_waived` 字段列出被放行的文件名,转述给用户确认 + +不在拦截范围内(旧版扫过、新版**不再**扫):`.git/` SCM 历史、SSH 私钥 `id_rsa*` / `id_ed25519*` 等、`*.pem` / `*.key`、`.aws/config`。如果产物里有这些文件且确实敏感,要靠用户自己保持产物目录干净。 ## 提示 -- `--path` **不能等于 cwd**(`.` 或 cwd 等价写法均拒)。原因:递归打包 + 互联网公开的组合下,cwd 根的项目级文件(`.git/` / `.env` / `node_modules` / `.aws/credentials`)会被一并打包并通过 share URL 公开访问。强制指定具体子目录或文件,如 `./dist` / `./public/` / `./index.html` +- `--path` 既可以是 cwd(`.`)也可以是子目录或单文件;**不再硬拒 cwd**,cwd 干净(没有命中上面凭据列表)就能发。仍然建议传具体子目录(`./dist`、`./public/` 等)以减少误打包风险 - `--path` **必须**是 cwd 内的相对路径(如 `./dist`、`./index.html`);绝对路径或越界路径(`../`、`/Users/...`)CLI 会直接拒绝。需要发布 cwd 外的目录时,先切到 agent 工作目录再调,**不要**私自 `cd` 绕过 -- 目录打包成 tar.gz 时**不做过滤**(`.git` / `node_modules` 等会一并打包),让用户传干净的产物目录(如 `./dist`) +- 目录打包成 tar.gz 时**不做过滤**(`.git` / `node_modules` 等会一并打包,只有上面那张凭据 list 才会被 Validate 拦),让用户传干净的产物目录(如 `./dist`) +- 旁路写法:`apps +html-publish --app-id --path --allow-sensitive` - **不要**原样把 envelope JSON 转述给用户 ## 协同命令 diff --git a/tests/cli_e2e/apps/apps_html_publish_dryrun_test.go b/tests/cli_e2e/apps/apps_html_publish_dryrun_test.go index 265400e8..a1c33e5e 100644 --- a/tests/cli_e2e/apps/apps_html_publish_dryrun_test.go +++ b/tests/cli_e2e/apps/apps_html_publish_dryrun_test.go @@ -204,7 +204,10 @@ func TestAppsHTMLPublishDryRun(t *testing.T) { assert.Contains(t, result.Stdout+result.Stderr, `required flag(s) "path" not set`) }) - t.Run("WarningsForSensitivePaths", func(t *testing.T) { + t.Run("RejectsSensitivePathsByDefault", func(t *testing.T) { + // Validate scans candidates for well-known credential files and rejects + // when any are found. Dry-run also fails (Validate runs before the + // dry-run branch) — that's the point: dry-run preview matches Execute. dir := t.TempDir() require.NoError(t, os.MkdirAll(filepath.Join(dir, "dist"), 0o755)) require.NoError(t, os.WriteFile(filepath.Join(dir, "dist", "index.html"), []byte(""), 0o644)) @@ -224,23 +227,45 @@ func TestAppsHTMLPublishDryRun(t *testing.T) { WorkDir: dir, }) require.NoError(t, err) - result.AssertExitCode(t, 0) - warnings := gjson.Get(result.Stdout, "warnings").Array() - require.NotEmpty(t, warnings, "expected non-empty warnings for .env: %s", result.Stdout) - var found bool - for _, w := range warnings { - if w.String() == ".env" { - found = true - break - } - } - assert.True(t, found, "warnings should list .env, got %v", warnings) + result.AssertExitCode(t, 2) + assert.Contains(t, validateErrorMessage(result), ".env", + "validation error should name the offending file: %s", result.Stderr) }) - t.Run("RejectsPathEqualsCWD", func(t *testing.T) { - // Even with valid index.html in cwd, --path "." must be rejected at - // Validate (so dry-run also rejects) to prevent accidental - // whole-project secrets exfiltration. + t.Run("AllowSensitiveOverride", func(t *testing.T) { + // --allow-sensitive bypasses the credential-file gate (legitimate + // cases: docs site shipping example .env files). Dry-run output + // surfaces a sensitive_waived field so the caller still sees what + // was let through. + dir := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(dir, "dist"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "dist", "index.html"), []byte(""), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "dist", ".env.example"), []byte("API_KEY=replace-me\n"), 0o644)) + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + t.Cleanup(cancel) + + result, err := clie2e.RunCmd(ctx, clie2e.Request{ + Args: []string{ + "apps", "+html-publish", + "--app-id", "app_x", + "--path", "./dist", + "--allow-sensitive", + "--dry-run", + }, + DefaultAs: "user", + WorkDir: dir, + }) + require.NoError(t, err) + result.AssertExitCode(t, 0) + waived := gjson.Get(result.Stdout, "sensitive_waived").Array() + require.Len(t, waived, 1, "expected sensitive_waived to list the file, got: %s", result.Stdout) + assert.Equal(t, ".env.example", waived[0].String()) + }) + + t.Run("CleanCwdAllowed", func(t *testing.T) { + // --path "." is no longer hard-rejected. A cwd that doesn't contain + // well-known credential files is a valid publish target. dir := t.TempDir() require.NoError(t, os.WriteFile(filepath.Join(dir, "index.html"), []byte(""), 0o644)) @@ -258,8 +283,8 @@ func TestAppsHTMLPublishDryRun(t *testing.T) { WorkDir: dir, }) require.NoError(t, err) - result.AssertExitCode(t, 2) - assert.Contains(t, validateErrorMessage(result), "当前工作目录") + result.AssertExitCode(t, 0) + assert.Equal(t, int64(1), gjson.Get(result.Stdout, "file_count").Int()) }) t.Run("TrimsAppIDAndPath", func(t *testing.T) {