feat(ci): add public content safeguards

This commit is contained in:
hanshaoshuai
2026-06-24 15:21:37 +08:00
committed by HanShaoshuai-k
parent fe32a6e0a9
commit cf93ee051c
47 changed files with 5064 additions and 125 deletions

View File

@@ -5,6 +5,7 @@ on:
branches: [main]
pull_request:
branches: [main]
types: [opened, synchronize, reopened, edited]
workflow_dispatch:
permissions:
@@ -70,6 +71,7 @@ jobs:
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
with:
fetch-depth: 0
persist-credentials: false
- uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
with:
go-version-file: go.mod
@@ -87,6 +89,23 @@ jobs:
- name: Run errs/ lint guards (lintcheck)
run: go run -C lint . --changed-from "$QUALITY_GATE_CHANGED_FROM" ..
script-test:
needs: fast-gate
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
with:
fetch-depth: 0
persist-credentials: false
- uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
with:
go-version-file: go.mod
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: '22'
- name: Run script tests
run: make script-test
deterministic-gate:
needs: fast-gate
runs-on: ubuntu-latest
@@ -109,8 +128,28 @@ jobs:
env:
QUALITY_GATE_CHANGED_FROM: ${{ github.event.pull_request.base.sha || github.event.before || 'origin/main' }}
run: echo "QUALITY_GATE_CHANGED_FROM=$(bash scripts/resolve-changed-from.sh)" >> "$GITHUB_ENV"
- name: Write public content metadata
if: ${{ github.event_name == 'pull_request' }}
env:
PR_TITLE: ${{ github.event.pull_request.title }}
PR_BODY: ${{ github.event.pull_request.body }}
PR_BRANCH: ${{ github.head_ref }}
run: |
mkdir -p .tmp/quality-gate
python3 - <<'PY'
import json
import os
with open(".tmp/quality-gate/public-content-metadata.json", "w", encoding="utf-8") as f:
json.dump({
"title": os.environ.get("PR_TITLE", ""),
"body": os.environ.get("PR_BODY", ""),
"branch": os.environ.get("PR_BRANCH", ""),
}, f)
f.write("\n")
PY
- name: Run CLI deterministic gate
run: make quality-gate
run: PUBLIC_CONTENT_METADATA=.tmp/quality-gate/public-content-metadata.json make quality-gate
- name: Upload quality gate facts
if: ${{ always() && github.event_name == 'pull_request' }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
@@ -220,7 +259,7 @@ jobs:
# ── Layer 3: E2E Gate ──────────────────────────────────────────────
e2e-dry-run:
needs: [unit-test, lint, deterministic-gate]
needs: [unit-test, lint, script-test, deterministic-gate]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
@@ -241,7 +280,7 @@ jobs:
run: go test -v -count=1 -timeout=5m ./tests/cli_e2e/... -run 'DryRun|Regression'
e2e-live:
needs: [unit-test, lint, deterministic-gate]
needs: [unit-test, lint, script-test, deterministic-gate]
if: ${{ github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork }}
runs-on: ubuntu-latest
permissions:
@@ -333,7 +372,7 @@ jobs:
# ── Results Gate (single required check for branch protection) ─────
results:
if: ${{ always() }}
needs: [fast-gate, unit-test, lint, deterministic-gate, coverage, deadcode, e2e-dry-run, e2e-live, security, license-header]
needs: [fast-gate, unit-test, lint, script-test, deterministic-gate, coverage, deadcode, e2e-dry-run, e2e-live, security, license-header]
runs-on: ubuntu-latest
steps:
- name: Evaluate results
@@ -345,6 +384,7 @@ jobs:
echo "| L1 | fast-gate | ${{ needs.fast-gate.result }} |" >> $GITHUB_STEP_SUMMARY
echo "| L2 | unit-test | ${{ needs.unit-test.result }} |" >> $GITHUB_STEP_SUMMARY
echo "| L2 | lint | ${{ needs.lint.result }} |" >> $GITHUB_STEP_SUMMARY
echo "| L2 | script-test | ${{ needs.script-test.result }} |" >> $GITHUB_STEP_SUMMARY
echo "| L2 | deterministic-gate | ${{ needs.deterministic-gate.result }} |" >> $GITHUB_STEP_SUMMARY
echo "| L2 | coverage | ${{ needs.coverage.result }} |" >> $GITHUB_STEP_SUMMARY
echo "| L2 | deadcode | ${{ needs.deadcode.result }} |" >> $GITHUB_STEP_SUMMARY
@@ -361,6 +401,7 @@ jobs:
"${{ needs.fast-gate.result }}" \
"${{ needs.unit-test.result }}" \
"${{ needs.lint.result }}" \
"${{ needs.script-test.result }}" \
"${{ needs.deterministic-gate.result }}" \
"${{ needs.coverage.result }}" \
"${{ needs.deadcode.result }}" \

28
.github/workflows/comment-audit.yml vendored Normal file
View File

@@ -0,0 +1,28 @@
name: Comment Audit
on:
issue_comment:
types: [created, edited]
pull_request_review:
types: [submitted, edited]
pull_request_review_comment:
types: [created, edited]
permissions:
contents: read
jobs:
public-content-comment-audit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
with:
persist-credentials: false
- uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
with:
go-version-file: go.mod
- name: Post-publication comment audit
run: |
mkdir -p .tmp/comment-audit
cp "$GITHUB_EVENT_PATH" .tmp/comment-audit/event.json
go run ./internal/qualitygate/cmd/comment-audit --event .tmp/comment-audit/event.json --kind "$GITHUB_EVENT_NAME"

View File

@@ -88,31 +88,44 @@ jobs:
commit_sha: targetHeadSha,
});
const candidatePRs = associatedPRs.filter((candidate) =>
candidate.state === "open" &&
candidate.base?.repo?.id === context.payload.repository.id &&
candidate.head?.sha === targetHeadSha
);
if (candidatePRs.length > 1) {
throw new Error(`ambiguous open PRs for workflow_run head ${targetHeadSha}: ${candidatePRs.length}`);
const openCandidatePRs = candidatePRs.filter((candidate) => candidate.state === "open");
if (openCandidatePRs.length > 1) {
throw new Error(`ambiguous open PRs for workflow_run head ${targetHeadSha}: ${openCandidatePRs.length}`);
}
if (candidatePRs.length === 1) {
prNumber = candidatePRs[0].number;
if (openCandidatePRs.length === 1) {
prNumber = openCandidatePRs[0].number;
} else if (candidatePRs.length > 0) {
core.notice("PR quality summary skipped: workflow_run target PR is no longer open");
core.setOutput("stale", "true");
return;
}
}
if (!prNumber) {
const candidatePRs = await github.paginate(github.rest.pulls.list, {
owner: context.repo.owner,
repo: context.repo.repo,
state: "open",
state: "all",
per_page: 100,
}).then((prs) => prs.filter((candidate) =>
candidate.base?.repo?.id === context.payload.repository.id &&
candidate.head?.sha === targetHeadSha
));
if (candidatePRs.length !== 1) {
const openCandidatePRs = candidatePRs.filter((candidate) => candidate.state === "open");
if (openCandidatePRs.length > 1) {
throw new Error(`ambiguous open PRs from pull list fallback for workflow_run head ${targetHeadSha}: ${openCandidatePRs.length}`);
}
if (openCandidatePRs.length === 1) {
prNumber = openCandidatePRs[0].number;
} else if (candidatePRs.length > 0) {
core.notice("PR quality summary skipped: workflow_run target PR is no longer open");
core.setOutput("stale", "true");
return;
} else {
throw new Error(`expected one open PR from pull list fallback for workflow_run head ${targetHeadSha}, got ${candidatePRs.length}`);
}
prNumber = candidatePRs[0].number;
}
if (!Number.isInteger(prNumber) || prNumber <= 0) throw new Error("missing pull request binding");
const { data: pr } = await github.rest.pulls.get({
@@ -121,6 +134,11 @@ jobs:
pull_number: prNumber,
});
if (pr.base.repo.id !== context.payload.repository.id) throw new Error("PR base repo mismatch");
if (pr.state !== "open") {
core.notice("PR quality summary skipped: workflow_run target PR is no longer open");
core.setOutput("stale", "true");
return;
}
if (pr.head.sha !== targetHeadSha) {
core.notice("PR quality summary skipped: workflow_run is stale for this PR head");
core.setOutput("stale", "true");
@@ -299,31 +317,44 @@ jobs:
commit_sha: targetHeadSha,
});
const candidatePRs = associatedPRs.filter((candidate) =>
candidate.state === "open" &&
candidate.base?.repo?.id === context.payload.repository.id &&
candidate.head?.sha === targetHeadSha
);
if (candidatePRs.length > 1) {
throw new Error(`ambiguous open PRs for workflow_run head ${targetHeadSha}: ${candidatePRs.length}`);
const openCandidatePRs = candidatePRs.filter((candidate) => candidate.state === "open");
if (openCandidatePRs.length > 1) {
throw new Error(`ambiguous open PRs for workflow_run head ${targetHeadSha}: ${openCandidatePRs.length}`);
}
if (candidatePRs.length === 1) {
prNumber = candidatePRs[0].number;
if (openCandidatePRs.length === 1) {
prNumber = openCandidatePRs[0].number;
} else if (candidatePRs.length > 0) {
core.notice("semantic review skipped: workflow_run target PR is no longer open");
core.setOutput("stale", "true");
return;
}
}
if (!prNumber) {
const candidatePRs = await github.paginate(github.rest.pulls.list, {
owner: context.repo.owner,
repo: context.repo.repo,
state: "open",
state: "all",
per_page: 100,
}).then((prs) => prs.filter((candidate) =>
candidate.base?.repo?.id === context.payload.repository.id &&
candidate.head?.sha === targetHeadSha
));
if (candidatePRs.length !== 1) {
const openCandidatePRs = candidatePRs.filter((candidate) => candidate.state === "open");
if (openCandidatePRs.length > 1) {
throw new Error(`ambiguous open PRs from pull list fallback for workflow_run head ${targetHeadSha}: ${openCandidatePRs.length}`);
}
if (openCandidatePRs.length === 1) {
prNumber = openCandidatePRs[0].number;
} else if (candidatePRs.length > 0) {
core.notice("semantic review skipped: workflow_run target PR is no longer open");
core.setOutput("stale", "true");
return;
} else {
throw new Error(`expected one open PR from pull list fallback for workflow_run head ${targetHeadSha}, got ${candidatePRs.length}`);
}
prNumber = candidatePRs[0].number;
}
if (!Number.isInteger(prNumber) || prNumber <= 0) throw new Error("missing pull request binding");
const { data: pr } = await github.rest.pulls.get({
@@ -332,6 +363,16 @@ jobs:
pull_number: prNumber,
});
if (pr.base.repo.id !== context.payload.repository.id) throw new Error("PR base repo mismatch");
if (pr.state !== "open") {
core.notice("semantic review skipped: workflow_run target PR is no longer open");
core.setOutput("stale", "true");
return;
}
if (!pr.head.repo) {
core.notice("semantic review skipped: workflow_run target PR head repository is unavailable");
core.setOutput("stale", "true");
return;
}
if (pr.head.sha !== targetHeadSha) {
core.notice("semantic review skipped: workflow_run is stale for this PR head");
core.setOutput("stale", "true");
@@ -389,6 +430,10 @@ jobs:
repo: context.repo.repo,
pull_number: pr,
});
if (pull.state !== "open") {
core.notice("semantic review skipped infrastructure failure check: PR is no longer open");
return;
}
if (pull.head.sha !== headSha) {
core.notice("semantic review skipped infrastructure failure check: PR head changed");
return;

View File

@@ -12,6 +12,7 @@ QUALITY_GATE_DIR ?= .tmp/quality-gate
QUALITY_GATE_MANIFEST_OUT ?= $(QUALITY_GATE_DIR)/command-manifest.json
QUALITY_GATE_COMMAND_INDEX_OUT ?= $(QUALITY_GATE_DIR)/command-index.json
QUALITY_GATE_FACTS_OUT ?= $(QUALITY_GATE_DIR)/facts.json
PUBLIC_CONTENT_METADATA ?= $(QUALITY_GATE_DIR)/public-content-metadata.json
LDFLAGS := -s -w -X $(MODULE)/internal/build.Version=$(VERSION) -X $(MODULE)/internal/build.Date=$(DATE)
PREFIX ?= /usr/local
@@ -69,7 +70,8 @@ integration-test: build
test: vet fmt-check script-test unit-test examples-build integration-test
quality-gate: build
mkdir -p $(QUALITY_GATE_DIR) $(dir $(QUALITY_GATE_FACTS_OUT))
mkdir -p $(QUALITY_GATE_DIR) $(dir $(QUALITY_GATE_FACTS_OUT)) $(dir $(PUBLIC_CONTENT_METADATA))
test -f $(PUBLIC_CONTENT_METADATA) || printf '{}\n' > $(PUBLIC_CONTENT_METADATA)
LARKSUITE_CLI_REMOTE_META=off \
LARKSUITE_CLI_NO_UPDATE_NOTIFIER=1 \
LARKSUITE_CLI_NO_SKILLS_NOTIFIER=1 \
@@ -89,6 +91,7 @@ quality-gate: build
--changed-from $(QUALITY_GATE_CHANGED_FROM_RESOLVED) \
--manifest $(QUALITY_GATE_MANIFEST_OUT) \
--command-index $(QUALITY_GATE_COMMAND_INDEX_OUT) \
--public-content-metadata $(PUBLIC_CONTENT_METADATA) \
--facts-out $(QUALITY_GATE_FACTS_OUT)
install: build

View File

@@ -0,0 +1,92 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package main
import (
"encoding/json"
"flag"
"fmt"
"os"
"github.com/larksuite/cli/errs"
"github.com/larksuite/cli/internal/qualitygate/publiccontent"
"github.com/larksuite/cli/internal/qualitygate/report"
"github.com/larksuite/cli/internal/validate"
"github.com/larksuite/cli/internal/vfs"
)
type eventPayload struct {
Comment *struct {
Body string `json:"body"`
} `json:"comment"`
Review *struct {
Body string `json:"body"`
} `json:"review"`
}
func main() {
eventPath := flag.String("event", os.Getenv("GITHUB_EVENT_PATH"), "GitHub event payload path")
kind := flag.String("kind", os.Getenv("GITHUB_EVENT_NAME"), "GitHub event kind")
flag.Parse()
if *eventPath == "" {
fmt.Fprintln(os.Stderr, "comment-audit: --event or GITHUB_EVENT_PATH is required")
os.Exit(2)
}
body, err := commentBody(*eventPath)
if err != nil {
fmt.Fprintf(os.Stderr, "comment-audit: %v\n", err)
os.Exit(2)
}
diags := diagnostics(publiccontent.ScanComment(*kind, body))
if len(diags) > 0 {
fmt.Fprintln(os.Stderr, auditFailureSummary(len(diags)))
}
report.Print(os.Stderr, diags)
os.Exit(report.ExitCode(diags))
}
func auditFailureSummary(count int) string {
return fmt.Sprintf("post-publication audit found public content findings: %d", count)
}
func commentBody(path string) (string, error) {
safePath, err := validate.SafeInputPath(path)
if err != nil {
return "", errs.NewValidationError(errs.SubtypeInvalidArgument, "invalid --event: %v", err).
WithParam("--event").
WithCause(err)
}
data, err := vfs.ReadFile(safePath)
if err != nil {
return "", err
}
var payload eventPayload
if err := json.Unmarshal(data, &payload); err != nil {
return "", err
}
switch {
case payload.Comment != nil:
return payload.Comment.Body, nil
case payload.Review != nil:
return payload.Review.Body, nil
default:
return "", nil
}
}
func diagnostics(items []publiccontent.Finding) []report.Diagnostic {
out := make([]report.Diagnostic, 0, len(items))
for _, item := range items {
out = append(out, report.Diagnostic{
Rule: item.Rule,
Action: item.Action,
File: item.File,
Line: item.Line,
Message: item.Message,
Suggestion: item.Suggestion,
})
}
return out
}

View File

@@ -0,0 +1,70 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package main
import (
"errors"
"os"
"path/filepath"
"testing"
"github.com/larksuite/cli/errs"
)
func TestCommentBodyReadsSafeRelativeEventPath(t *testing.T) {
dir := t.TempDir()
if err := writeTestFile(filepath.Join(dir, "event.json"), `{"comment":{"body":"clean comment"}}`); err != nil {
t.Fatal(err)
}
origDir, err := os.Getwd()
if err != nil {
t.Fatal(err)
}
if err := os.Chdir(dir); err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
_ = os.Chdir(origDir)
})
got, err := commentBody("event.json")
if err != nil {
t.Fatalf("commentBody() error = %v", err)
}
if got != "clean comment" {
t.Fatalf("comment body = %q", got)
}
}
func TestCommentBodyRejectsUnsafeEventPath(t *testing.T) {
path := filepath.Join(t.TempDir(), "event.json")
if err := writeTestFile(path, `{"comment":{"body":"clean"}}`); err != nil {
t.Fatal(err)
}
_, err := commentBody(path)
problem, ok := errs.ProblemOf(err)
if err == nil || !ok {
t.Fatalf("commentBody(%q) error = %v, want unsafe path validation error", path, err)
}
if problem.Category != errs.CategoryValidation || problem.Subtype != errs.SubtypeInvalidArgument {
t.Fatalf("commentBody(%q) problem = %#v, want invalid argument validation", path, problem)
}
var validationErr *errs.ValidationError
if !errors.As(err, &validationErr) || validationErr.Param != "--event" {
t.Fatalf("commentBody(%q) error = %v, want --event validation param", path, err)
}
}
func TestAuditFailureSummaryStatesPostPublicationAudit(t *testing.T) {
got := auditFailureSummary(2)
want := "post-publication audit found public content findings: 2"
if got != want {
t.Fatalf("auditFailureSummary() = %q, want %q", got, want)
}
}
func writeTestFile(path, data string) error {
return os.WriteFile(path, []byte(data), 0o644)
}

View File

@@ -13,6 +13,7 @@ import (
"github.com/larksuite/cli/internal/qualitygate/manifest"
"github.com/larksuite/cli/internal/qualitygate/report"
"github.com/larksuite/cli/internal/qualitygate/rules"
"github.com/larksuite/cli/internal/validate"
)
func main() {
@@ -41,6 +42,7 @@ func runCheck(args []string) int {
fs.StringVar(&opts.FactsOut, "facts-out", "", "write facts JSON to this path")
fs.StringVar(&opts.ManifestPath, "manifest", "", "hand-authored command manifest JSON")
fs.StringVar(&opts.CommandIndexPath, "command-index", "", "full command index JSON")
fs.StringVar(&opts.PublicContentMetadataPath, "public-content-metadata", "", "PR title/body metadata JSON for public content checks")
fs.BoolVar(&printLegacyCommandCandidates, "print-legacy-command-candidates", false, "print current non-kebab-case hand-authored command candidates")
fs.BoolVar(&printLegacyFlagCandidates, "print-legacy-flag-candidates", false, "print current non-kebab-case flag candidates")
if err := fs.Parse(args); err != nil {
@@ -48,6 +50,15 @@ func runCheck(args []string) int {
return 2
}
if opts.PublicContentMetadataPath != "" {
safePath, err := validate.SafeInputPath(opts.PublicContentMetadataPath)
if err != nil {
fmt.Fprintf(os.Stderr, "quality-gate check: --public-content-metadata: %v\n", err)
return 2
}
opts.PublicContentMetadataPath = safePath
}
if opts.ManifestPath == "" || opts.CommandIndexPath == "" {
fmt.Fprintln(os.Stderr, "quality-gate check: --manifest and --command-index are required")
return 2

View File

@@ -37,6 +37,37 @@ func TestCheckRequiresManifestInputs(t *testing.T) {
}
}
func TestCheckAcceptsPublicContentMetadataFlag(t *testing.T) {
code, stderr := runCheckCaptureStderr(t, []string{
"--repo", t.TempDir(),
"--cli-bin", "./lark-cli",
"--public-content-metadata", ".tmp/quality-gate/pr.json",
})
if code != 2 {
t.Fatalf("exit code = %d, stderr=%s", code, stderr)
}
if strings.Contains(stderr, "flag provided but not defined") {
t.Fatalf("public content metadata flag was not registered: %s", stderr)
}
if !strings.Contains(stderr, "--manifest and --command-index are required") {
t.Fatalf("stderr = %s", stderr)
}
}
func TestCheckRejectsUnsafePublicContentMetadataPath(t *testing.T) {
code, stderr := runCheckCaptureStderr(t, []string{
"--repo", t.TempDir(),
"--cli-bin", "./lark-cli",
"--public-content-metadata", filepath.Join(t.TempDir(), "pr.json"),
})
if code != 2 {
t.Fatalf("exit code = %d, stderr=%s", code, stderr)
}
if !strings.Contains(stderr, "--public-content-metadata") || !strings.Contains(stderr, "--file") {
t.Fatalf("stderr = %s, want unsafe public content metadata path error", stderr)
}
}
func TestCheckReportsManifestReadErrorsWithFlagName(t *testing.T) {
dir := t.TempDir()
manifestPath := filepath.Join(dir, "command-manifest.json")

View File

@@ -56,6 +56,14 @@ func run(args []string) int {
_ = semantic.WriteMarkdown(markdownOut, decision)
return 0
}
if reviewPath == "" && !semantic.BuildInputView(f).HasReviewableFacts() {
decision := finalizeDecision(block, waiverDiags, semantic.Decision{})
if err := writeSemanticOutputs(decisionOut, markdownOut, decision); err != nil {
fmt.Fprintf(os.Stderr, "semantic-review: %v\n", err)
return 2
}
return decisionExitCode(decision)
}
review, err := semantic.LoadOrReviewWithConfig(context.Background(), f, reviewPath, modelConfig)
if err != nil {
fmt.Fprintf(os.Stderr, "semantic-review: %v\n", err)
@@ -72,6 +80,15 @@ func run(args []string) int {
return 0
}
decision := semantic.DecideWithWaivers(f, review, policy, waivers)
decision = finalizeDecision(block, waiverDiags, decision)
if err := writeSemanticOutputs(decisionOut, markdownOut, decision); err != nil {
fmt.Fprintf(os.Stderr, "semantic-review: %v\n", err)
return 2
}
return decisionExitCode(decision)
}
func finalizeDecision(block bool, waiverDiags []report.Diagnostic, decision semantic.Decision) semantic.Decision {
decision.BlockMode = block
if !block && len(decision.Blockers) > 0 {
for i := range decision.Blockers {
@@ -81,15 +98,21 @@ func run(args []string) int {
decision.Blockers = nil
}
decision.SystemWarnings = append(diagnosticSystemWarnings(waiverDiags), decision.SystemWarnings...)
return decision
}
func writeSemanticOutputs(decisionOut, markdownOut string, decision semantic.Decision) error {
if err := semantic.WriteDecision(decisionOut, decision); err != nil {
fmt.Fprintf(os.Stderr, "semantic-review: write decision: %v\n", err)
return 2
return fmt.Errorf("write decision: %w", err)
}
if err := semantic.WriteMarkdown(markdownOut, decision); err != nil {
fmt.Fprintf(os.Stderr, "semantic-review: write markdown: %v\n", err)
return 2
return fmt.Errorf("write markdown: %w", err)
}
if block && len(decision.Blockers) > 0 {
return nil
}
func decisionExitCode(decision semantic.Decision) int {
if decision.BlockMode && len(decision.Blockers) > 0 {
return 1
}
return 0

View File

@@ -7,6 +7,7 @@ import (
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"
"github.com/larksuite/cli/internal/qualitygate/facts"
@@ -211,7 +212,19 @@ func TestRunWritesSkippedDecisionForUnavailableReviewer(t *testing.T) {
"allowed_base_urls": ["https://ark.ap-southeast.bytepluses.com/api/v3"]
}`, "")
factsPath := filepath.Join(t.TempDir(), "facts.json")
if err := (facts.Facts{SchemaVersion: 1}).WriteFile(factsPath); err != nil {
f := facts.Facts{
SchemaVersion: 1,
Skills: []facts.SkillFact{{
SourceFile: "skills/lark-wiki/SKILL.md",
Line: 30,
Changed: true,
ReferencesInvalidCommand: true,
}},
}
if !semantic.BuildInputView(f).HasReviewableFacts() {
t.Fatal("test setup must contain reviewable facts")
}
if err := f.WriteFile(factsPath); err != nil {
t.Fatalf("write facts: %v", err)
}
decisionPath := filepath.Join(t.TempDir(), "decision.json")
@@ -228,6 +241,71 @@ func TestRunWritesSkippedDecisionForUnavailableReviewer(t *testing.T) {
}
}
func TestRunShortCircuitsEmptySemanticInputWithoutReviewer(t *testing.T) {
t.Setenv("ARK_API_KEY", "")
t.Setenv("ARK_BASE_URL", "")
t.Setenv("ARK_MODEL", "")
repo := t.TempDir()
writeSemanticConfig(t, repo, `{
"schema_version": 1,
"default_enforcement": "observe",
"block_categories": ["skill_quality"]
}`, `{
"allowed": ["semantic-review-v1"],
"allowed_base_urls": ["https://ark.ap-southeast.bytepluses.com/api/v3"]
}`, "")
factsPath := filepath.Join(t.TempDir(), "facts.json")
f := facts.Facts{
SchemaVersion: 1,
Commands: []facts.CommandFact{{
Path: "service command 1",
Domain: "service",
Changed: true,
Source: "service",
}},
Outputs: []facts.OutputFact{{
Command: "service command 1",
Domain: "service",
Changed: true,
Source: "service",
IsList: true,
HasDefaultLimit: true,
HasDecisionField: true,
}},
}
if semantic.BuildInputView(f).HasReviewableFacts() {
t.Fatal("test setup must not contain reviewable facts")
}
if err := f.WriteFile(factsPath); err != nil {
t.Fatalf("write facts: %v", err)
}
decisionPath := filepath.Join(t.TempDir(), "decision.json")
markdownPath := filepath.Join(t.TempDir(), "semantic.md")
code := run([]string{"--repo", repo, "--facts", factsPath, "--decision-out", decisionPath, "--markdown-out", markdownPath, "--block"})
if code != 0 {
t.Fatalf("run() = %d, want clean pass", code)
}
decision := readDecision(t, decisionPath)
if decision.Skipped || decision.Degraded || decision.InfrastructureFailure || !decision.BlockMode {
t.Fatalf("expected non-degraded pass decision: %#v", decision)
}
if len(decision.SystemWarnings) != 0 || len(decision.Warnings) != 0 || len(decision.Blockers) != 0 {
t.Fatalf("empty semantic view should not produce findings: %#v", decision)
}
data, err := os.ReadFile(markdownPath)
if err != nil {
t.Fatalf("read markdown: %v", err)
}
markdown := string(data)
if !strings.Contains(markdown, "No semantic blockers.") {
t.Fatalf("markdown missing pass summary: %s", markdown)
}
if strings.Contains(strings.ToLower(markdown), "skipped") || strings.Contains(strings.ToLower(markdown), "degraded") {
t.Fatalf("markdown should not report semantic review as skipped/degraded: %s", markdown)
}
}
func TestRunWritesInfrastructureFailureDecisionForInvalidReviewerConfig(t *testing.T) {
t.Setenv("ARK_API_KEY", "test-key")
t.Setenv("ARK_BASE_URL", "")
@@ -243,7 +321,19 @@ func TestRunWritesInfrastructureFailureDecisionForInvalidReviewerConfig(t *testi
"allowed_base_urls": ["https://ark.ap-southeast.bytepluses.com/api/v3"]
}`, "")
factsPath := filepath.Join(t.TempDir(), "facts.json")
if err := (facts.Facts{SchemaVersion: 1}).WriteFile(factsPath); err != nil {
f := facts.Facts{
SchemaVersion: 1,
Skills: []facts.SkillFact{{
SourceFile: "skills/lark-wiki/SKILL.md",
Line: 30,
Changed: true,
ReferencesInvalidCommand: true,
}},
}
if !semantic.BuildInputView(f).HasReviewableFacts() {
t.Fatal("test setup must contain reviewable facts")
}
if err := f.WriteFile(factsPath); err != nil {
t.Fatalf("write facts: %v", err)
}
decisionPath := filepath.Join(t.TempDir(), "decision.json")

View File

@@ -5,7 +5,8 @@
"error_hint",
"default_output",
"naming",
"skill_quality"
"skill_quality",
"public_content_leakage"
],
"rollout_groups": [
{
@@ -16,7 +17,8 @@
},
"categories": [
"error_hint",
"skill_quality"
"skill_quality",
"public_content_leakage"
],
"owner": "cli-owner",
"reason": "first semantic blocking rollout only affects changed facts"

View File

@@ -13,14 +13,15 @@ import (
)
type Facts struct {
SchemaVersion int `json:"schema_version"`
Commands []CommandFact `json:"commands,omitempty"`
Skills []SkillFact `json:"skills,omitempty"`
SkillQuality []SkillQualityFact `json:"skill_quality,omitempty"`
Errors []ErrorFact `json:"errors,omitempty"`
Outputs []OutputFact `json:"outputs,omitempty"`
Examples []CommandExample `json:"examples,omitempty"`
Diagnostics []DiagnosticFact `json:"diagnostics,omitempty"`
SchemaVersion int `json:"schema_version"`
Commands []CommandFact `json:"commands,omitempty"`
Skills []SkillFact `json:"skills,omitempty"`
SkillQuality []SkillQualityFact `json:"skill_quality,omitempty"`
Errors []ErrorFact `json:"errors,omitempty"`
Outputs []OutputFact `json:"outputs,omitempty"`
Examples []CommandExample `json:"examples,omitempty"`
PublicContent []PublicContentFact `json:"public_content,omitempty"`
Diagnostics []DiagnosticFact `json:"diagnostics,omitempty"`
}
type CommandFact struct {
@@ -109,6 +110,17 @@ type OutputFact struct {
HasDecisionField bool `json:"has_decision_field,omitempty"`
}
type PublicContentFact struct {
Rule string `json:"rule"`
Action report.Action `json:"action"`
File string `json:"file"`
Line int `json:"line"`
Source string `json:"source,omitempty"`
Excerpt string `json:"excerpt,omitempty"`
Message string `json:"message,omitempty"`
Suggestion string `json:"suggestion,omitempty"`
}
type DryRunRequest struct {
Method string `json:"method"`
URL string `json:"url"`
@@ -206,6 +218,11 @@ func BuildWithCommandLookup(m manifest.Manifest, commandLookup manifest.Manifest
}
}
func WithPublicContent(f Facts, publicContent []PublicContentFact) Facts {
f.PublicContent = publicContent
return f
}
type commandScope struct {
Domain string
Source string

View File

@@ -34,6 +34,7 @@ func TestFactsSchemaCarriesGatekeeperFields(t *testing.T) {
Errors: []ErrorFact{{Code: "invalid_input", Message: "bad path", Hint: "pass --file", Retryable: false, HintActionCount: 1, RequiredHint: true}},
Outputs: []OutputFact{{Command: "im messages list", Fields: []string{"message_id", "sender", "create_time"}, IsList: true, HasDefaultLimit: true, HasDecisionField: true}},
Skills: []SkillFact{{SourceFile: "skills/lark-doc/SKILL.md", Line: 1, DestructiveWithoutGuard: true, ScopeConflict: true}},
PublicContent: []PublicContentFact{{Rule: "public_content_generic_credential", Action: report.ActionReject, File: "docs/public.md", Line: 4, Excerpt: "api_key = <redacted>"}},
}
data, err := json.Marshal(f)
if err != nil {
@@ -43,7 +44,10 @@ func TestFactsSchemaCarriesGatekeeperFields(t *testing.T) {
if err := json.Unmarshal(data, &got); err != nil {
t.Fatalf("unmarshal facts: %v", err)
}
if !got.Errors[0].RequiredHint || got.Outputs[0].Fields[0] != "message_id" || !got.Skills[0].ScopeConflict {
if !got.Errors[0].RequiredHint ||
got.Outputs[0].Fields[0] != "message_id" ||
!got.Skills[0].ScopeConflict ||
got.PublicContent[0].Rule != "public_content_generic_credential" {
t.Fatalf("facts lost gatekeeper fields: %#v", got)
}
}

View File

@@ -0,0 +1,343 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package publiccontent
import (
"bytes"
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
)
func Collect(ctx context.Context, opts Options) ([]Finding, error) {
metadata, err := LoadMetadata(opts.MetadataPath)
if err != nil {
return nil, err
}
var out []Finding
changedFiles, base, err := changedFiles(ctx, opts.Repo, opts.ChangedFrom)
if err != nil {
return nil, err
}
patches := map[string][]changedChunk{}
if base != "" {
patches, err = changedPatches(ctx, opts.Repo, base)
if err != nil {
return nil, err
}
}
for _, file := range changedFiles {
if !scanChangedFile(file) {
continue
}
for _, chunk := range patches[file] {
findings := scanText(file, "file", chunk.Text, isDetectorRuleFile(file))
for i := range findings {
findings[i].Line += chunk.StartLine - 1
}
out = append(out, findings...)
out = append(out, semanticCandidate(file, "file", chunk.Text, chunk.StartLine)...)
}
privateKeyFindings, err := scanTouchedPrivateKeyBlocks(ctx, opts.Repo, file, patches[file])
if err != nil {
return nil, err
}
out = appendUniqueFindings(out, privateKeyFindings...)
}
if base != "" {
commitFindings, err := scanCommitMessages(ctx, opts.Repo, base)
if err != nil {
return nil, err
}
out = append(out, commitFindings...)
}
branchName := opts.BranchName
if branchName == "" {
branchName = metadata.Branch
}
if branchName == "" {
branchName = branchFromEnv()
}
if branchName == "" {
branchName = currentBranch(ctx, opts.Repo)
}
if branchName != "" {
out = append(out, scanText("branch", "branch", branchName, false)...)
}
out = append(out, scanMetadata(metadata)...)
sort.SliceStable(out, func(i, j int) bool {
if out[i].File != out[j].File {
return out[i].File < out[j].File
}
if out[i].Line != out[j].Line {
return out[i].Line < out[j].Line
}
return out[i].Rule < out[j].Rule
})
return out, nil
}
func currentBranch(ctx context.Context, repo string) string {
data, err := gitOutput(ctx, repo, "branch", "--show-current")
if err != nil {
return ""
}
return strings.TrimSpace(string(data))
}
func branchFromEnv() string {
for _, key := range []string{"PR_BRANCH", "GITHUB_HEAD_REF", "GITHUB_REF_NAME"} {
if value := strings.TrimSpace(os.Getenv(key)); value != "" {
return value
}
}
return ""
}
func changedFiles(ctx context.Context, repo, changedFrom string) ([]string, string, error) {
if changedFrom == "" {
return nil, "", nil
}
baseBytes, err := gitOutput(ctx, repo, "merge-base", changedFrom, "HEAD")
if err != nil {
return nil, "", err
}
base := strings.TrimSpace(string(baseBytes))
files, err := diffFileNames(ctx, repo, base)
if err != nil {
return nil, "", err
}
sort.Strings(files)
return files, base, nil
}
func diffFileNames(ctx context.Context, repo, base string) ([]string, error) {
data, err := gitOutput(ctx, repo, "diff", "--name-only", "-z", "--diff-filter=ACMR", base+"..HEAD")
if err != nil {
return nil, err
}
var files []string
for _, file := range bytes.Split(data, []byte{0}) {
if len(file) == 0 {
continue
}
files = append(files, filepath.ToSlash(string(file)))
}
return files, nil
}
var detectorFixtureExclusions = map[string]bool{
"internal/qualitygate/publiccontent/collect_test.go": true,
"internal/qualitygate/publiccontent/rules.go": true,
"internal/qualitygate/publiccontent/scan.go": true,
"internal/qualitygate/publiccontent/scan_test.go": true,
}
func scanChangedFile(file string) bool {
normalized := strings.TrimPrefix(strings.ReplaceAll(file, "\\", "/"), "./")
return !detectorFixtureExclusions[normalized]
}
type changedChunk struct {
StartLine int
Text string
}
func (c changedChunk) endLine() int {
lines := strings.Count(strings.TrimRight(c.Text, "\n"), "\n") + 1
if lines < 1 {
lines = 1
}
return c.StartLine + lines - 1
}
func changedPatches(ctx context.Context, repo, base string) (map[string][]changedChunk, error) {
files, err := diffFileNames(ctx, repo, base)
if err != nil {
return nil, err
}
data, err := gitOutput(ctx, repo, "diff", "--no-ext-diff", "--unified=0", "--diff-filter=ACMR", base+"..HEAD")
if err != nil {
return nil, err
}
out := map[string][]changedChunk{}
var file string
var chunk *changedChunk
nextLine := 0
nextFile := 0
flush := func() {
if file == "" || chunk == nil || chunk.Text == "" {
chunk = nil
return
}
out[file] = append(out[file], *chunk)
chunk = nil
}
for _, raw := range strings.Split(string(data), "\n") {
switch {
case strings.HasPrefix(raw, "diff --git "):
flush()
file = ""
if nextFile < len(files) {
file = files[nextFile]
nextFile++
}
case strings.HasPrefix(raw, "@@ "):
flush()
start, ok := parseNewHunkStart(raw)
if !ok {
nextLine = 0
continue
}
nextLine = start
chunk = &changedChunk{StartLine: start}
case strings.HasPrefix(raw, "+") && !strings.HasPrefix(raw, "+++"):
if chunk == nil {
chunk = &changedChunk{StartLine: max(nextLine, 1)}
}
chunk.Text += strings.TrimPrefix(raw, "+") + "\n"
nextLine++
case strings.HasPrefix(raw, "-"):
continue
default:
if chunk != nil && strings.HasPrefix(raw, `\ No newline at end of file`) {
continue
}
flush()
}
}
flush()
return out, nil
}
func parseNewHunkStart(header string) (int, bool) {
parts := strings.Split(header, " ")
for _, part := range parts {
if !strings.HasPrefix(part, "+") {
continue
}
raw := strings.TrimPrefix(part, "+")
if before, _, ok := strings.Cut(raw, ","); ok {
raw = before
}
start, err := strconv.Atoi(raw)
return start, err == nil && start > 0
}
return 0, false
}
func scanCommitMessages(ctx context.Context, repo, base string) ([]Finding, error) {
data, err := gitOutput(ctx, repo, "log", "--format=%H%x00%B%x00", base+"..HEAD")
if err != nil {
return nil, err
}
parts := bytes.Split(data, []byte{0})
var out []Finding
for i := 0; i+1 < len(parts); i += 2 {
sha := strings.TrimSpace(string(parts[i]))
body := string(parts[i+1])
if sha == "" || body == "" {
continue
}
short := sha
if len(short) > 12 {
short = short[:12]
}
out = append(out, scanText("commit:"+short, "commit", body, false)...)
out = append(out, semanticCandidate("commit:"+short, "commit", body, 1)...)
}
return out, nil
}
type lineRange struct {
Start int
End int
}
func scanTouchedPrivateKeyBlocks(ctx context.Context, repo, file string, chunks []changedChunk) ([]Finding, error) {
if len(chunks) == 0 {
return nil, nil
}
data, err := gitOutput(ctx, repo, "show", "HEAD:"+file)
if err != nil {
return nil, err
}
var added []lineRange
for _, chunk := range chunks {
added = append(added, lineRange{Start: chunk.StartLine, End: chunk.endLine()})
}
var out []Finding
for _, block := range privateKeyBlocks(string(data)) {
if !rangesIntersectAny(block, added) {
continue
}
out = append(out, newFinding("public_content_private_key_block", file, block.Start, "file", "private key block"))
}
return out, nil
}
func privateKeyBlocks(text string) []lineRange {
lines := strings.Split(text, "\n")
var out []lineRange
inPrivateKey := false
start := 0
for i, line := range lines {
lineNo := i + 1
if !inPrivateKey && strings.Contains(line, privateKeyBeginPrefix) && strings.Contains(line, privateKeyMarker) {
inPrivateKey = true
start = lineNo
}
if inPrivateKey && strings.Contains(line, privateKeyEndPrefix) && strings.Contains(line, privateKeyMarker) {
out = append(out, lineRange{Start: start, End: lineNo})
inPrivateKey = false
}
}
return out
}
func rangesIntersectAny(block lineRange, ranges []lineRange) bool {
for _, r := range ranges {
if block.Start <= r.End && r.Start <= block.End {
return true
}
}
return false
}
func appendUniqueFindings(items []Finding, additions ...Finding) []Finding {
for _, addition := range additions {
duplicate := false
for _, item := range items {
if item.Rule == addition.Rule &&
item.File == addition.File &&
item.Line == addition.Line &&
item.Source == addition.Source {
duplicate = true
break
}
}
if !duplicate {
items = append(items, addition)
}
}
return items
}
func gitOutput(ctx context.Context, repo string, args ...string) ([]byte, error) {
cmd := exec.CommandContext(ctx, "git", args...)
cmd.Dir = repo
var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
return nil, fmt.Errorf("git %s: %w\n%s", strings.Join(args, " "), err, stderr.Bytes())
}
return stdout.Bytes(), nil
}

View File

@@ -0,0 +1,885 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package publiccontent
import (
"context"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
)
func TestCollectScansOnlyCurrentContributionAndMetadata(t *testing.T) {
repo := t.TempDir()
runGit(t, repo, "init")
runGit(t, repo, "config", "user.email", "test@example.com")
runGit(t, repo, "config", "user.name", "Test User")
writeFile(t, filepath.Join(repo, "baseline.md"), `BASE_`+`TOKEN="baseline-only"
`)
runGit(t, repo, "add", "baseline.md")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "public.md"), `# Public change
api_`+`key = "example-public-key"
`)
runGit(t, repo, "add", "docs/public.md")
runGit(t, repo, "commit", "-m", "add public doc", "-m", "Change"+"-Id: I0123456789abcdef0123456789abcdef01234567")
metadataPath := filepath.Join(repo, "pr-metadata.json")
writeFile(t, metadataPath, `{"title":"publish public docs","body":"Reviewed`+`-on: https://review.example.test/c/project/+/123"}`)
got, err := Collect(context.Background(), Options{
Repo: repo,
ChangedFrom: "HEAD~1",
MetadataPath: metadataPath,
})
if err != nil {
t.Fatalf("Collect() error = %v", err)
}
rules := findingRules(got)
for _, want := range []string{
"public_content_generic_credential",
"public_content_change_id_trailer",
"public_content_reviewed_on_trailer",
} {
if !rules[want] {
t.Fatalf("missing rule %s in findings %#v", want, got)
}
}
for _, item := range got {
if item.File == "baseline.md" {
t.Fatalf("collector scanned unchanged baseline file: %#v", got)
}
}
}
func TestCollectScansOnlyChangedLinesInChangedFiles(t *testing.T) {
repo := t.TempDir()
runGit(t, repo, "init")
runGit(t, repo, "config", "user.email", "test@example.com")
runGit(t, repo, "config", "user.name", "Test User")
writeFile(t, filepath.Join(repo, "docs", "workflow.md"), "SECRET_TOKEN=legacy-example\npublic baseline\n")
runGit(t, repo, "add", "docs/workflow.md")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "workflow.md"), "SECRET_TOKEN=legacy-example\npublic baseline\nnew public line\n")
runGit(t, repo, "add", "docs/workflow.md")
runGit(t, repo, "commit", "-m", "add public line")
metadataPath := filepath.Join(repo, "pr-metadata.json")
writeFile(t, metadataPath, `{}`)
got, err := Collect(context.Background(), Options{
Repo: repo,
ChangedFrom: "HEAD~1",
MetadataPath: metadataPath,
})
if err != nil {
t.Fatalf("Collect() error = %v", err)
}
for _, item := range got {
if item.Rule == "public_content_generic_credential" && item.File == "docs/workflow.md" {
t.Fatalf("collector scanned unchanged legacy line in changed file: %#v", got)
}
}
}
func TestCollectSemanticCandidatesStoreSanitizedReviewText(t *testing.T) {
repo := t.TempDir()
runGit(t, repo, "init")
runGit(t, repo, "config", "user.email", "test@example.com")
runGit(t, repo, "config", "user.name", "Test User")
writeFile(t, filepath.Join(repo, "docs", "public.md"), "base\n")
runGit(t, repo, "add", "docs/public.md")
runGit(t, repo, "commit", "-m", "base")
raw := "private launch plan for alpha-service rollout on Friday with SERVICE_" + "TOKEN=real-" + "secret-value"
writeFile(t, filepath.Join(repo, "docs", "public.md"), "base\n"+raw+"\n")
runGit(t, repo, "add", "docs/public.md")
runGit(t, repo, "commit", "-m", "add semantic candidate")
metadataPath := filepath.Join(repo, "pr-metadata.json")
writeFile(t, metadataPath, `{}`)
got, err := Collect(context.Background(), Options{
Repo: repo,
ChangedFrom: "HEAD~1",
MetadataPath: metadataPath,
})
if err != nil {
t.Fatalf("Collect() error = %v", err)
}
var found bool
for _, item := range got {
if item.Rule != "public_content_semantic_candidate" || item.File != "docs/public.md" {
continue
}
found = true
if !strings.Contains(item.Excerpt, "alpha-service rollout on Friday") {
t.Fatalf("semantic candidate should include sanitized review text, got %#v", item)
}
if strings.Contains(item.Excerpt, "real-"+"secret-value") {
t.Fatalf("semantic candidate leaked credential value: %#v", item)
}
if !strings.Contains(item.Excerpt, "SERVICE_TOKEN=<redacted>") {
t.Fatalf("semantic candidate should redact credentials in review text, got %#v", item)
}
if !strings.Contains(item.Excerpt, "semantic signals") || !strings.Contains(item.Excerpt, "roadmap_timing") {
t.Fatalf("semantic candidate excerpt should preserve semantic signals, got %#v", item)
}
}
if !found {
t.Fatalf("missing semantic candidate in findings %#v", got)
}
}
func TestCollectSemanticCandidatesDoNotLeakWhitespaceCredentialTail(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "public.md"), "base\n")
runGit(t, repo, "add", "docs/public.md")
runGit(t, repo, "commit", "-m", "base")
raw := "private launch plan for internal rollout on Friday with SERVICE_" + "TOKEN=\"real " + "secret value\""
writeFile(t, filepath.Join(repo, "docs", "public.md"), "base\n"+raw+"\n")
runGit(t, repo, "add", "docs/public.md")
runGit(t, repo, "commit", "-m", "add semantic candidate")
got := collectFromPreviousCommit(t, repo)
for _, item := range got {
if item.Rule != "public_content_semantic_candidate" || item.File != "docs/public.md" {
continue
}
if strings.Contains(item.Excerpt, "secret value") || strings.Contains(item.Excerpt, "real "+"secret value") {
t.Fatalf("semantic candidate leaked credential tail: %#v", item)
}
if !strings.Contains(item.Excerpt, "SERVICE_TOKEN=<redacted>") {
t.Fatalf("semantic candidate should redact full credential assignment, got %#v", item)
}
return
}
t.Fatalf("missing semantic candidate in findings %#v", got)
}
func TestCollectJSONBearerHeadersDoNotLeakIntoSemanticCandidates(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "public.md"), "base\n")
runGit(t, repo, "add", "docs/public.md")
runGit(t, repo, "commit", "-m", "base")
token := "abcdefghijklmnopqrstuvwxyz"
raw := "private launch plan for internal rollout on Friday with " +
`{"headers":{"Authorization":"Bearer ` + token + `"}}`
writeFile(t, filepath.Join(repo, "docs", "public.md"), "base\n"+raw+"\n")
runGit(t, repo, "add", "docs/public.md")
runGit(t, repo, "commit", "-m", "add json bearer")
got := collectFromPreviousCommit(t, repo)
requireFinding(t, got, "docs/public.md", "public_content_bearer_header")
for _, item := range got {
if item.File != "docs/public.md" {
continue
}
if strings.Contains(item.Excerpt, token) {
t.Fatalf("finding leaked JSON bearer token: %#v", item)
}
}
}
func TestCollectDetectsQuotedJSONCredentialAssignments(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "public.json"), "{}\n")
runGit(t, repo, "add", "docs/public.json")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "public.json"), strings.Join([]string{
`{"access_` + `token":"real-json-token"}`,
`{"client_` + `secret": "real ` + `secret value"}`,
`{"tenantAccess` + `Token":"real-tenant-camel-token"}`,
`{"github` + `Token":"real-github-token"}`,
`{"vendorApi` + `Key":"real-vendor-key"}`,
`{"slackBot` + `Token":"xoxb-real-token"}`,
}, "\n")+"\n")
runGit(t, repo, "add", "docs/public.json")
runGit(t, repo, "commit", "-m", "add json config")
got := collectFromPreviousCommit(t, repo)
var count int
for _, item := range got {
if item.File == "docs/public.json" && item.Rule == "public_content_generic_credential" {
count++
for _, forbidden := range []string{
"real-json-token",
"real secret value",
"real-tenant-camel-token",
"real-github-token",
"real-vendor-key",
"xoxb-real-token",
} {
if strings.Contains(item.Excerpt, forbidden) {
t.Fatalf("JSON credential finding leaked value %q in excerpt %q", forbidden, item.Excerpt)
}
}
}
}
if count != 6 {
t.Fatalf("JSON credential findings = %d, want 6: %#v", count, got)
}
}
func TestCollectAllowsBenignJSONTokenFields(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "public.json"), "{}\n")
runGit(t, repo, "add", "docs/public.json")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "public.json"), strings.Join([]string{
`{"tokenizer":"cl100k_base"}`,
`{"token_count": 42}`,
`{"page_token":"next"}`,
`{"next_page_token":"next"}`,
`{"file_token":"file-example"}`,
`{"doc_token":"doc-example"}`,
`{"node_token":"node-example"}`,
`{"wiki_token":"wikcn_public_doc_example"}`,
`{"folder_token":"folder-example"}`,
`{"obj_token":"obj-example"}`,
`{"spreadsheet_token":"sheet-example"}`,
`{"parent_node_token":"parent-example"}`,
`{"origin_node_token":"origin-example"}`,
`{"drive_route_token":"route-example"}`,
`{"token":"<wiki_token>"}`,
`{"token":"wiki_token"}`,
`{"token_url":"https://example.com/oauth/token"}`,
`{"token_endpoint":"https://example.com/oauth/token"}`,
`{"token_format":"Bearer"}`,
`{"secret_name":"public-example-secret"}`,
`{"base_token":"base-example"}`,
`{"app_token":"app-example"}`,
`{"sync_token":"sync-example"}`,
`{"parent_token":"parent-example"}`,
`{"target_token":"target-example"}`,
`{"parent_file_token":"parent-file-example"}`,
`{"refresh_token_expires_in": 7200}`,
`{"access_token_expires_in": 7200}`,
`{"token_expires_in": 7200}`,
`{"token_status":"active"}`,
}, "\n")+"\n")
runGit(t, repo, "add", "docs/public.json")
runGit(t, repo, "commit", "-m", "add benign json token fields")
got := collectFromPreviousCommit(t, repo)
for _, item := range got {
if item.File == "docs/public.json" && item.Rule == "public_content_generic_credential" {
t.Fatalf("benign JSON token field should not be credential finding: %#v", got)
}
}
}
func TestCollectDetectsAngleWrappedRealisticCredentialValues(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), "base: true\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "base")
stripeLike := "sk_" + "live_1234567890abcdef"
patLike := "gh" + "p_1234567890abcdef1234567890abcdef1234"
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), strings.Join([]string{
"API_KEY: <" + stripeLike + ">",
"SECRET_TOKEN: <" + patLike + ">",
"CLIENT_SECRET: <real-client-secret-value>",
}, "\n")+"\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "add credential config")
got := collectFromPreviousCommit(t, repo)
var count int
for _, item := range got {
if item.File == "docs/config.yaml" && item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 3 {
t.Fatalf("angle-wrapped realistic credential findings = %d, want 3: %#v", count, got)
}
}
func TestCollectDetectsCredentialShapedValuesUnderBenignKeys(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "public.json"), "{}\n")
runGit(t, repo, "add", "docs/public.json")
runGit(t, repo, "commit", "-m", "base")
stripeLike := "sk_" + "live_1234567890abcdef"
patLike := "gh" + "p_1234567890abcdef1234567890abcdef1234"
writeFile(t, filepath.Join(repo, "docs", "public.json"), strings.Join([]string{
`{"access_token_expires_in":"` + patLike + `"}`,
`{"refresh_token_expires_in":"` + stripeLike + `"}`,
`{"client_secret_status":"real-client-secret-value"}`,
`{"client_secret_name":"real-client-secret-value"}`,
`{"app_token":"` + patLike + `"}`,
`{"sync_token":"` + stripeLike + `"}`,
`{"target_token":"real-client-secret-value"}`,
}, "\n")+"\n")
runGit(t, repo, "add", "docs/public.json")
runGit(t, repo, "commit", "-m", "add credential-shaped benign fields")
got := collectFromPreviousCommit(t, repo)
var count int
for _, item := range got {
if item.File == "docs/public.json" && item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 7 {
t.Fatalf("credential-shaped benign-key findings = %d, want 7: %#v", count, got)
}
}
func TestCollectDetectsBareIdentifierCredentialsWithMetadataSuffixes(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), "base: true\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), strings.Join([]string{
"API_KEY_NAME: prod_key",
"CLIENT_SECRET_NAME: prod_secret",
"SECRET_STATUS: prod_secret",
}, "\n")+"\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "add credential config")
got := collectFromPreviousCommit(t, repo)
var count int
for _, item := range got {
if item.File == "docs/config.yaml" && item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 3 {
t.Fatalf("metadata-suffixed bare credential findings = %d, want 3: %#v", count, got)
}
}
func TestCollectDetectsAccessKeyCredentials(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), "base: true\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "base")
accessKey := "AK" + "IAIOSFODNN7EXAMPX"
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), strings.Join([]string{
"AWS_ACCESS_KEY_ID: " + accessKey,
"ACCESS_KEY_ID: " + accessKey,
"ACCESS_KEY: " + accessKey,
}, "\n")+"\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "add access key config")
got := collectFromPreviousCommit(t, repo)
var count int
for _, item := range got {
if item.File != "docs/config.yaml" || item.Rule != "public_content_generic_credential" {
continue
}
count++
if strings.Contains(item.Excerpt, "AKIAIOSFODNN7EXAMPX") {
t.Fatalf("access key finding leaked value in excerpt %q", item.Excerpt)
}
}
if count != 3 {
t.Fatalf("access key credential findings = %d, want 3: %#v", count, got)
}
}
func TestCollectDetectsPrivateKeyAssignments(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), "base: true\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "base")
privateKey := "LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0t"
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), strings.Join([]string{
"PRIVATE_KEY: " + privateKey,
"SSH_PRIVATE_KEY: " + privateKey,
"JWT_PRIVATE_KEY: " + privateKey,
"SIGNING_PRIVATE_KEY: " + privateKey,
}, "\n")+"\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "add private key config")
got := collectFromPreviousCommit(t, repo)
var count int
for _, item := range got {
if item.File != "docs/config.yaml" || item.Rule != "public_content_generic_credential" {
continue
}
count++
if strings.Contains(item.Excerpt, privateKey) {
t.Fatalf("private key finding leaked value in excerpt %q", item.Excerpt)
}
}
if count != 4 {
t.Fatalf("private key assignment findings = %d, want 4: %#v", count, got)
}
}
func TestCollectDetectsCredentialValuesThatLookLikeBareIdentifiers(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), "base: true\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), strings.Join([]string{
"API_KEY_OPENAI: prod_key",
"CLIENT_SECRET_GOOGLE: prod_secret",
"TOKEN_GITHUB: github_token",
"APP_PASSWORD_PROD: prod_password",
}, "\n")+"\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "add credential config")
got := collectFromPreviousCommit(t, repo)
var count int
for _, item := range got {
if item.File == "docs/config.yaml" && item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 4 {
t.Fatalf("bare identifier credential findings = %d, want 4: %#v", count, got)
}
}
func TestCollectAllowsBenignUnquotedTokenFields(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), "base: true\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), strings.Join([]string{
"tokens: 128",
"token_type: bearer",
"max_tokens: 2000",
"completion_tokens: 200",
"prompt_tokens: 100",
}, "\n")+"\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "add benign token config")
got := collectFromPreviousCommit(t, repo)
for _, item := range got {
if item.File == "docs/config.yaml" && item.Rule == "public_content_generic_credential" {
t.Fatalf("benign unquoted token field should not be credential finding: %#v", got)
}
}
}
func TestCollectDetectsCredentialPhraseBeforeEnvironmentSuffix(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), "base: true\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "config.yaml"), strings.Join([]string{
"API_KEY_OPENAI: real-openai-key",
"TOKEN_GITHUB: real-github-token",
"CLIENT_SECRET_GOOGLE: real-google-secret",
"SECRET_KEY_BASE: real-secret-key-base",
"APP_PASSWORD_PROD: real-prod-password",
}, "\n")+"\n")
runGit(t, repo, "add", "docs/config.yaml")
runGit(t, repo, "commit", "-m", "add credential config")
got := collectFromPreviousCommit(t, repo)
var count int
for _, item := range got {
if item.File != "docs/config.yaml" || item.Rule != "public_content_generic_credential" {
continue
}
count++
for _, forbidden := range []string{
"real-openai-key",
"real-github-token",
"real-google-secret",
"real-secret-key-base",
"real-prod-password",
} {
if strings.Contains(item.Excerpt, forbidden) {
t.Fatalf("credential finding leaked value %q in excerpt %q", forbidden, item.Excerpt)
}
}
}
if count != 5 {
t.Fatalf("credential suffix variants findings = %d, want 5: %#v", count, got)
}
}
func TestCollectDetectsPrivateKeyWhenOnlyEndIsAdded(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "key.pem"), privateKeyBegin()+"legacy-body\n")
runGit(t, repo, "add", "docs/key.pem")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "key.pem"), privateKeyBegin()+"legacy-body\nnew-body\n"+privateKeyEnd())
runGit(t, repo, "add", "docs/key.pem")
runGit(t, repo, "commit", "-m", "complete key")
got := collectFromPreviousCommit(t, repo)
requireFinding(t, got, "docs/key.pem", "public_content_private_key_block")
}
func TestCollectDetectsPrivateKeyWhenOnlyBeginIsAdded(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "key.pem"), "legacy-body\n"+privateKeyEnd())
runGit(t, repo, "add", "docs/key.pem")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "key.pem"), privateKeyBegin()+"legacy-body\n"+privateKeyEnd())
runGit(t, repo, "add", "docs/key.pem")
runGit(t, repo, "commit", "-m", "complete key")
got := collectFromPreviousCommit(t, repo)
requireFinding(t, got, "docs/key.pem", "public_content_private_key_block")
}
func TestCollectDetectsPrivateKeyWhenOnlyBodyIsAdded(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "key.pem"), privateKeyBegin()+privateKeyEnd())
runGit(t, repo, "add", "docs/key.pem")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "key.pem"), privateKeyBegin()+"new-body\n"+privateKeyEnd())
runGit(t, repo, "add", "docs/key.pem")
runGit(t, repo, "commit", "-m", "add body")
got := collectFromPreviousCommit(t, repo)
requireFinding(t, got, "docs/key.pem", "public_content_private_key_block")
}
func TestCollectIgnoresUntouchedHistoricalPrivateKey(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "key.pem"), privateKeyBegin()+"legacy-body\n"+privateKeyEnd())
runGit(t, repo, "add", "docs/key.pem")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "key.pem"), privateKeyBegin()+"legacy-body\n"+privateKeyEnd())
writeFile(t, filepath.Join(repo, "docs", "public.md"), "public docs update\n")
runGit(t, repo, "add", "docs/public.md")
runGit(t, repo, "commit", "-m", "docs update")
got := collectFromPreviousCommit(t, repo)
for _, item := range got {
if item.File == "docs/key.pem" && item.Rule == "public_content_private_key_block" {
t.Fatalf("collector reported untouched historical private key: %#v", got)
}
}
}
func TestCollectIgnoresDeletedPrivateKeyLine(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "key.pem"), privateKeyBegin()+"legacy-body\n"+privateKeyEnd())
runGit(t, repo, "add", "docs/key.pem")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "key.pem"), privateKeyBegin()+privateKeyEnd())
runGit(t, repo, "add", "docs/key.pem")
runGit(t, repo, "commit", "-m", "remove body")
got := collectFromPreviousCommit(t, repo)
for _, item := range got {
if item.File == "docs/key.pem" && item.Rule == "public_content_private_key_block" {
t.Fatalf("collector reported delete-only private key cleanup: %#v", got)
}
}
}
func TestCollectSkipsOnlyKnownQualityGateFixtureFiles(t *testing.T) {
repo := t.TempDir()
runGit(t, repo, "init")
runGit(t, repo, "config", "user.email", "test@example.com")
runGit(t, repo, "config", "user.name", "Test User")
writeFile(t, filepath.Join(repo, "README.md"), "base\n")
runGit(t, repo, "add", "README.md")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "internal", "qualitygate", "publiccontent", "collect_test.go"), "SECRET_TOKEN=fixture\n")
writeFile(t, filepath.Join(repo, "internal", "qualitygate", "publiccontent", "scan_test.go"), "SECRET_TOKEN=fixture\n")
writeFile(t, filepath.Join(repo, "internal", "qualitygate", "publiccontent", "scan.go"), "const privateKeyFixture = \""+privateKeyBeginPrefix+privateKeyMarker+"\"\n")
writeFile(t, filepath.Join(repo, "internal", "qualitygate", "publiccontent", "rules.go"), "markers := []string{\"generated with automation\"}\n")
writeFile(t, filepath.Join(repo, "tests", "e2e", "new-public-workflow.test.sh"), "SECRET_TOKEN=real-leak\n")
runGit(t, repo, "add", ".")
runGit(t, repo, "commit", "-m", "add scanner fixtures")
metadataPath := filepath.Join(repo, "pr-metadata.json")
writeFile(t, metadataPath, `{}`)
got, err := Collect(context.Background(), Options{
Repo: repo,
ChangedFrom: "HEAD~1",
MetadataPath: metadataPath,
})
if err != nil {
t.Fatalf("Collect() error = %v", err)
}
var foundOrdinaryTestLeak bool
for _, item := range got {
switch item.File {
case "internal/qualitygate/publiccontent/collect_test.go",
"internal/qualitygate/publiccontent/scan.go",
"internal/qualitygate/publiccontent/scan_test.go",
"internal/qualitygate/publiccontent/rules.go":
t.Fatalf("collector scanned known fixture or detector implementation file: %#v", got)
}
if item.File == "tests/e2e/new-public-workflow.test.sh" && item.Rule == "public_content_generic_credential" {
foundOrdinaryTestLeak = true
}
}
if !foundOrdinaryTestLeak {
t.Fatalf("collector should still scan ordinary test files for real leaks: %#v", got)
}
}
func TestScanChangedFileDocumentsFixtureExclusions(t *testing.T) {
excluded := []string{
"internal/qualitygate/publiccontent/collect_test.go",
"internal/qualitygate/publiccontent/rules.go",
"internal/qualitygate/publiccontent/scan.go",
"internal/qualitygate/publiccontent/scan_test.go",
}
for _, file := range excluded {
if scanChangedFile(file) {
t.Fatalf("scanChangedFile(%q) = true, want false for detector fixture/implementation path", file)
}
}
included := []string{
"internal/qualitygate/publiccontent/new_test.go",
"tests/e2e/new-public-workflow.test.sh",
"docs/public.md",
}
for _, file := range included {
if !scanChangedFile(file) {
t.Fatalf("scanChangedFile(%q) = false, want true", file)
}
}
}
func TestCollectScansAddedLinesInSpecialPathNames(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "old.md"), "base\n")
runGit(t, repo, "add", ".")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "has space.md"), "SECRET_TOKEN=space-value\n")
writeFile(t, filepath.Join(repo, `weird"quote.md`), "SECRET_TOKEN=quote-value\n")
runGit(t, repo, "mv", "docs/old.md", "docs/new name.md")
writeFile(t, filepath.Join(repo, "docs", "new name.md"), "base\nSECRET_TOKEN=rename-value\n")
runGit(t, repo, "add", ".")
runGit(t, repo, "commit", "-m", "add special paths")
got := collectFromPreviousCommit(t, repo)
requireFinding(t, got, "docs/has space.md", "public_content_generic_credential")
requireFinding(t, got, `weird"quote.md`, "public_content_generic_credential")
requireFinding(t, got, "docs/new name.md", "public_content_generic_credential")
}
func TestCollectScansBranchNameAsWarning(t *testing.T) {
repo := t.TempDir()
metadataPath := filepath.Join(repo, "pr-metadata.json")
writeFile(t, metadataPath, `{"branch":"bot/public-doc-update"}`)
got, err := Collect(context.Background(), Options{
Repo: repo,
MetadataPath: metadataPath,
})
if err != nil {
t.Fatalf("Collect() error = %v", err)
}
if len(got) != 1 || got[0].Rule != "public_content_automation_branch" {
t.Fatalf("branch findings = %#v", got)
}
}
func TestCollectUsesExplicitBranchNameWhenDetached(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "README.md"), "base\n")
runGit(t, repo, "add", "README.md")
runGit(t, repo, "commit", "-m", "base")
runGit(t, repo, "checkout", "-b", "bot/public-doc-update")
writeFile(t, filepath.Join(repo, "docs.md"), "safe docs\n")
runGit(t, repo, "add", "docs.md")
runGit(t, repo, "commit", "-m", "docs")
head := strings.TrimSpace(string(runGitOutput(t, repo, "rev-parse", "HEAD")))
runGit(t, repo, "checkout", "--detach", head)
metadataPath := filepath.Join(repo, "pr-metadata.json")
writeFile(t, metadataPath, `{}`)
got, err := Collect(context.Background(), Options{
Repo: repo,
MetadataPath: metadataPath,
BranchName: "bot/public-doc-update",
})
if err != nil {
t.Fatalf("Collect() error = %v", err)
}
requireFinding(t, got, "branch", "public_content_automation_branch")
}
func TestCollectUsesBranchEnvironmentWhenDetached(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "README.md"), "base\n")
runGit(t, repo, "add", "README.md")
runGit(t, repo, "commit", "-m", "base")
runGit(t, repo, "checkout", "-b", "bot/public-env-update")
writeFile(t, filepath.Join(repo, "docs.md"), "safe docs\n")
runGit(t, repo, "add", "docs.md")
runGit(t, repo, "commit", "-m", "docs")
head := strings.TrimSpace(string(runGitOutput(t, repo, "rev-parse", "HEAD")))
runGit(t, repo, "checkout", "--detach", head)
t.Setenv("GITHUB_HEAD_REF", "bot/public-env-update")
metadataPath := filepath.Join(repo, "pr-metadata.json")
writeFile(t, metadataPath, `{}`)
got, err := Collect(context.Background(), Options{
Repo: repo,
MetadataPath: metadataPath,
})
if err != nil {
t.Fatalf("Collect() error = %v", err)
}
requireFinding(t, got, "branch", "public_content_automation_branch")
}
func TestCollectPreservesFindingAttributionForChangedLines(t *testing.T) {
repo := newGitRepo(t)
writeFile(t, filepath.Join(repo, "docs", "auth.md"), "intro\n")
runGit(t, repo, "add", "docs/auth.md")
runGit(t, repo, "commit", "-m", "base")
writeFile(t, filepath.Join(repo, "docs", "auth.md"), "intro\nAuthorization: Bearer abcdefghijklmnopqrstuvwxyz\n")
runGit(t, repo, "add", "docs/auth.md")
runGit(t, repo, "commit", "-m", "add auth docs")
got := collectFromPreviousCommit(t, repo)
for _, item := range got {
if item.Rule == "public_content_bearer_header" {
if item.File != "docs/auth.md" || item.Line != 2 || item.Source != "file" {
t.Fatalf("changed-line attribution = %#v", item)
}
return
}
}
t.Fatalf("missing bearer finding: %#v", got)
}
func TestAppendUniqueFindingsDeduplicatesByRuleFileLineAndSource(t *testing.T) {
base := []Finding{newFinding("public_content_private_key_block", "docs/key.pem", 1, "file", "private key block")}
got := appendUniqueFindings(base,
newFinding("public_content_private_key_block", "docs/key.pem", 1, "file", "private key block"),
newFinding("public_content_private_key_block", "docs/key.pem", 2, "file", "private key block"),
)
if len(got) != 2 {
t.Fatalf("appendUniqueFindings len = %d, want 2: %#v", len(got), got)
}
}
func newGitRepo(t *testing.T) string {
t.Helper()
repo := t.TempDir()
runGit(t, repo, "init")
runGit(t, repo, "config", "user.email", "test@example.com")
runGit(t, repo, "config", "user.name", "Test User")
return repo
}
func privateKeyBegin() string {
return privateKeyBeginPrefix + privateKeyMarker + "\n"
}
func privateKeyEnd() string {
return privateKeyEndPrefix + privateKeyMarker + "\n"
}
func collectFromPreviousCommit(t *testing.T, repo string) []Finding {
t.Helper()
metadataPath := filepath.Join(repo, "pr-metadata.json")
writeFile(t, metadataPath, `{}`)
got, err := Collect(context.Background(), Options{
Repo: repo,
ChangedFrom: "HEAD~1",
MetadataPath: metadataPath,
})
if err != nil {
t.Fatalf("Collect() error = %v", err)
}
return got
}
func requireFinding(t *testing.T, got []Finding, file, rule string) {
t.Helper()
for _, item := range got {
if item.File == file && item.Rule == rule {
return
}
}
t.Fatalf("missing %s in %s findings: %#v", rule, file, got)
}
func TestCollectRequiresValidMetadataJSON(t *testing.T) {
repo := t.TempDir()
metadataPath := filepath.Join(repo, "pr-metadata.json")
writeFile(t, metadataPath, `{"title":`)
_, err := Collect(context.Background(), Options{Repo: repo, MetadataPath: metadataPath})
if err == nil || !strings.Contains(err.Error(), "public content metadata") {
t.Fatalf("Collect() error = %v, want metadata parse error", err)
}
}
func runGit(t *testing.T, repo string, args ...string) {
t.Helper()
if len(args) > 0 && args[0] == "commit" {
args = append([]string{"commit", "--no-verify"}, args[1:]...)
}
cmd := exec.Command("git", args...)
cmd.Dir = repo
out, err := cmd.CombinedOutput()
if err != nil {
t.Fatalf("git %v failed: %v\n%s", args, err, out)
}
}
func runGitOutput(t *testing.T, repo string, args ...string) []byte {
t.Helper()
cmd := exec.Command("git", args...)
cmd.Dir = repo
out, err := cmd.CombinedOutput()
if err != nil {
t.Fatalf("git %v failed: %v\n%s", args, err, out)
}
return out
}
func writeFile(t *testing.T, path, data string) {
t.Helper()
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(path, []byte(data), 0o644); err != nil {
t.Fatal(err)
}
}

View File

@@ -0,0 +1,11 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package publiccontent
func ScanComment(kind, body string) []Finding {
if kind == "" {
kind = "comment"
}
return scanText(kind, "comment", body, false)
}

View File

@@ -0,0 +1,19 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package publiccontent
import "testing"
func TestScanCommentAuditsPublishedCommentBodies(t *testing.T) {
got := ScanComment("issue_comment", `The published comment included /tmp/harness`+`-agent/run and CCM`+`-Harness: stage-4`)
rules := findingRules(got)
if !rules["public_content_harness_metadata"] || !rules["public_content_ccm_harness_trailer"] {
t.Fatalf("comment audit findings = %#v", got)
}
for _, item := range got {
if item.File != "issue_comment" {
t.Fatalf("comment finding file = %q, want issue_comment", item.File)
}
}
}

View File

@@ -0,0 +1,45 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package publiccontent
import (
"encoding/json"
"fmt"
"github.com/larksuite/cli/internal/vfs"
)
func LoadMetadata(path string) (Metadata, error) {
if path == "" {
return Metadata{}, nil
}
data, err := vfs.ReadFile(path)
if err != nil {
return Metadata{}, fmt.Errorf("public content metadata: %w", err)
}
if len(data) == 0 {
return Metadata{}, nil
}
var out Metadata
if err := json.Unmarshal(data, &out); err != nil {
return Metadata{}, fmt.Errorf("public content metadata: %w", err)
}
return out, nil
}
func scanMetadata(m Metadata) []Finding {
text := ""
if m.Title != "" {
text += "title: " + m.Title + "\n"
}
if m.Body != "" {
text += "body:\n" + m.Body + "\n"
}
if text == "" {
return nil
}
out := scanText("pull_request_metadata", "metadata", text, false)
out = append(out, semanticCandidate("pull_request_metadata", "metadata", text, 1)...)
return out
}

View File

@@ -0,0 +1,22 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package publiccontent
import (
"path/filepath"
"testing"
)
func TestLoadMetadataReadsTitleAndBody(t *testing.T) {
path := filepath.Join(t.TempDir(), "metadata.json")
writeFile(t, path, `{"title":"public change","body":"pass`+`word = \"example-password\""}`)
got, err := LoadMetadata(path)
if err != nil {
t.Fatalf("LoadMetadata() error = %v", err)
}
if got.Title != "public change" || got.Body == "" {
t.Fatalf("metadata = %#v", got)
}
}

View File

@@ -0,0 +1,421 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package publiccontent
import (
"net/url"
"path/filepath"
"regexp"
"strings"
"github.com/larksuite/cli/internal/qualitygate/report"
)
var (
credentialAssignmentRE = regexp.MustCompile(`(?i)["']?\b[A-Za-z0-9_-]*(?:api[_-]?key|access[_-]?key|private[_-]?key|secret|password|passwd|token|webhook|access[_-]?token|client[_-]?secret)[A-Za-z0-9_-]*\b["']?\s*[:=]\s*(?:"((?:\\.|[^"\\])*)"|'((?:\\.|[^'\\])*)'|(\$\([^)]*\))|(\$\{\{[^}]+\}\})|([^"'\s,}\]]+))`)
jwtLikeRE = regexp.MustCompile(`\b[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b`)
credentialURLRE = regexp.MustCompile(`(?i)\b[a-z][a-z0-9+.-]*://[^/\s:@]*:[^@\s/]+@[^)\s]+`)
bearerHeaderRE = regexp.MustCompile(`(?i)(?:\bAuthorization\s*:\s*Bearer\s+|["']Authorization["']\s*:\s*["']Bearer\s+)[A-Za-z0-9._+/=-]{12,}`)
semanticBearerHeaderRE = regexp.MustCompile(`(?i)(?:\bAuthorization\s*:\s*Bearer\s+[^"'\s,}\]]+|["']Authorization["']\s*:\s*["']Bearer\s+[^"'\\\s,}\]]+)`)
changeIDTrailerRE = regexp.MustCompile(`(?i)^\s*Change-Id:\s*\S+`)
reviewedOnTrailerRE = regexp.MustCompile(`(?i)^\s*Reviewed-on:\s*\S+`)
ccmHarnessTrailerRE = regexp.MustCompile(`(?i)\bCCM-Harness:\s*\S+`)
privateIPv4RE = regexp.MustCompile(`\b(?:10\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}|192\.168\.[0-9]{1,3}\.[0-9]{1,3}|172\.(?:1[6-9]|2[0-9]|3[0-1])\.[0-9]{1,3}\.[0-9]{1,3})\b`)
automationBranchRE = regexp.MustCompile(`(?i)(^|/)(bot|automation)[-/]`)
)
func actionForRule(rule string) report.Action {
switch rule {
case "public_content_generic_credential",
"public_content_private_key_block",
"public_content_jwt_like_token",
"public_content_bearer_header",
"public_content_credential_url",
"public_content_change_id_trailer",
"public_content_reviewed_on_trailer",
"public_content_provenance_marker",
"public_content_detector_fingerprint",
"public_content_harness_metadata",
"public_content_ccm_harness_trailer":
return report.ActionReject
case "public_content_private_ipv4",
"public_content_automation_branch":
return report.ActionWarning
default:
return report.ActionWarning
}
}
func isPlaceholderValue(value string) bool {
trimmed := strings.Trim(value, `"'`)
normalized := strings.ToLower(trimmed)
if normalized == "" ||
normalized == "=" ||
percentWrappedPlaceholder(normalized) ||
angleWrappedPlaceholder(normalized) ||
urlWithAnglePlaceholder(normalized) ||
isCredentialReferenceValue(trimmed) {
return true
}
return namedPlaceholderValue(normalized)
}
func namedPlaceholderValue(value string) bool {
switch value {
case "placeholder", "redacted", "<redacted>", "xxxx":
return true
}
return strings.Contains(value, "cli_example") || allXPlaceholder(value)
}
func allXPlaceholder(value string) bool {
if len(value) < 4 {
return false
}
for _, r := range value {
if r != 'x' {
return false
}
}
return true
}
func urlWithAnglePlaceholder(value string) bool {
if !strings.Contains(value, "://") ||
!strings.Contains(value, "<") ||
!strings.Contains(value, ">") {
return false
}
return !urlRemainderLooksCredentialLike(removeAnglePlaceholders(value))
}
func removeAnglePlaceholders(value string) string {
var out strings.Builder
for len(value) > 0 {
start := strings.Index(value, "<")
if start < 0 {
out.WriteString(value)
break
}
out.WriteString(value[:start])
end := strings.Index(value[start+1:], ">")
if end < 0 {
out.WriteString(value[start:])
break
}
value = value[start+end+2:]
}
return out.String()
}
func urlRemainderLooksCredentialLike(value string) bool {
normalized := strings.ToLower(value)
for _, marker := range []string{
"secret",
"token",
"password",
"passwd",
"api_key",
"apikey",
"private_key",
"privatekey",
"client_secret",
"clientsecret",
} {
if strings.Contains(normalized, marker) {
return true
}
}
for _, part := range strings.FieldsFunc(normalized, func(r rune) bool {
return !((r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '-')
}) {
if credentialShapedIdentifier(part) || longCredentialSegment(part) {
return true
}
}
return false
}
func longCredentialSegment(value string) bool {
if len(value) < 16 {
return false
}
var hasLetter, hasDigit bool
for _, r := range value {
switch {
case r >= 'a' && r <= 'z':
hasLetter = true
case r >= '0' && r <= '9':
hasDigit = true
case r == '_' || r == '-':
default:
return false
}
}
return hasLetter || hasDigit
}
func isCredentialReferenceValue(value string) bool {
normalized := strings.ToLower(value)
switch {
case strings.HasPrefix(normalized, "${{"):
return githubExpressionReference(normalized)
case strings.HasPrefix(normalized, "$("):
return !commandSubstitutionLooksCredentialLike(normalized)
case strings.HasPrefix(normalized, "process.env."):
return credentialReferenceIdentifier(strings.TrimPrefix(normalized, "process.env."))
case strings.HasPrefix(normalized, "${"):
return credentialReferenceIdentifier(strings.TrimSuffix(strings.TrimPrefix(normalized, "${"), "}"))
case strings.HasPrefix(value, "$"):
return credentialReferenceIdentifier(strings.TrimPrefix(normalized, "$"))
default:
return false
}
}
func commandSubstitutionLooksCredentialLike(value string) bool {
if !strings.HasPrefix(value, "$(") || !strings.HasSuffix(value, ")") {
return false
}
inner := strings.TrimSuffix(strings.TrimPrefix(value, "$("), ")")
for _, part := range strings.FieldsFunc(inner, func(r rune) bool {
return !((r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '-')
}) {
if credentialShapedIdentifier(part) || longCredentialSegment(part) {
return true
}
}
return false
}
func githubExpressionReference(value string) bool {
if !strings.HasPrefix(value, "${{") || !strings.HasSuffix(value, "}}") {
return false
}
expr := strings.TrimSpace(strings.TrimSuffix(strings.TrimPrefix(value, "${{"), "}}"))
switch {
case strings.HasPrefix(expr, "secrets."):
return dottedReferenceIdentifier(strings.TrimPrefix(expr, "secrets."))
case strings.HasPrefix(expr, "env."):
return dottedReferenceIdentifier(strings.TrimPrefix(expr, "env."))
case strings.HasPrefix(expr, "vars."):
return dottedReferenceIdentifier(strings.TrimPrefix(expr, "vars."))
case expr == "github.token":
return true
default:
return false
}
}
func dottedReferenceIdentifier(value string) bool {
if value == "" {
return false
}
for _, part := range strings.Split(value, ".") {
if !referenceIdentifier(part) {
return false
}
}
return true
}
func credentialReferenceIdentifier(value string) bool {
return referenceIdentifier(value) && !credentialShapedIdentifier(value)
}
func referenceIdentifier(value string) bool {
if value == "" {
return false
}
for i, r := range value {
switch {
case r >= 'a' && r <= 'z':
case r >= '0' && r <= '9' && i > 0:
case r == '_' && i > 0:
default:
return false
}
}
return true
}
func angleWrappedPlaceholder(value string) bool {
if len(value) < 3 || !strings.HasPrefix(value, "<") || !strings.HasSuffix(value, ">") {
return false
}
return anglePlaceholderIdentifier(strings.Trim(value, "<>"))
}
func percentWrappedPlaceholder(value string) bool {
if len(value) < 3 || !strings.HasPrefix(value, "%") || !strings.HasSuffix(value, "%") {
return false
}
inner := strings.Trim(value, "%")
return delimitedPlaceholderIdentifier(inner) && !credentialShapedIdentifier(inner)
}
func delimitedPlaceholderIdentifier(value string) bool {
if value == "" {
return false
}
for _, r := range value {
if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '-' {
continue
}
return false
}
return true
}
func anglePlaceholderIdentifier(value string) bool {
if value == "" {
return false
}
for _, r := range value {
if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '-' {
continue
}
return false
}
if credentialShapedIdentifier(value) {
return false
}
switch value {
case "token",
"id",
"key",
"secret",
"password",
"api-key",
"client-secret",
"access-token",
"refresh-token",
"auth-token",
"bearer-token",
"session-token",
"service-token":
return true
}
for _, suffix := range []string{"_token", "_id", "_key", "_secret", "_password"} {
if strings.HasSuffix(value, suffix) {
return true
}
}
for _, suffix := range []string{"-token", "-id", "-key", "-secret", "-password"} {
if strings.HasSuffix(value, suffix) {
return true
}
}
return false
}
func credentialShapedValue(value string) bool {
normalized := strings.ToLower(strings.Trim(value, `"'<>`))
return credentialShapedIdentifier(normalized)
}
func credentialShapedIdentifier(value string) bool {
switch {
case strings.HasPrefix(value, "sk_live_"),
strings.HasPrefix(value, "sk_test_"),
strings.HasPrefix(value, "ghp_"),
strings.HasPrefix(value, "gho_"),
strings.HasPrefix(value, "ghu_"),
strings.HasPrefix(value, "github_pat_"),
strings.HasPrefix(value, "xoxb_"),
strings.HasPrefix(value, "xoxp_"),
strings.HasPrefix(value, "xoxa_"):
return true
case strings.HasPrefix(value, "real-") &&
(strings.Contains(value, "secret") ||
strings.Contains(value, "token") ||
strings.Contains(value, "key") ||
strings.Contains(value, "password")):
return true
default:
return false
}
}
func resourceTokenPlaceholderValue(value string) bool {
normalized := strings.ToLower(strings.Trim(value, `"'`))
switch normalized {
case "wiki_token",
"folder_token",
"obj_token",
"spreadsheet_token",
"file_token",
"doc_token",
"node_token",
"parent_node_token",
"origin_node_token",
"drive_route_token":
return true
default:
return false
}
}
func provenanceMarker(line string) bool {
normalized := strings.ToLower(line)
markers := []string{
"generat" + "ed by tool",
"creat" + "ed by tool",
"generat" + "ed by automation",
"creat" + "ed by automation",
"machine-" + "generated",
"generated with automated",
"generated with automation",
"🤖 generated",
}
for _, marker := range markers {
if strings.Contains(normalized, marker) {
return true
}
}
if strings.HasPrefix(normalized, "co-authored-by:") &&
(strings.Contains(normalized, "<bot@") ||
strings.Contains(normalized, " bot@") ||
strings.Contains(normalized, "[bot]") ||
strings.Contains(normalized, "automation") ||
strings.Contains(normalized, "automated-code-assistant")) {
return true
}
return false
}
// Detector fingerprint checks are intentionally scoped to public rule/config
// files. They do not try to hide this package's implementation; they prevent
// publishing reusable detector identifiers in external-facing rule bundles.
func isDetectorRuleFile(path string) bool {
normalized := filepath.ToSlash(path)
base := filepath.Base(normalized)
return base == ".gitleaks.toml" ||
strings.Contains(normalized, "public-rules/") ||
strings.Contains(normalized, "public_rules/")
}
func detectorFingerprint(line string) bool {
normalized := strings.ToLower(line)
fingerprints := []string{
strings.Join([]string{"public", "content", "leakage"}, "-"),
strings.Join([]string{"public", "content", "detector"}, "-"),
"publiccontent",
}
for _, fingerprint := range fingerprints {
if strings.Contains(normalized, fingerprint) {
return true
}
}
return false
}
func redactCredentialURL(raw string) string {
u, err := url.Parse(raw)
if err != nil || u.User == nil {
return "<credential-url>"
}
u.User = url.UserPassword("<user>", "<redacted>")
return u.String()
}

View File

@@ -0,0 +1,656 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package publiccontent
import (
"fmt"
"path/filepath"
"sort"
"strings"
"unicode"
)
const (
privateKeyBeginPrefix = "-----" + "BEGIN "
privateKeyEndPrefix = "-----" + "END "
privateKeyMarker = "PRIVATE " + "KEY-----"
)
func ScanFile(path string, data []byte) []Finding {
return scanText(filepath.ToSlash(path), "file", string(data), isDetectorRuleFile(path))
}
func semanticCandidate(file, source, text string, line int) []Finding {
excerpt := redactedSemanticExcerpt(text)
if excerpt == "" {
return nil
}
return []Finding{newFinding("public_content_semantic_candidate", file, line, source, excerpt)}
}
func scanText(file, source, text string, detectorFile bool) []Finding {
var out []Finding
lines := strings.Split(text, "\n")
inPrivateKey := false
privateKeyLine := 0
for i, line := range lines {
lineNo := i + 1
if strings.Contains(line, privateKeyBeginPrefix) && strings.Contains(line, privateKeyMarker) {
inPrivateKey = true
privateKeyLine = lineNo
}
if inPrivateKey && strings.Contains(line, privateKeyEndPrefix) && strings.Contains(line, privateKeyMarker) {
out = append(out, newFinding("public_content_private_key_block", file, privateKeyLine, source, "private key block"))
inPrivateKey = false
}
for _, match := range credentialAssignmentRE.FindAllStringSubmatch(line, -1) {
if !isCredentialAssignmentMatch(match[0]) {
continue
}
value := credentialAssignmentValue(match)
keyName, _ := normalizedCredentialAssignmentKey(match[0])
if value == "" ||
isNonSecretLiteralValue(value) ||
isPlaceholderValue(value) ||
isResourceTokenPlaceholderAssignment(keyName, value) {
continue
}
if looksLikeEqualityComparison(value) {
continue
}
out = append(out, newFinding("public_content_generic_credential", file, lineNo, source, redactAssignment(match[0])))
}
for _, match := range jwtLikeRE.FindAllString(line, -1) {
out = append(out, newFinding("public_content_jwt_like_token", file, lineNo, source, redactToken(match)))
}
for range bearerHeaderRE.FindAllString(line, -1) {
out = append(out, newFinding("public_content_bearer_header", file, lineNo, source, "Authorization: Bearer <redacted>"))
}
for _, match := range credentialURLRE.FindAllString(line, -1) {
if isPlaceholderCredentialURL(match) {
continue
}
out = append(out, newFinding("public_content_credential_url", file, lineNo, source, redactCredentialURL(match)))
}
for _, match := range privateIPv4RE.FindAllString(line, -1) {
out = append(out, newFinding("public_content_private_ipv4", file, lineNo, source, match))
}
if source == "branch" && automationBranchRE.MatchString(line) {
out = append(out, newFinding("public_content_automation_branch", file, lineNo, source, "automation branch marker"))
}
switch {
case changeIDTrailerRE.MatchString(line):
out = append(out, newFinding("public_content_change_id_trailer", file, lineNo, source, "Change-Id: <redacted>"))
case reviewedOnTrailerRE.MatchString(line):
out = append(out, newFinding("public_content_reviewed_on_trailer", file, lineNo, source, "Reviewed-on: <redacted>"))
case ccmHarnessTrailerRE.MatchString(line):
out = append(out, newFinding("public_content_ccm_harness_trailer", file, lineNo, source, "CCM-Harness: <redacted>"))
}
if provenanceMarker(line) {
out = append(out, newFinding("public_content_provenance_marker", file, lineNo, source, "provenance marker"))
}
if strings.Contains(line, "/tmp/harness-agent") {
out = append(out, newFinding("public_content_harness_metadata", file, lineNo, source, "/tmp/harness-agent"))
}
if detectorFile && detectorFingerprint(line) {
out = append(out, newFinding("public_content_detector_fingerprint", file, lineNo, source, "public detector fingerprint"))
}
}
sort.SliceStable(out, func(i, j int) bool {
if out[i].File != out[j].File {
return out[i].File < out[j].File
}
if out[i].Line != out[j].Line {
return out[i].Line < out[j].Line
}
return out[i].Rule < out[j].Rule
})
return out
}
func isCredentialAssignmentMatch(match string) bool {
name, value, ok := normalizedCredentialAssignment(match)
if !ok {
return false
}
if isWebhookCredentialKey(name) && webhookAssignmentValueLooksCredentialLike(value) {
return true
}
if isBenignTokenField(name) && !credentialShapedValue(value) {
return false
}
return isExplicitCredentialKey(name)
}
func normalizedCredentialAssignmentKey(match string) (string, bool) {
key, _, ok := normalizedCredentialAssignment(match)
return key, ok
}
func normalizedCredentialAssignment(match string) (string, string, bool) {
key, ok := credentialAssignmentKey(match)
if !ok {
return "", "", false
}
key = strings.TrimSpace(key)
if key == "" {
return "", "", false
}
submatches := credentialAssignmentRE.FindStringSubmatch(match)
return normalizedCredentialKey(strings.Trim(key, `"'`)), credentialAssignmentValue(submatches), true
}
func normalizedCredentialKey(key string) string {
key = strings.TrimSpace(key)
var out []rune
var prev rune
for i, r := range key {
if r == '-' {
r = '_'
}
if i > 0 && isCredentialKeyBoundary(prev, r) {
out = append(out, '_')
}
out = append(out, unicode.ToLower(r))
prev = r
}
key = string(out)
key = strings.ReplaceAll(key, "-", "_")
return key
}
func isCredentialKeyBoundary(prev, current rune) bool {
if prev == '_' || current == '_' {
return false
}
return (unicode.IsLower(prev) || unicode.IsDigit(prev)) && unicode.IsUpper(current)
}
func isBenignTokenField(key string) bool {
if isTokenMetricField(key) ||
isTokenMetadataField(key) ||
isResourceTokenField(key) ||
isPaginationOrSyncTokenField(key) {
return true
}
return false
}
func isTokenMetricField(key string) bool {
switch key {
case "tokenizer",
"token_count",
"tokens",
"max_tokens",
"completion_tokens",
"prompt_tokens":
return true
default:
return false
}
}
func isTokenMetadataField(key string) bool {
switch key {
case "access_token_expires_in",
"refresh_token_expires_in",
"token_expires_in",
"token_status",
"token_type",
"token_url",
"token_endpoint",
"token_format",
"secret_name":
return true
default:
return false
}
}
func isPaginationOrSyncTokenField(key string) bool {
switch key {
case "page_token",
"next_page_token",
"sync_token":
return true
default:
return false
}
}
func isResourceTokenField(key string) bool {
if !strings.HasSuffix(key, "_token") {
return false
}
prefix := strings.TrimSuffix(key, "_token")
switch prefix {
case "app",
"base",
"board",
"doc",
"drive_route",
"file",
"folder",
"host_node",
"minute",
"node",
"obj",
"origin_node",
"parent",
"parent_file",
"parent_node",
"share",
"spreadsheet",
"target",
"wiki":
return true
default:
return false
}
}
func isResourceTokenPlaceholderAssignment(key, value string) bool {
return key == "token" && resourceTokenPlaceholderValue(value)
}
func isNonSecretLiteralValue(value string) bool {
switch strings.ToLower(strings.TrimSpace(strings.Trim(value, `"'`))) {
case "true", "false", "null", "nil":
return true
default:
return false
}
}
func isWebhookCredentialKey(key string) bool {
return strings.Contains(strings.ReplaceAll(key, "_", ""), "webhook")
}
func webhookAssignmentValueLooksCredentialLike(value string) bool {
normalized := strings.ToLower(strings.Trim(value, `"'`))
if normalized == "" || isPlaceholderValue(normalized) || isNonSecretLiteralValue(normalized) {
return false
}
return urlRemainderLooksCredentialLike(removeAnglePlaceholders(normalized)) ||
credentialShapedIdentifier(strings.Trim(normalized, "$"))
}
func isExplicitCredentialKey(key string) bool {
compact := strings.ReplaceAll(key, "_", "")
switch compact {
case "token",
"accesstoken",
"refreshtoken",
"authtoken",
"bearertoken",
"sessiontoken",
"servicetoken",
"apikey",
"accesskey",
"privatekey",
"apisecret",
"secret",
"secretkey",
"clientsecret",
"password",
"passwd":
return true
}
for _, phrase := range []string{
"accesstoken",
"refreshtoken",
"authtoken",
"bearertoken",
"sessiontoken",
"servicetoken",
"bottoken",
"apikey",
"accesskey",
"privatekey",
"apisecret",
"clientsecret",
"secretkey",
} {
if strings.Contains(compact, phrase) {
return true
}
}
parts := credentialKeyParts(key)
for _, phrase := range [][2]string{
{"access", "token"},
{"refresh", "token"},
{"auth", "token"},
{"bearer", "token"},
{"session", "token"},
{"service", "token"},
{"bot", "token"},
{"api", "key"},
{"access", "key"},
{"private", "key"},
{"api", "secret"},
{"client", "secret"},
{"secret", "key"},
} {
if hasAdjacentCredentialParts(parts, phrase[0], phrase[1]) {
return true
}
}
for _, part := range parts {
switch part {
case "token", "secret", "password", "passwd":
return true
}
}
for _, suffix := range []string{
"token",
"accesstoken",
"refreshtoken",
"authtoken",
"bearertoken",
"sessiontoken",
"servicetoken",
"bottoken",
"apikey",
"accesskey",
"privatekey",
"apisecret",
"clientsecret",
"secret",
"secretkey",
"password",
"passwd",
} {
if strings.HasSuffix(compact, suffix) {
return true
}
}
for _, suffix := range []string{
"_access_token",
"_refresh_token",
"_auth_token",
"_bearer_token",
"_session_token",
"_service_token",
"_api_key",
"_access_key",
"_private_key",
"_api_secret",
"_client_secret",
"_secret",
"_secret_key",
"_password",
"_passwd",
} {
if strings.HasSuffix(key, suffix) {
return true
}
}
return false
}
func credentialKeyParts(key string) []string {
var parts []string
for _, part := range strings.Split(key, "_") {
if part != "" {
parts = append(parts, part)
}
}
return parts
}
func hasAdjacentCredentialParts(parts []string, first, second string) bool {
for i := 0; i+1 < len(parts); i++ {
if parts[i] == first && parts[i+1] == second {
return true
}
}
return false
}
func credentialAssignmentValue(match []string) string {
for _, value := range match[1:] {
if value != "" {
return value
}
}
return ""
}
func looksLikeEqualityComparison(value string) bool {
return strings.HasPrefix(strings.TrimSpace(value), "=")
}
func isPlaceholderCredentialURL(raw string) bool {
userInfo, ok := credentialURLUserInfo(raw)
if !ok {
return false
}
_, password, ok := strings.Cut(userInfo, ":")
if !ok {
return false
}
return credentialURLPasswordPlaceholder(password)
}
func credentialURLPasswordPlaceholder(password string) bool {
normalized := strings.ToLower(password)
decoded := strings.ReplaceAll(normalized, "%3c", "<")
decoded = strings.ReplaceAll(decoded, "%3e", ">")
switch decoded {
case "placeholder", "redacted", "<redacted>", "xxxx":
return true
}
return angleWrappedPlaceholder(decoded) || percentWrappedPlaceholder(decoded)
}
func credentialURLUserInfo(raw string) (string, bool) {
schemeIdx := strings.Index(raw, "://")
if schemeIdx < 0 {
return "", false
}
rest := raw[schemeIdx+len("://"):]
atIdx := strings.Index(rest, "@")
if atIdx < 0 {
return "", false
}
return rest[:atIdx], true
}
func newFinding(rule, file string, line int, source, excerpt string) Finding {
return Finding{
Rule: rule,
Action: actionForRule(rule),
File: file,
Line: line,
Source: source,
Excerpt: excerpt,
Message: messageForRule(rule),
Suggestion: suggestionForRule(rule),
}
}
func messageForRule(rule string) string {
switch rule {
case "public_content_generic_credential":
return "public contribution contains a generic credential assignment"
case "public_content_private_key_block":
return "public contribution contains a private key block"
case "public_content_jwt_like_token":
return "public contribution contains a JWT-like token"
case "public_content_bearer_header":
return "public contribution contains an Authorization bearer token"
case "public_content_credential_url":
return "public contribution contains credentials embedded in a URL"
case "public_content_private_ipv4":
return "public contribution contains a private-network IP address"
case "public_content_automation_branch":
return "public contribution uses an automation-shaped branch name"
case "public_content_change_id_trailer":
return "public contribution contains a Change-Id trailer"
case "public_content_reviewed_on_trailer":
return "public contribution contains a Reviewed-on trailer"
case "public_content_provenance_marker":
return "public contribution contains a prohibited provenance marker"
case "public_content_detector_fingerprint":
return "public rule/config content exposes public detector fingerprints"
case "public_content_harness_metadata":
return "public contribution contains visible harness pipeline metadata"
case "public_content_ccm_harness_trailer":
return "public contribution contains a CCM-Harness trailer"
case "public_content_semantic_candidate":
return "public contribution contains text for semantic public content review"
default:
return "public contribution contains content that should not be published"
}
}
func suggestionForRule(rule string) string {
switch actionForRule(rule) {
case "REJECT":
return "remove the value from the public contribution and replace it with a non-sensitive placeholder"
default:
return "remove private workflow metadata before publishing the public contribution"
}
}
func redactAssignment(match string) string {
key, ok := credentialAssignmentKey(match)
if !ok {
return "<credential-assignment>"
}
return fmt.Sprintf("%s= <redacted>", strings.TrimSpace(key))
}
func credentialAssignmentKey(match string) (string, bool) {
idx := -1
for _, sep := range []string{":", "="} {
if candidate := strings.Index(match, sep); candidate >= 0 && (idx < 0 || candidate < idx) {
idx = candidate
}
}
if idx < 0 {
return "", false
}
return match[:idx], true
}
func redactToken(_ string) string {
return "<jwt-like-token>"
}
func redactedSemanticExcerpt(text string) string {
normalized := strings.Join(strings.Fields(text), " ")
if normalized == "" {
return ""
}
signals := semanticSignals(normalized)
if len(signals) == 0 {
return ""
}
sanitized := truncateRunes(sanitizeSemanticExcerpt(text), 600)
return fmt.Sprintf("semantic signals: %s; excerpt: %q", strings.Join(signals, ","), sanitized)
}
func semanticSignals(normalized string) []string {
lower := strings.ToLower(normalized)
var signals []string
add := func(signal string) {
for _, existing := range signals {
if existing == signal {
return
}
}
signals = append(signals, signal)
}
hasPrivateScope := strings.Contains(lower, "private") || strings.Contains(lower, "internal-only")
hasRequestMetadata := strings.Contains(lower, "request header") || strings.Contains(lower, "request headers") || strings.Contains(lower, "authorization header") || strings.Contains(lower, "metadata header")
hasTrustBoundary := strings.Contains(lower, "spoof") || strings.Contains(lower, "trust") || strings.Contains(lower, "risk scoring") || strings.Contains(lower, "classification")
hasRoadmap := strings.Contains(lower, "roadmap") || strings.Contains(lower, "migration") || strings.Contains(lower, "rollout") || strings.Contains(lower, "cutover") || strings.Contains(lower, "unpublished")
hasTiming := strings.Contains(lower, "target date") || strings.Contains(lower, "friday") || strings.Contains(lower, "monday") || strings.Contains(lower, "tuesday") || strings.Contains(lower, "wednesday") || strings.Contains(lower, "thursday") || strings.Contains(lower, "customer-visible")
hasImplementation := strings.Contains(lower, "server-side") || strings.Contains(lower, "implementation")
if hasPrivateScope && hasRequestMetadata && hasTrustBoundary {
add("private_scope")
add("request_metadata")
add("trust_boundary_detail")
}
if hasRoadmap && (hasPrivateScope || hasTiming) {
add("roadmap_detail")
if hasPrivateScope {
add("private_scope")
}
if hasTiming {
add("roadmap_timing")
}
}
if hasPrivateScope && hasImplementation && hasTrustBoundary {
add("private_scope")
add("implementation_detail")
add("trust_boundary_detail")
}
return signals
}
func sanitizeSemanticExcerpt(text string) string {
text = redactPrivateKeyBlocks(text)
text = credentialAssignmentRE.ReplaceAllStringFunc(text, sanitizeCredentialAssignment)
text = strings.ReplaceAll(text, `<redacted>"`, `<redacted>`)
text = strings.ReplaceAll(text, `<redacted>'`, `<redacted>`)
text = semanticBearerHeaderRE.ReplaceAllString(text, "Authorization: Bearer <redacted>")
text = jwtLikeRE.ReplaceAllString(text, "<jwt-like-token>")
text = credentialURLRE.ReplaceAllStringFunc(text, sanitizeCredentialURL)
return strings.Join(strings.Fields(text), " ")
}
func redactPrivateKeyBlocks(text string) string {
lines := strings.Split(text, "\n")
var out []string
inPrivateKey := false
for _, line := range lines {
if strings.Contains(line, privateKeyBeginPrefix) && strings.Contains(line, privateKeyMarker) {
out = append(out, "<private-key-block>")
inPrivateKey = true
if strings.Contains(line, privateKeyEndPrefix) && strings.Contains(line, privateKeyMarker) {
inPrivateKey = false
}
continue
}
if inPrivateKey {
if strings.Contains(line, privateKeyEndPrefix) && strings.Contains(line, privateKeyMarker) {
inPrivateKey = false
}
continue
}
out = append(out, line)
}
return strings.Join(out, "\n")
}
func sanitizeCredentialAssignment(match string) string {
key, ok := credentialAssignmentKey(match)
if !ok {
return "<credential-assignment>"
}
return strings.TrimSpace(key) + "=<redacted>"
}
func sanitizeCredentialURL(raw string) string {
redacted := redactCredentialURL(raw)
redacted = strings.ReplaceAll(redacted, "%3Cuser%3E", "<user>")
redacted = strings.ReplaceAll(redacted, "%3Credacted%3E", "<redacted>")
return redacted
}
func truncateRunes(text string, limit int) string {
if limit <= 0 {
return ""
}
runes := []rune(text)
if len(runes) <= limit {
return text
}
return string(runes[:limit]) + "..."
}

View File

@@ -0,0 +1,915 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package publiccontent
import (
"strings"
"testing"
)
func TestScanFileDetectsPublicLeakSignalsInPRDocs(t *testing.T) {
text := `# Pull Request
The public README accidentally contains realistic leak-shaped content:
` + "```bash\n" + `export SERVICE_` + `PASSWORD="example-password"
curl https://user:pass@` + `example.com/repo.git
` + "```\n" + `
` + privateKeyBeginPrefix + privateKeyMarker + `
example-key-body
` + privateKeyEndPrefix + privateKeyMarker + `
session_token: "` + jwtFixture("ZXhhbXBsZQ") + `"
Change` + `-Id: I0123456789abcdef0123456789abcdef01234567
Reviewed` + `-on: https://review.example.test/c/project/+/123
` + "Generated by " + "auto" + "mation" + `
/tmp/harness` + `-agent/work
CCM` + `-Harness: stage-17
`
got := ScanFile("docs/public-pr.md", []byte(text))
rules := findingRules(got)
for _, want := range []string{
"public_content_generic_credential",
"public_content_private_key_block",
"public_content_jwt_like_token",
"public_content_credential_url",
"public_content_change_id_trailer",
"public_content_reviewed_on_trailer",
"public_content_provenance_marker",
"public_content_harness_metadata",
"public_content_ccm_harness_trailer",
} {
if !rules[want] {
t.Fatalf("missing rule %s in findings %#v", want, got)
}
}
}
func TestScanFileWarnsForPrivateIPv4Examples(t *testing.T) {
got := ScanFile("docs/network.md", []byte("Local lab address: 192.168."+"0.10\n"))
rules := findingRules(got)
if !rules["public_content_private_ipv4"] {
t.Fatalf("missing private IPv4 warning, got %#v", got)
}
for _, item := range got {
if item.Rule == "public_content_private_ipv4" && string(item.Action) != "WARNING" {
t.Fatalf("private IPv4 action = %s, want WARNING", item.Action)
}
}
}
func TestSemanticCandidateRequiresSpecificRiskSignals(t *testing.T) {
benign := semanticCandidate("docs/network.md", "file", "For a local lab, use RFC1918 example host 192.168."+"0.10 only.", 1)
if len(benign) != 0 {
t.Fatalf("benign RFC1918 documentation should not produce semantic candidates: %#v", benign)
}
risky := semanticCandidate("docs/roadmap.md", "file", "private launch plan for internal migration rollout on Friday", 1)
if len(risky) != 1 {
t.Fatalf("risky semantic text should produce one semantic candidate, got %#v", risky)
}
if !strings.Contains(risky[0].Excerpt, "private_scope") || !strings.Contains(risky[0].Excerpt, "roadmap_detail") {
t.Fatalf("semantic candidate should retain redacted risk signals, got %#v", risky[0])
}
if !strings.Contains(risky[0].Excerpt, "private launch plan") {
t.Fatalf("semantic candidate should include sanitized review text, got %#v", risky[0])
}
}
func TestSemanticCandidateIgnoresBroadBenignSignals(t *testing.T) {
cases := []string{
"internal package refactor",
"internal request handling docs",
"request header behavior",
"implementation detail cleanup",
}
for _, tc := range cases {
if got := semanticCandidate("docs/public.md", "file", tc, 1); len(got) != 0 {
t.Fatalf("semanticCandidate(%q) = %#v, want none", tc, got)
}
}
}
func TestSemanticCandidateKeepsHighRiskCombinations(t *testing.T) {
cases := []string{
"private request header controls trust classification and spoof-prevention behavior",
"unpublished migration rollout has target date next Tuesday",
"private roadmap cutover exposes customer-visible timing",
}
for _, tc := range cases {
if got := semanticCandidate("docs/public.md", "file", tc, 1); len(got) != 1 {
t.Fatalf("semanticCandidate(%q) len = %d, want 1: %#v", tc, len(got), got)
}
}
}
func TestSemanticCandidateSanitizesReviewText(t *testing.T) {
text := `private rollout uses internal request headers.
SERVICE_PASSWORD="real-password-value"
Authorization: Bearer abcdefghijklmnopqrstuvwxyz
Authorization: Bearer abcdefghijkl+/Zm9vQmFy==
callback=https://user:secretpass@example.com/hook
token: ` + jwtFixture("c2VjcmV0") + `
standalone ` + jwtFixture("c3RhbmRhbG9uZQ") + `
` + privateKeyBeginPrefix + privateKeyMarker + `
secret-key-body
` + privateKeyEndPrefix + privateKeyMarker + `
`
got := semanticCandidate("docs/public.md", "file", text, 1)
if len(got) != 1 {
t.Fatalf("semantic candidate len = %d, want 1: %#v", len(got), got)
}
excerpt := got[0].Excerpt
for _, forbidden := range []string{
"real-password-value",
"abcdefghijklmnopqrstuvwxyz",
"Zm9vQmFy",
"user:secretpass@example.com",
jwtHeaderFixture(),
"secret-key-body",
} {
if strings.Contains(excerpt, forbidden) {
t.Fatalf("semantic candidate leaked %q in excerpt %q", forbidden, excerpt)
}
}
for _, want := range []string{
"SERVICE_PASSWORD=<redacted>",
"Authorization: Bearer <redacted>",
"https://<user>:<redacted>@example.com/hook",
"<jwt-like-token>",
"<private-key-block>",
} {
if !strings.Contains(excerpt, want) {
t.Fatalf("semantic candidate missing sanitized marker %q in excerpt %q", want, excerpt)
}
}
}
func TestSemanticCandidateRedactsCredentialValuesWithWhitespace(t *testing.T) {
text := "private launch plan for internal rollout on Friday\n" +
"SERVICE_" + "TOKEN=\"real " + "secret value\"\n"
got := semanticCandidate("docs/public.md", "file", text, 1)
if len(got) != 1 {
t.Fatalf("semantic candidate len = %d, want 1: %#v", len(got), got)
}
excerpt := got[0].Excerpt
for _, forbidden := range []string{"real " + "secret value", "secret value"} {
if strings.Contains(excerpt, forbidden) {
t.Fatalf("semantic candidate leaked credential tail %q in excerpt %q", forbidden, excerpt)
}
}
if !strings.Contains(excerpt, "SERVICE_TOKEN=<redacted>") {
t.Fatalf("semantic candidate should redact full credential assignment, got %q", excerpt)
}
}
func TestSemanticCandidateCoversRealE2ESemanticCases(t *testing.T) {
cases := []struct {
name string
text string
signals []string
}{
{
name: "private header detail",
text: "Public docs describe a private request header, server-side trust classification, and spoof-prevention behavior in enough detail for an implementation review.",
signals: []string{
"private_scope",
"request_metadata",
"trust_boundary_detail",
"implementation_detail",
},
},
{
name: "specific roadmap",
text: "Public release notes mention a specific unpublished migration phase, target date, and rollout direction for an internal-only plan.",
signals: []string{
"private_scope",
"roadmap_detail",
"roadmap_timing",
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
got := semanticCandidate("docs/public.md", "file", tc.text, 1)
if len(got) != 1 {
t.Fatalf("semantic candidate len = %d, want 1: %#v", len(got), got)
}
for _, signal := range tc.signals {
if !strings.Contains(got[0].Excerpt, signal) {
t.Fatalf("semantic candidate missing signal %q: %#v", signal, got[0])
}
}
})
}
}
func TestScanFileDetectsDetectorFingerprintOnlyInPublicRuleFiles(t *testing.T) {
got := ScanFile(".gitleaks.toml", []byte("[[rules]]\nid = \"public"+"-content-leakage\"\n"))
if !findingRules(got)["public_content_detector_fingerprint"] {
t.Fatalf("expected detector fingerprint finding, got %#v", got)
}
clean := ScanFile("docs/release-notes.md", []byte("public-content-leakage is discussed as ordinary release text\n"))
if findingRules(clean)["public_content_detector_fingerprint"] {
t.Fatalf("detector fingerprint should be scoped to public rule/config files: %#v", clean)
}
}
func TestScanFileIgnoresBenignPublicPlaceholders(t *testing.T) {
got := ScanFile("docs/examples.md", []byte(`Use APP_ID=cli_example_app_id and APP_SECRET=cli_example_app_secret in examples.
The docs may mention bearer-token placeholders, but they should not contain realistic tokens.
`))
if len(got) != 0 {
t.Fatalf("benign placeholders produced findings: %#v", got)
}
}
func TestScanFileDoesNotTreatURLEncodedCredentialAsPlaceholder(t *testing.T) {
got := ScanFile("docs/config.md", []byte("client_secret=abc%2Fdef%3Drealvalue\n"))
if !findingRules(got)["public_content_generic_credential"] {
t.Fatalf("URL-encoded credential should still be reported, got %#v", got)
}
}
func TestScanFileDoesNotTreatPlaceholderMarkerSubstringsAsPlaceholders(t *testing.T) {
got := ScanFile("docs/config.md", []byte(strings.Join([]string{
"API_KEY=notredactedreal",
"API_KEY=notplaceholdersecret",
"API_KEY=abcxxxxreal",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 3 {
t.Fatalf("placeholder-marker substring findings = %d, want 3: %#v", count, got)
}
}
func TestScanFileDetectsBase64PaddedCredentialAssignments(t *testing.T) {
paddedSecretPrefix := "dGhpc2lz" + "YXNlY3JldA"
paddedTokenPrefix := "YWJj" + "ZGVmZ2g"
paddedSecret := base64PaddedFixture(paddedSecretPrefix)
paddedToken := base64PaddedFixture(paddedTokenPrefix)
got := ScanFile("docs/config.md", []byte(strings.Join([]string{
`API_SECRET="` + paddedSecret + `"`,
"api_secret=" + paddedToken,
"api_secret: " + paddedToken,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
for _, forbidden := range []string{paddedSecret, paddedToken, paddedSecretPrefix, paddedTokenPrefix} {
if strings.Contains(item.Excerpt, forbidden) {
t.Fatalf("credential finding leaked base64 value %q in excerpt %q", forbidden, item.Excerpt)
}
}
}
}
if count != 3 {
t.Fatalf("base64 padded credentials findings = %d, want 3: %#v", count, got)
}
}
func TestScanFileDetectsQuotedJSONCredentialAssignments(t *testing.T) {
jsonToken := "real-json-token"
jsonSecret := "real " + "secret value"
jsonKey := "real-json-key"
jsonTenantToken := "real-tenant-json-token"
jsonAppSecret := "real-app-secret"
jsonPrefixedKey := "real-prefixed-key"
jsonTenantCamelToken := "real-tenant-camel-token"
jsonGithubToken := "real-github-token"
jsonVendorKey := "real-vendor-key"
jsonSlackBotToken := "xoxb-real-token"
got := ScanFile("docs/public.json", []byte(strings.Join([]string{
`{"access_` + `token":"` + jsonToken + `"}`,
`{"client_` + `secret": "` + jsonSecret + `"}`,
`{'api_` + `key': '` + jsonKey + `'}`,
`{"tenant_access_` + `token":"` + jsonTenantToken + `"}`,
`{"app_` + `secret":"` + jsonAppSecret + `"}`,
`{"x_api_` + `key":"` + jsonPrefixedKey + `"}`,
`{"tenantAccess` + `Token":"` + jsonTenantCamelToken + `"}`,
`{"github` + `Token":"` + jsonGithubToken + `"}`,
`{"vendorApi` + `Key":"` + jsonVendorKey + `"}`,
`{"slackBot` + `Token":"` + jsonSlackBotToken + `"}`,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
for _, forbidden := range []string{jsonToken, jsonSecret, jsonKey, jsonTenantToken, jsonAppSecret, jsonPrefixedKey, jsonTenantCamelToken, jsonGithubToken, jsonVendorKey, jsonSlackBotToken} {
if strings.Contains(item.Excerpt, forbidden) {
t.Fatalf("JSON credential finding leaked value %q in excerpt %q", forbidden, item.Excerpt)
}
}
}
}
if count != 10 {
t.Fatalf("JSON credential findings = %d, want 10: %#v", count, got)
}
}
func TestScanFileDetectsCredentialPhraseBeforeEnvironmentSuffix(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"API_KEY_OPENAI: real-openai-key",
"TOKEN_GITHUB: real-github-token",
"CLIENT_SECRET_GOOGLE: real-google-secret",
"SECRET_KEY_BASE: real-secret-key-base",
"APP_PASSWORD_PROD: real-prod-password",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule != "public_content_generic_credential" {
continue
}
count++
for _, forbidden := range []string{
"real-openai-key",
"real-github-token",
"real-google-secret",
"real-secret-key-base",
"real-prod-password",
} {
if strings.Contains(item.Excerpt, forbidden) {
t.Fatalf("credential finding leaked value %q in excerpt %q", forbidden, item.Excerpt)
}
}
}
if count != 5 {
t.Fatalf("credential suffix variants findings = %d, want 5: %#v", count, got)
}
}
func TestScanFileDetectsCredentialValuesThatLookLikeBareIdentifiers(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"API_KEY_OPENAI: prod_key",
"CLIENT_SECRET_GOOGLE: prod_secret",
"TOKEN_GITHUB: github_token",
"APP_PASSWORD_PROD: prod_password",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 4 {
t.Fatalf("bare identifier credential findings = %d, want 4: %#v", count, got)
}
}
func TestScanFileDetectsAngleWrappedRealisticCredentialValues(t *testing.T) {
stripeLike := "sk_" + "live_1234567890abcdef"
patLike := "gh" + "p_1234567890abcdef1234567890abcdef1234"
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"API_KEY: <" + stripeLike + ">",
"SECRET_TOKEN: <" + patLike + ">",
"CLIENT_SECRET: <real-client-secret-value>",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 3 {
t.Fatalf("angle-wrapped realistic credential findings = %d, want 3: %#v", count, got)
}
}
func TestScanFileDetectsCredentialShapedValuesUnderBenignKeys(t *testing.T) {
stripeLike := "sk_" + "live_1234567890abcdef"
patLike := "gh" + "p_1234567890abcdef1234567890abcdef1234"
got := ScanFile("docs/public.json", []byte(strings.Join([]string{
`{"access_token_expires_in":"` + patLike + `"}`,
`{"refresh_token_expires_in":"` + stripeLike + `"}`,
`{"client_secret_status":"real-client-secret-value"}`,
`{"client_secret_name":"real-client-secret-value"}`,
`{"app_token":"` + patLike + `"}`,
`{"sync_token":"` + stripeLike + `"}`,
`{"target_token":"real-client-secret-value"}`,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 7 {
t.Fatalf("credential-shaped benign-key findings = %d, want 7: %#v", count, got)
}
}
func TestScanFileDetectsBareIdentifierCredentialsWithMetadataSuffixes(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"API_KEY_NAME: prod_key",
"CLIENT_SECRET_NAME: prod_secret",
"SECRET_STATUS: prod_secret",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 3 {
t.Fatalf("metadata-suffixed bare credential findings = %d, want 3: %#v", count, got)
}
}
func TestScanFileDetectsAccessKeyCredentials(t *testing.T) {
accessKey := "AK" + "IAIOSFODNN7EXAMPX"
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"AWS_ACCESS_KEY_ID: " + accessKey,
"ACCESS_KEY_ID: " + accessKey,
"ACCESS_KEY: " + accessKey,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule != "public_content_generic_credential" {
continue
}
count++
for _, forbidden := range []string{
accessKey,
} {
if strings.Contains(item.Excerpt, forbidden) {
t.Fatalf("access key finding leaked value %q in excerpt %q", forbidden, item.Excerpt)
}
}
}
if count != 3 {
t.Fatalf("access key credential findings = %d, want 3: %#v", count, got)
}
}
func TestScanFileDetectsPrivateKeyAssignments(t *testing.T) {
privateKey := "LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0t"
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"PRIVATE_KEY: " + privateKey,
"SSH_PRIVATE_KEY: " + privateKey,
"JWT_PRIVATE_KEY: " + privateKey,
"SIGNING_PRIVATE_KEY: " + privateKey,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule != "public_content_generic_credential" {
continue
}
count++
if strings.Contains(item.Excerpt, privateKey) {
t.Fatalf("private key finding leaked value in excerpt %q", item.Excerpt)
}
}
if count != 4 {
t.Fatalf("private key assignment findings = %d, want 4: %#v", count, got)
}
}
func TestScanFileDetectsWebhookURLs(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"SLACK_WEBHOOK_URL=https://hooks." + "slack.com/services/T00000000/B00000000/abcdefghijklmnopqrstuvwx",
"DISCORD_WEBHOOK_URL=https://discord.com/api/" + "webhooks/123456789012345678/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
"WEBHOOK_URL=https://example.invalid/hooks/secret-path-token-1234567890",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule != "public_content_generic_credential" {
continue
}
count++
for _, forbidden := range []string{
"hooks." + "slack.com/services",
"discord.com/api/" + "webhooks",
"secret-path-token-1234567890",
} {
if strings.Contains(item.Excerpt, forbidden) {
t.Fatalf("webhook finding leaked value %q in excerpt %q", forbidden, item.Excerpt)
}
}
}
if count != 3 {
t.Fatalf("webhook URL findings = %d, want 3: %#v", count, got)
}
}
func TestScanFileDetectsWebhookURLsWithHostPlaceholders(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"WEBHOOK_URL=https://<host>/hooks/real-secret-token-1234567890",
"SLACK_WEBHOOK_URL=https://<host>/services/T00000000/B00000000/abcdefghijklmnopqrstuvwx",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule != "public_content_generic_credential" {
continue
}
count++
}
if count != 2 {
t.Fatalf("host-placeholder webhook findings = %d, want 2: %#v", count, got)
}
}
func TestScanFileAllowsBenignWebhookFields(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"webhook_count: 2",
"webhook_retries=3",
"webhook_endpoint=https://example.invalid/hooks/example",
"webhook_path=/hooks/example",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("benign webhook field should not be credential finding: %#v", got)
}
}
}
func TestScanFileDetectsCredentialURLWithEmptyUsername(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte("REDIS_URL=redis://:password@example.invalid/0\n"))
for _, item := range got {
if item.Rule == "public_content_credential_url" {
if strings.Contains(item.Excerpt, "password") {
t.Fatalf("credential URL finding leaked password in excerpt %q", item.Excerpt)
}
return
}
}
t.Fatalf("missing empty-username credential URL finding: %#v", got)
}
func TestScanFileAllowsPrivateKeyStateBooleans(t *testing.T) {
got := ScanFile("internal/qualitygate/publiccontent/collect.go", []byte(strings.Join([]string{
"inPrivateKey = true",
"inPrivateKey = false",
"hasPrivateKey: false",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("private key state boolean should not be credential finding: %#v", got)
}
}
}
func TestScanFileAllowsCredentialReferenceValues(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"API_KEY=${API_KEY}",
"API_KEY=$API_KEY",
"API_KEY=process.env.API_KEY",
"API_KEY: ${{ secrets.API_KEY }}",
"TOKEN: ${{ env.TOKEN }}",
"GITHUB_TOKEN: ${{ github.token }}",
"TOKEN=$(vault kv get -field=token secret/path)",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("credential reference should not be generic credential finding: %#v", got)
}
}
}
func TestScanFileDetectsMalformedGithubExpressionCredentialValues(t *testing.T) {
stripeLike := "sk_" + "live_1234567890abcdef"
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"API_KEY=${{" + stripeLike + "}}",
"TOKEN=${{real-secret-token-value}}",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 2 {
t.Fatalf("malformed GitHub expression credential findings = %d, want 2: %#v", count, got)
}
}
func TestScanFileDetectsDollarPrefixedCredentialValues(t *testing.T) {
stripeLike := "sk_" + "live_1234567890abcdef"
patLike := "gh" + "p_1234567890abcdef1234567890abcdef1234"
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"API_KEY=$" + stripeLike,
"GITHUB_TOKEN=$" + patLike,
"TOKEN=$(echo " + stripeLike + ")",
"API_KEY=process.env." + stripeLike,
"GITHUB_TOKEN=process.env." + patLike,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 5 {
t.Fatalf("reference-shaped credential findings = %d, want 5: %#v", count, got)
}
}
func TestScanFileAllowsCredentialURLPlaceholders(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"DATABASE_URL=postgres://<user>:<password>@example.invalid/db",
"DATABASE_URL=postgres://user:%3Cpassword%3E@example.invalid/db",
"WEBHOOK_URL=https://example.invalid/hooks/<webhook-token>",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_credential_url" {
t.Fatalf("credential URL placeholder should not be credential URL finding: %#v", got)
}
if item.Rule == "public_content_generic_credential" {
t.Fatalf("credential URL placeholder should not be generic credential finding: %#v", got)
}
}
}
func TestScanFileDetectsCredentialURLsWithRedactedSubstringPasswords(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte("DATABASE_URL=postgres://user:notredactedreal@example.invalid/db\n"))
for _, item := range got {
if item.Rule == "public_content_credential_url" {
return
}
}
t.Fatalf("missing credential URL with redacted substring password: %#v", got)
}
func TestScanFileDetectsCredentialURLsWithPlaceholderUserAndRealPassword(t *testing.T) {
stripeLike := "sk_" + "live_1234567890abcdef"
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"DATABASE_URL=postgres://<user>:real-secret@example.invalid/db",
"DATABASE_URL=postgres://<user>:" + stripeLike + "@example.invalid/db",
"URL=https://<user>:real-secret@example.invalid/path",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule != "public_content_credential_url" {
continue
}
count++
for _, forbidden := range []string{"real-secret", stripeLike} {
if strings.Contains(item.Excerpt, forbidden) {
t.Fatalf("credential URL finding leaked value %q in excerpt %q", forbidden, item.Excerpt)
}
}
}
if count != 3 {
t.Fatalf("placeholder-user credential URL findings = %d, want 3: %#v", count, got)
}
}
func TestScanFileAllowsCommonAngleWrappedCredentialPlaceholders(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"API_KEY=<api-key>",
"CLIENT_SECRET=<client-secret>",
"ACCESS_TOKEN=<access-token>",
"API_KEY=<your-api-key>",
"SECRET_TOKEN=<github-token>",
"CLIENT_SECRET=<my-client-secret>",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("common angle-wrapped placeholder should not be credential finding: %#v", got)
}
}
}
func TestScanFileAllowsBenignJSONTokenFields(t *testing.T) {
got := ScanFile("docs/public.json", []byte(strings.Join([]string{
`{"tokenizer":"cl100k_base"}`,
`{"token_count": 42}`,
`{"page_token":"next"}`,
`{"next_page_token":"next"}`,
`{"file_token":"file-example"}`,
`{"doc_token":"doc-example"}`,
`{"node_token":"node-example"}`,
`{"wiki_token":"wikcn_public_doc_example"}`,
`{"folder_token":"folder-example"}`,
`{"obj_token":"obj-example"}`,
`{"spreadsheet_token":"sheet-example"}`,
`{"parent_node_token":"parent-example"}`,
`{"origin_node_token":"origin-example"}`,
`{"drive_route_token":"route-example"}`,
`{"token":"<wiki_token>"}`,
`{"token":"wiki_token"}`,
`{"token_url":"https://example.com/oauth/token"}`,
`{"token_endpoint":"https://example.com/oauth/token"}`,
`{"token_format":"Bearer"}`,
`{"secret_name":"public-example-secret"}`,
`{"base_token":"base-example"}`,
`{"app_token":"app-example"}`,
`{"sync_token":"sync-example"}`,
`{"parent_token":"parent-example"}`,
`{"target_token":"target-example"}`,
`{"parent_file_token":"parent-file-example"}`,
`{"refresh_token_expires_in": 7200}`,
`{"access_token_expires_in": 7200}`,
`{"token_expires_in": 7200}`,
`{"token_status":"active"}`,
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("benign JSON token field should not be credential finding: %#v", got)
}
}
}
func TestScanFileAllowsBenignUnquotedTokenFields(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"tokens: 128",
"token_type: bearer",
"max_tokens: 2000",
"completion_tokens: 200",
"prompt_tokens: 100",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("benign unquoted token field should not be credential finding: %#v", got)
}
}
}
func TestSemanticCandidateRedactsColonAssignmentsWithEqualsInValue(t *testing.T) {
paddedSecretPrefix := "YWJj" + "ZGVmZ2g"
paddedSecret := base64PaddedFixture(paddedSecretPrefix)
text := "private launch plan for internal rollout on Friday\n" +
"api_" + "secret: " + paddedSecret + "\n" +
`{"access_` + `token":"` + paddedSecret + `"}` + "\n"
got := semanticCandidate("docs/public.md", "file", text, 1)
if len(got) != 1 {
t.Fatalf("semantic candidate len = %d, want 1: %#v", len(got), got)
}
if strings.Contains(got[0].Excerpt, paddedSecret) || strings.Contains(got[0].Excerpt, paddedSecretPrefix) {
t.Fatalf("semantic candidate leaked colon assignment with padding: %#v", got[0])
}
}
func TestSemanticCandidateRedactsEscapedQuoteCredentialValues(t *testing.T) {
doubleQuotedValue := "abc\\\"def-secret"
singleQuotedValue := "abc\\'def-secret"
text := "private launch plan for internal rollout on Friday\n" +
`{"access_` + `token":"` + doubleQuotedValue + `"}` + "\n" +
`{'client_` + `secret': '` + singleQuotedValue + `'}` + "\n"
got := semanticCandidate("docs/public.md", "file", text, 1)
if len(got) != 1 {
t.Fatalf("semantic candidate len = %d, want 1: %#v", len(got), got)
}
for _, forbidden := range []string{doubleQuotedValue, singleQuotedValue, "def-secret"} {
if strings.Contains(got[0].Excerpt, forbidden) {
t.Fatalf("semantic candidate leaked escaped-quote credential value %q: %#v", forbidden, got[0])
}
}
}
func TestScanFileDoesNotTreatEqualityComparisonAsCredential(t *testing.T) {
got := ScanFile("docs/example.md", []byte("if token == \"expected\" { return nil }\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("equality comparison should not be credential assignment: %#v", got)
}
}
}
func TestScanFileDetectsBearerHeaderFinding(t *testing.T) {
got := ScanFile("docs/auth.md", []byte("Authorization: Bearer abcdefghijklmnopqrstuvwxyz\n"))
for _, item := range got {
if item.Rule == "public_content_bearer_header" {
if item.Action != "REJECT" || item.File != "docs/auth.md" || item.Line != 1 || item.Source != "file" {
t.Fatalf("bearer finding attribution = %#v", item)
}
return
}
}
t.Fatalf("missing bearer finding: %#v", got)
}
func TestScanFileDetectsJSONBearerHeaders(t *testing.T) {
token := "abcdefghijklmnopqrstuvwxyz"
got := ScanFile("docs/auth.json", []byte(strings.Join([]string{
`{"Authorization":"Bearer ` + token + `"}`,
`{"headers":{"Authorization":"Bearer ` + token + `"}}`,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule != "public_content_bearer_header" {
continue
}
count++
if item.Action != "REJECT" || item.File != "docs/auth.json" || item.Source != "file" {
t.Fatalf("bearer finding attribution = %#v", item)
}
if strings.Contains(item.Excerpt, token) {
t.Fatalf("bearer finding leaked token: %#v", item)
}
}
if count != 2 {
t.Fatalf("JSON bearer findings = %d, want 2: %#v", count, got)
}
}
func TestSemanticCandidateRedactsJSONBearerHeaders(t *testing.T) {
token := "abcdefghijklmnopqrstuvwxyz"
text := "private launch plan for internal rollout on Friday\n" +
`{"headers":{"Authorization":"Bearer ` + token + `"}}` + "\n"
got := semanticCandidate("docs/public.md", "file", text, 1)
if len(got) != 1 {
t.Fatalf("semantic candidate len = %d, want 1: %#v", len(got), got)
}
if strings.Contains(got[0].Excerpt, token) {
t.Fatalf("semantic candidate leaked JSON bearer token: %#v", got[0])
}
if !strings.Contains(got[0].Excerpt, "Authorization: Bearer <redacted>") {
t.Fatalf("semantic candidate should redact JSON bearer header, got %#v", got[0])
}
}
func TestScanFileDetectsCommonProvenanceMarkers(t *testing.T) {
text := strings.Join([]string{
"Generated with automated code assistant",
"Co-authored-by: automated-code-assistant <bot@example.invalid>",
"🤖 generated by automation",
}, "\n")
got := ScanFile("docs/public.md", []byte(text))
var count int
for _, item := range got {
if item.Rule == "public_content_provenance_marker" {
count++
}
}
if count != 3 {
t.Fatalf("provenance marker count = %d, want 3: %#v", count, got)
}
}
func TestScanFileAllowsHumanCoAuthorTrailer(t *testing.T) {
got := ScanFile("docs/public.md", []byte(strings.Join([]string{
"Co-authored-by: Jane Doe <jane@example.invalid>",
"Co-authored-by: Alice Abbot <abbot@example.invalid>",
}, "\n")))
for _, item := range got {
if item.Rule == "public_content_provenance_marker" {
t.Fatalf("human co-author trailer should not be blocked: %#v", got)
}
}
}
func TestScanFileAllowsPercentWrappedPlaceholder(t *testing.T) {
got := ScanFile("docs/config.md", []byte("client_secret=%CLIENT_SECRET%\n"))
if len(got) != 0 {
t.Fatalf("percent-wrapped placeholder produced findings: %#v", got)
}
}
func TestScanFileDetectsPercentWrappedCredentialValues(t *testing.T) {
stripeLike := "sk_" + "live_1234567890abcdef"
patLike := "gh" + "p_1234567890abcdef1234567890abcdef1234"
got := ScanFile("docs/config.md", []byte(strings.Join([]string{
"CLIENT_SECRET=%" + stripeLike + "%",
"GITHUB_TOKEN=%" + patLike + "%",
"TOKEN=%real-secret-token-value%",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 3 {
t.Fatalf("percent-wrapped credential findings = %d, want 3: %#v", count, got)
}
}
func findingRules(items []Finding) map[string]bool {
out := map[string]bool{}
for _, item := range items {
out[item.Rule] = true
}
return out
}
func jwtFixture(subject string) string {
return strings.Join([]string{
jwtHeaderFixture(),
"eyJzdWIiOiJ" + subject + "In0",
"signature" + "part",
}, ".")
}
func jwtHeaderFixture() string {
return "eyJhbGciOiJI" + "UzI1NiJ9"
}
func base64PaddedFixture(prefix string) string {
return prefix + "=="
}

View File

@@ -0,0 +1,30 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package publiccontent
import "github.com/larksuite/cli/internal/qualitygate/report"
type Options struct {
Repo string
ChangedFrom string
MetadataPath string
BranchName string
}
type Metadata struct {
Title string `json:"title"`
Body string `json:"body"`
Branch string `json:"branch"`
}
type Finding struct {
Rule string
Action report.Action
File string
Line int
Source string
Excerpt string
Message string
Suggestion string
}

View File

@@ -174,8 +174,9 @@ type materializedExample struct {
}
type placeholderContext struct {
FlagName string
FlagUsage string
FlagName string
FlagUsage string
FlagDefault string
}
func materializePlaceholderExample(raw string, cmd manifest.Command) (materializedExample, bool) {
@@ -247,6 +248,7 @@ func placeholderContextForFlag(name string, flag *manifest.Flag) placeholderCont
ctx := placeholderContext{FlagName: name}
if flag != nil {
ctx.FlagUsage = flag.Usage
ctx.FlagDefault = flag.DefValue
}
return ctx
}
@@ -309,11 +311,17 @@ func fakeValueForPlaceholder(raw string, ctx placeholderContext) (string, bool)
if name == "" {
return "", false
}
if value, ok := fakeNumericValueForPlaceholder(name, ctx); ok {
return value, true
}
if value, ok := fakeContextualURLValueForPlaceholder(name, ctx); ok {
return value, true
}
if value, ok := fakeValueFromPlaceholderName(name); ok {
return value, true
}
if isGenericPlaceholderName(name) {
return fakeValueFromUsageHint(ctx.FlagUsage)
return fakeValueFromContextHint(ctx)
}
return "", false
}
@@ -336,16 +344,26 @@ func fakeValueFromPlaceholderName(name string) (string, bool) {
return "file_test123", true
case hasPlaceholderToken(tokens, "file") && hasPlaceholderToken(tokens, "token"):
return "file_test123", true
case hasPlaceholderToken(tokens, "folder") && hasPlaceholderToken(tokens, "token"):
return "fld_test123", true
case hasPlaceholderToken(tokens, "image", "img"):
return "img_test123", true
case hasPlaceholderToken(tokens, "app"):
return "app_test123", true
case hasPlaceholderToken(tokens, "draft"):
return "draft_test123", true
case hasPlaceholderToken(tokens, "label"):
return "label_test123", true
case hasPlaceholderToken(tokens, "share"):
return "share_test123", true
case hasPlaceholderToken(tokens, "doc", "document"):
return "doc_test123", true
case hasPlaceholderToken(tokens, "sheet", "spreadsheet"):
return "shtcn_test123", true
case hasPlaceholderToken(tokens, "base"):
return "base_test123", true
case hasPlaceholderToken(tokens, "space"):
return "space_test123", true
case hasPlaceholderToken(tokens, "table"):
return "tbl_test123", true
case hasPlaceholderToken(tokens, "view"):
@@ -377,17 +395,98 @@ func fakeValueFromPlaceholderName(name string) (string, bool) {
}
}
func fakeValueFromUsageHint(usage string) (string, bool) {
match := placeholderValuePattern.FindStringSubmatch(strings.ToLower(usage))
func fakeValueFromContextHint(ctx placeholderContext) (string, bool) {
if value, ok := fakeNumericValueForPlaceholder("", ctx); ok {
return value, true
}
if value, ok := fakeContextualURLValueForPlaceholder("", ctx); ok {
return value, true
}
match := placeholderValuePattern.FindStringSubmatch(strings.ToLower(ctx.FlagUsage))
if len(match) != 2 || !knownTokenPrefix(match[1]) {
return "", false
}
return match[1] + "_test123", true
}
func fakeContextualURLValueForPlaceholder(name string, ctx placeholderContext) (string, bool) {
nameTokens := placeholderTokenSet(name)
flagName := strings.ReplaceAll(strings.ToLower(ctx.FlagName), "-", "_")
flagTokens := placeholderTokenSet(flagName)
if !hasPlaceholderToken(nameTokens, "url", "link") && !hasPlaceholderToken(flagTokens, "url", "link") {
return "", false
}
usage := strings.ToLower(ctx.FlagUsage)
if strings.Contains(usage, "lark") || strings.Contains(usage, "feishu") || strings.Contains(usage, "document url") {
return "https://example.feishu.cn/docx/doc_test123", true
}
return "", false
}
func fakeNumericValueForPlaceholder(name string, ctx placeholderContext) (string, bool) {
nameTokens := placeholderTokenSet(name)
flagName := strings.ReplaceAll(strings.ToLower(ctx.FlagName), "-", "_")
flagTokens := placeholderTokenSet(flagName)
usage := strings.ToLower(ctx.FlagUsage)
switch {
case placeholderTokenPair(nameTokens, "meeting", "id") || placeholderTokenPair(flagTokens, "meeting", "id"):
return "400000000001", true
case placeholderTokenPair(nameTokens, "meeting", "ids") || placeholderTokenPair(flagTokens, "meeting", "ids"):
return "400000000001", true
case placeholderTokenPair(nameTokens, "meeting", "no") || placeholderTokenPair(flagTokens, "meeting", "no"):
return "123456789", true
case placeholderTokenPair(nameTokens, "meeting", "number") || placeholderTokenPair(flagTokens, "meeting", "number"):
return "123456789", true
case hasPlaceholderToken(nameTokens, "timestamp") || hasPlaceholderToken(flagTokens, "timestamp") || strings.Contains(usage, "unix timestamp"):
return defaultPositiveInteger(ctx.FlagDefault, "1893456000"), true
case placeholderTokenPair(nameTokens, "page", "size") || placeholderTokenPair(flagTokens, "page", "size"):
return defaultPositiveInteger(ctx.FlagDefault, "20"), true
case placeholderTokenPair(nameTokens, "page", "limit") || placeholderTokenPair(flagTokens, "page", "limit"):
return defaultPositiveInteger(ctx.FlagDefault, "10"), true
case numericPlaceholderName(nameTokens) || numericPlaceholderName(flagTokens) || numericUsageHint(usage):
return defaultPositiveInteger(ctx.FlagDefault, "20"), true
default:
return "", false
}
}
func numericPlaceholderName(tokens map[string]bool) bool {
if len(tokens) == 0 || hasPlaceholderToken(tokens, "token", "format", "type", "status", "mode") {
return false
}
return hasPlaceholderToken(tokens,
"amount", "count", "depth", "height", "index", "length", "limit", "max",
"number", "revision", "size", "width",
)
}
func numericUsageHint(usage string) bool {
if usage == "" {
return false
}
return strings.Contains(usage, "positive integer") ||
strings.Contains(usage, "decimal integer") ||
strings.Contains(usage, "number of ") ||
strings.Contains(usage, "(number)")
}
func defaultPositiveInteger(raw, fallback string) string {
raw = strings.TrimSpace(raw)
if raw == "" || strings.HasPrefix(raw, "-") || raw == "0" {
return fallback
}
for _, r := range raw {
if r < '0' || r > '9' {
return fallback
}
}
return raw
}
func knownTokenPrefix(prefix string) bool {
switch prefix {
case "app", "base", "doc", "file", "fld", "img", "item", "meeting", "obcn", "oc", "od", "om", "ou", "page", "rec", "shtcn", "task", "tbl", "token", "viw", "wiki":
case "app", "base", "doc", "draft", "file", "fld", "img", "item", "label", "meeting", "obcn", "oc", "od", "om", "ou", "page", "rec", "share", "shtcn", "space", "task", "tbl", "token", "viw", "wiki":
return true
default:
return false
@@ -431,6 +530,10 @@ func hasPlaceholderToken(tokens map[string]bool, wants ...string) bool {
return false
}
func placeholderTokenPair(tokens map[string]bool, first, second string) bool {
return tokens[first] && tokens[second]
}
func hasUnresolvedDryRunPlaceholder(value string) bool {
if skillscan.HasPlaceholder(value) {
return true
@@ -623,6 +726,7 @@ func appendDryRunArg(raw string) ([]string, error) {
return nil, fmt.Errorf("not a lark-cli command")
}
argv = truncateShellTail(argv)
argv = forceDryRunJSONFormat(argv)
hasDryRunArg := false
dryRunEnabled := false
for _, arg := range argv[1:] {
@@ -642,6 +746,23 @@ func appendDryRunArg(raw string) ([]string, error) {
return append(argv[1:], "--dry-run"), nil
}
func forceDryRunJSONFormat(argv []string) []string {
for i := 1; i < len(argv); i++ {
arg := argv[i]
if arg == "--format" {
if i+1 < len(argv) && argv[i+1] == "pretty" {
argv[i+1] = "json"
}
return argv
}
if arg == "--format=pretty" {
argv[i] = "--format=json"
return argv
}
}
return argv
}
func truncateShellTail(argv []string) []string {
for i, arg := range argv {
if i == 0 {

View File

@@ -305,6 +305,161 @@ func TestRunDryRunsMaterializesInlinePlaceholderFlagValues(t *testing.T) {
}
}
func TestRunDryRunsMaterializesNumericPlaceholderFlagValues(t *testing.T) {
cliBin, argsPath := fakeDryRunCLI(t, `{"api":[{"method":"GET","url":"/open-apis/vc/v1/bots/events","params":{"meeting_id":"400000000001","page_size":50}}]}`)
m := manifest.Manifest{Commands: []manifest.Command{{
Path: "vc +meeting-events",
Runnable: true,
Flags: []manifest.Flag{
{Name: "meeting-id", TakesValue: true, Usage: "meeting ID to query; must be a long positive integer, not a 9-digit meeting number"},
{Name: "page-size", TakesValue: true, Usage: "page size, 20-100 (default 50)", DefValue: "50"},
{Name: "dry-run"},
},
}}}
ex := skillscan.Example{
Raw: "lark-cli vc +meeting-events --meeting-id <meeting_id> --page-size <page_size>",
SourceFile: "skills/lark-vc-agent/SKILL.md",
Line: 120,
HasPlaceholder: true,
}
diags, facts := RunDryRuns(context.Background(), cliBin, m, []skillscan.Example{ex})
if len(diags) != 0 {
t.Fatalf("RunDryRuns() diagnostics = %#v", diags)
}
if len(facts) != 1 || !facts[0].Executable || facts[0].SkipReason != "" {
t.Fatalf("numeric placeholder example should be executable after materialization: %#v", facts)
}
wantArgs := []string{"vc", "+meeting-events", "--meeting-id", "400000000001", "--page-size", "50", "--dry-run"}
if gotArgs := readArgs(t, argsPath); !reflect.DeepEqual(gotArgs, wantArgs) {
t.Fatalf("fake CLI args = %#v, want %#v", gotArgs, wantArgs)
}
}
func TestRunDryRunsMaterializesNumericPlaceholdersInsideJSONFlags(t *testing.T) {
cliBin, argsPath := fakeDryRunCLI(t, `{"api":[{"method":"GET","url":"/open-apis/test","params":{"timestamp":"1893456000","count":"20"}}]}`)
m := manifest.Manifest{Commands: []manifest.Command{{
Path: "api GET",
Runnable: true,
Flags: []manifest.Flag{
{Name: "params", TakesValue: true},
{Name: "dry-run"},
},
}}}
ex := skillscan.Example{
Raw: `lark-cli api GET /open-apis/test --params '{"timestamp":"<timestamp>","count":"<count>"}'`,
SourceFile: "skills/lark-demo/SKILL.md",
Line: 20,
HasPlaceholder: true,
}
diags, facts := RunDryRuns(context.Background(), cliBin, m, []skillscan.Example{ex})
if len(diags) != 0 {
t.Fatalf("RunDryRuns() diagnostics = %#v", diags)
}
if len(facts) != 1 || !facts[0].Executable || facts[0].SkipReason != "" {
t.Fatalf("JSON numeric placeholder example should be executable after materialization: %#v", facts)
}
wantArgs := []string{"api", "GET", "/open-apis/test", "--params", `{"timestamp":"1893456000","count":"20"}`, "--dry-run"}
if gotArgs := readArgs(t, argsPath); !reflect.DeepEqual(gotArgs, wantArgs) {
t.Fatalf("fake CLI args = %#v, want %#v", gotArgs, wantArgs)
}
}
func TestRunDryRunsMaterializesLarkDocumentURLPlaceholders(t *testing.T) {
cliBin, argsPath := fakeDryRunCLI(t, `{"api":[{"method":"GET","url":"/open-apis/drive/v1/metas/batch_query"}]}`)
m := manifest.Manifest{Commands: []manifest.Command{{
Path: "drive +inspect",
Runnable: true,
Flags: []manifest.Flag{
{Name: "url", TakesValue: true, Usage: "Lark/Feishu document URL (docx, doc, sheet, bitable, wiki, file, folder, mindnote, slides)"},
{Name: "format", TakesValue: true},
{Name: "dry-run"},
},
}}}
ex := skillscan.Example{
Raw: "lark-cli drive +inspect --url '<url>' --format json",
SourceFile: "skills/lark-drive/references/lark-drive-workflow-permission-governance-commands.md",
Line: 15,
HasPlaceholder: true,
}
diags, facts := RunDryRuns(context.Background(), cliBin, m, []skillscan.Example{ex})
if len(diags) != 0 {
t.Fatalf("RunDryRuns() diagnostics = %#v", diags)
}
if len(facts) != 1 || !facts[0].Executable || facts[0].SkipReason != "" {
t.Fatalf("Lark URL placeholder example should be executable after materialization: %#v", facts)
}
wantArgs := []string{"drive", "+inspect", "--url", "https://example.feishu.cn/docx/doc_test123", "--format", "json", "--dry-run"}
if gotArgs := readArgs(t, argsPath); !reflect.DeepEqual(gotArgs, wantArgs) {
t.Fatalf("fake CLI args = %#v, want %#v", gotArgs, wantArgs)
}
}
func TestRunDryRunsMaterializesResourceIDPlaceholderFlagValues(t *testing.T) {
cliBin, argsPath := fakeDryRunCLI(t, `{"api":[{"method":"GET","url":"/open-apis/wiki/v2/spaces/space_test123/nodes"}]}`)
m := manifest.Manifest{Commands: []manifest.Command{{
Path: "wiki +node-list",
Runnable: true,
Flags: []manifest.Flag{
{Name: "space-id", TakesValue: true, Usage: "wiki space ID"},
{Name: "page-token", TakesValue: true, Usage: "page token"},
{Name: "format", TakesValue: true},
{Name: "dry-run"},
},
}}}
ex := skillscan.Example{
Raw: "lark-cli wiki +node-list --space-id <space_id> --page-token <PAGE_TOKEN> --format json",
SourceFile: "skills/lark-wiki/references/lark-wiki-node-list.md",
Line: 24,
HasPlaceholder: true,
}
diags, facts := RunDryRuns(context.Background(), cliBin, m, []skillscan.Example{ex})
if len(diags) != 0 {
t.Fatalf("RunDryRuns() diagnostics = %#v", diags)
}
if len(facts) != 1 || !facts[0].Executable || facts[0].SkipReason != "" {
t.Fatalf("resource ID placeholder example should be executable after materialization: %#v", facts)
}
wantArgs := []string{"wiki", "+node-list", "--space-id", "space_test123", "--page-token", "page_test123", "--format", "json", "--dry-run"}
if gotArgs := readArgs(t, argsPath); !reflect.DeepEqual(gotArgs, wantArgs) {
t.Fatalf("fake CLI args = %#v, want %#v", gotArgs, wantArgs)
}
}
func TestRunDryRunsMaterializesResourcePlaceholdersInsideJSONFlags(t *testing.T) {
cliBin, argsPath := fakeDryRunCLI(t, `{"api":[{"method":"POST","url":"/open-apis/mail/v1/user_mailboxes/me/drafts/draft_test123/send"}]}`)
m := manifest.Manifest{Commands: []manifest.Command{{
Path: "mail user_mailbox.drafts send",
Runnable: true,
Flags: []manifest.Flag{
{Name: "params", TakesValue: true},
{Name: "data", TakesValue: true},
{Name: "dry-run"},
},
}}}
ex := skillscan.Example{
Raw: `lark-cli mail user_mailbox.drafts send --params '{"user_mailbox_id":"me","draft_id":"<draft_id>"}' --data '{"send_time":"<unix_timestamp>"}'`,
SourceFile: "skills/lark-mail/references/lark-mail-send.md",
Line: 172,
HasPlaceholder: true,
}
diags, facts := RunDryRuns(context.Background(), cliBin, m, []skillscan.Example{ex})
if len(diags) != 0 {
t.Fatalf("RunDryRuns() diagnostics = %#v", diags)
}
if len(facts) != 1 || !facts[0].Executable || facts[0].SkipReason != "" {
t.Fatalf("JSON resource placeholder example should be executable after materialization: %#v", facts)
}
wantArgs := []string{"mail", "user_mailbox.drafts", "send", "--params", `{"user_mailbox_id":"me","draft_id":"draft_test123"}`, "--data", `{"send_time":"1893456000"}`, "--dry-run"}
if gotArgs := readArgs(t, argsPath); !reflect.DeepEqual(gotArgs, wantArgs) {
t.Fatalf("fake CLI args = %#v, want %#v", gotArgs, wantArgs)
}
}
func TestRunDryRunsSkipsUnknownFlagsBeforeDryRun(t *testing.T) {
m := manifest.Manifest{Commands: []manifest.Command{{
Path: "im +chat-messages-list",
@@ -600,6 +755,51 @@ func TestAppendDryRunArgDoesNotDuplicate(t *testing.T) {
}
}
func TestAppendDryRunArgForcesJSONFormat(t *testing.T) {
got, err := appendDryRunArg("lark-cli vc +meeting-events --meeting-id 400000000001 --format pretty")
if err != nil {
t.Fatalf("appendDryRunArg() error = %v", err)
}
want := []string{"vc", "+meeting-events", "--meeting-id", "400000000001", "--format", "json", "--dry-run"}
if !reflect.DeepEqual(got, want) {
t.Fatalf("appendDryRunArg() = %#v, want %#v", got, want)
}
}
func TestAppendDryRunArgForcesInlineJSONFormat(t *testing.T) {
got, err := appendDryRunArg("lark-cli vc +meeting-events --meeting-id 400000000001 --format=pretty --dry-run")
if err != nil {
t.Fatalf("appendDryRunArg() error = %v", err)
}
want := []string{"vc", "+meeting-events", "--meeting-id", "400000000001", "--format=json", "--dry-run"}
if !reflect.DeepEqual(got, want) {
t.Fatalf("appendDryRunArg() = %#v, want %#v", got, want)
}
}
func TestAppendDryRunArgPreservesNonPrettyFormat(t *testing.T) {
for _, raw := range []string{
"lark-cli mail +watch --format data --dry-run",
"lark-cli export +events --format=ndjson --dry-run",
"lark-cli docs +fetch --format table",
} {
got, err := appendDryRunArg(raw)
if err != nil {
t.Fatalf("appendDryRunArg(%q) error = %v", raw, err)
}
for _, arg := range got {
if arg == "--format=json" {
t.Fatalf("appendDryRunArg(%q) unexpectedly rewrote inline format: %#v", raw, got)
}
}
for i, arg := range got {
if arg == "--format" && i+1 < len(got) && got[i+1] == "json" {
t.Fatalf("appendDryRunArg(%q) unexpectedly rewrote split format: %#v", raw, got)
}
}
}
}
func TestAppendDryRunArgForcesDryRunWhenExplicitlyDisabled(t *testing.T) {
got, err := appendDryRunArg("lark-cli docs +fetch --dry-run=false --doc abc")
if err != nil {

View File

@@ -15,18 +15,20 @@ import (
manifestexamples "github.com/larksuite/cli/internal/qualitygate/examples"
"github.com/larksuite/cli/internal/qualitygate/facts"
"github.com/larksuite/cli/internal/qualitygate/manifest"
"github.com/larksuite/cli/internal/qualitygate/publiccontent"
"github.com/larksuite/cli/internal/qualitygate/report"
"github.com/larksuite/cli/internal/qualitygate/skillscan"
"github.com/larksuite/cli/internal/vfs"
)
type Options struct {
Repo string
CLIBin string
ChangedFrom string
FactsOut string
ManifestPath string
CommandIndexPath string
Repo string
CLIBin string
ChangedFrom string
FactsOut string
ManifestPath string
CommandIndexPath string
PublicContentMetadataPath string
}
func Run(ctx context.Context, opts Options) ([]report.Diagnostic, facts.Facts, error) {
@@ -98,9 +100,60 @@ func Run(ctx context.Context, opts Options) ([]report.Diagnostic, facts.Facts, e
if opts.ChangedFrom != "" {
diags = append(diags, errorDiags...)
}
publicContent, err := publiccontent.Collect(ctx, publiccontent.Options{
Repo: opts.Repo,
ChangedFrom: opts.ChangedFrom,
MetadataPath: opts.PublicContentMetadataPath,
})
if err != nil {
return nil, facts.Facts{}, err
}
diags = append(diags, publicContentDiagnostics(publicContent)...)
diags = filterPRDiagnostics(opts.Repo, opts.ChangedFrom, scope, m, diags)
return diags, facts.BuildWithCommandLookup(m, commandIndex, skillFacts, skillQualityFacts, errorFacts, exampleFacts, outputFacts, diags, scope.Files), nil
builtFacts := facts.BuildWithCommandLookup(m, commandIndex, skillFacts, skillQualityFacts, errorFacts, exampleFacts, outputFacts, diags, scope.Files)
return diags, facts.WithPublicContent(builtFacts, publicContentFacts(publicContent)), nil
}
func publicContentDiagnostics(items []publiccontent.Finding) []report.Diagnostic {
if len(items) == 0 {
return nil
}
out := make([]report.Diagnostic, 0, len(items))
for _, item := range items {
if item.Rule == "public_content_semantic_candidate" {
continue
}
out = append(out, report.Diagnostic{
Rule: item.Rule,
Action: item.Action,
File: item.File,
Line: item.Line,
Message: item.Message,
Suggestion: item.Suggestion,
})
}
return out
}
func publicContentFacts(items []publiccontent.Finding) []facts.PublicContentFact {
if len(items) == 0 {
return nil
}
out := make([]facts.PublicContentFact, 0, len(items))
for _, item := range items {
out = append(out, facts.PublicContentFact{
Rule: item.Rule,
Action: item.Action,
File: item.File,
Line: item.Line,
Source: item.Source,
Excerpt: item.Excerpt,
Message: item.Message,
Suggestion: item.Suggestion,
})
}
return out
}
func readManifestInput(path, kind, flag string) (manifest.Manifest, error) {
@@ -167,6 +220,9 @@ func filterPRDiagnostics(repo, changedFrom string, scope qdiff.Scope, m manifest
}
func prDiagnosticRelevant(repo string, changedFiles map[string]bool, commandScope diagnosticCommandScope, m manifest.Manifest, diag report.Diagnostic) bool {
if strings.HasPrefix(diag.Rule, "public_content_") {
return true
}
file := normalizeDiagnosticFile(repo, diag.File)
if file != "" && changedFiles[file] {
return true

View File

@@ -189,6 +189,99 @@ description: Manage Drive comments with service command references.
}
}
func TestRunCollectsPublicContentFindingsIntoDiagnosticsAndFacts(t *testing.T) {
repo := t.TempDir()
runGit(t, repo, "init")
runGit(t, repo, "config", "user.email", "test@example.com")
runGit(t, repo, "config", "user.name", "Test User")
if err := vfs.WriteFile(filepath.Join(repo, "README.md"), []byte("# test\n"), 0o644); err != nil {
t.Fatal(err)
}
runGit(t, repo, "add", "README.md")
runGit(t, repo, "commit", "-m", "base")
if err := vfs.MkdirAll(filepath.Join(repo, "docs"), 0o755); err != nil {
t.Fatal(err)
}
publicDoc := "api_" + "key = \"example-public-key\"\n" +
"Public docs describe a pri" + "vate request header and trust classification detail.\n"
if err := vfs.WriteFile(filepath.Join(repo, "docs", "public.md"), []byte(publicDoc), 0o644); err != nil {
t.Fatal(err)
}
runGit(t, repo, "add", "docs/public.md")
runGit(t, repo, "commit", "-m", "add public doc")
metadataPath := filepath.Join(repo, "pr-metadata.json")
if err := vfs.WriteFile(metadataPath, []byte(`{"title":"public docs","body":"Change`+`-Id: I0123456789abcdef0123456789abcdef01234567"}`), 0o644); err != nil {
t.Fatal(err)
}
manifestPath := filepath.Join(repo, "command-manifest.json")
indexPath := filepath.Join(repo, "command-index.json")
m := manifest.Manifest{SchemaVersion: 1, Commands: []manifest.Command{{
Path: "docs +fetch",
CanonicalPath: "docs +fetch",
Domain: "docs",
Source: manifest.SourceShortcut,
}}}
if err := manifest.WriteFile(manifestPath, manifest.KindCommandManifest, m); err != nil {
t.Fatal(err)
}
idx := manifest.Manifest{SchemaVersion: 1, Commands: append([]manifest.Command{}, m.Commands...)}
idx.Commands = append(idx.Commands, manifest.Command{
Path: "drive files get",
CanonicalPath: "drive files get",
Domain: "drive",
Source: manifest.SourceService,
Generated: true,
Runnable: true,
})
if err := manifest.WriteFile(indexPath, manifest.KindCommandIndex, idx); err != nil {
t.Fatal(err)
}
diags, gotFacts, err := Run(context.Background(), Options{
Repo: repo,
CLIBin: "./lark-cli",
ChangedFrom: "HEAD~1",
ManifestPath: manifestPath,
CommandIndexPath: indexPath,
PublicContentMetadataPath: metadataPath,
})
if err != nil {
t.Fatalf("Run() error = %v", err)
}
actions := map[string]report.Action{}
for _, diag := range diags {
actions[diag.Rule] = diag.Action
}
if actions["public_content_generic_credential"] != report.ActionReject {
t.Fatalf("generic credential diagnostic action = %q, diagnostics=%#v", actions["public_content_generic_credential"], diags)
}
if actions["public_content_change_id_trailer"] != report.ActionReject {
t.Fatalf("change-id diagnostic action = %q, diagnostics=%#v", actions["public_content_change_id_trailer"], diags)
}
if actions["public_content_semantic_candidate"] != "" {
t.Fatalf("semantic candidates should not become deterministic diagnostics: %#v", diags)
}
factRules := map[string]bool{}
for _, item := range gotFacts.PublicContent {
factRules[item.Rule] = true
}
for _, want := range []string{
"public_content_generic_credential",
"public_content_change_id_trailer",
"public_content_semantic_candidate",
} {
if !factRules[want] {
t.Fatalf("missing public content fact %s: %#v", want, gotFacts.PublicContent)
}
}
if len(gotFacts.PublicContent) < 3 {
t.Fatalf("public content facts = %#v", gotFacts.PublicContent)
}
}
func TestLoadBaseReferenceManifestReadsCommandGolden(t *testing.T) {
repo := t.TempDir()
runGit(t, repo, "init")
@@ -506,7 +599,7 @@ func TestNormalizeDiagnosticFileHandlesAbsoluteRepo(t *testing.T) {
func runGit(t *testing.T, repo string, args ...string) {
t.Helper()
cmd := exec.Command("git", append([]string{"-C", repo}, args...)...)
cmd := exec.Command("git", append([]string{"-c", "core.hooksPath=/dev/null", "-C", repo}, args...)...)
cmd.Env = append(os.Environ(), "GIT_AUTHOR_DATE=2026-06-17T00:00:00Z", "GIT_COMMITTER_DATE=2026-06-17T00:00:00Z")
out, err := cmd.CombinedOutput()
if err != nil {

View File

@@ -339,7 +339,7 @@ func jsonSchemaResponseFormat() map[string]any {
"properties": map[string]any{
"category": map[string]any{
"type": "string",
"enum": []string{"error_hint", "default_output", "naming", "skill_quality"},
"enum": []string{"error_hint", "default_output", "naming", "skill_quality", "public_content_leakage"},
},
"severity": map[string]any{
"type": "string",

View File

@@ -10,9 +10,10 @@ import (
"strings"
"github.com/larksuite/cli/internal/qualitygate/facts"
"github.com/larksuite/cli/internal/qualitygate/report"
)
var evidencePattern = regexp.MustCompile(`^facts\.(commands|skills|errors|outputs)\[(\d+)\]$`)
var evidencePattern = regexp.MustCompile(`^facts\.(commands|skills|errors|outputs|public_content)\[(\d+)\]$`)
func Decide(f facts.Facts, r Review, p Policy) Decision {
return DecideWithWaivers(f, r, p, Waivers{})
@@ -172,6 +173,16 @@ func evidenceFingerprint(f facts.Facts, ev string) string {
"has_default_limit:" + strconv.FormatBool(out.HasDefaultLimit),
"has_decision_field:" + strconv.FormatBool(out.HasDecisionField),
}, ":")
case "public_content":
item := f.PublicContent[idx]
return strings.Join([]string{
"public_content",
"rule:" + item.Rule,
"action:" + string(item.Action),
"file:" + item.File,
"line:" + strconv.Itoa(item.Line),
"source:" + item.Source,
}, ":")
default:
return "ref:" + ev
}
@@ -201,7 +212,7 @@ func validFinding(f Finding) bool {
func allowedCategory(category string) bool {
switch category {
case "error_hint", "default_output", "naming", "skill_quality":
case "error_hint", "default_output", "naming", "skill_quality", "public_content_leakage":
return true
default:
return false
@@ -247,6 +258,12 @@ func reproducibleEvidence(f facts.Facts, category, kind string, idx int) bool {
}
skill := f.Skills[idx]
return skill.ReferencesInvalidCommand
case "public_content_leakage":
if kind != "public_content" {
return false
}
item := f.PublicContent[idx]
return item.Action == report.ActionReject || item.Rule == "public_content_semantic_candidate"
default:
return false
}
@@ -277,6 +294,8 @@ func evidenceExists(f facts.Facts, kind string, idx int) bool {
return idx < len(f.Errors)
case "outputs":
return idx < len(f.Outputs)
case "public_content":
return idx < len(f.PublicContent)
default:
return false
}

View File

@@ -242,6 +242,7 @@ func TestGatekeeperBlockerMatrix(t *testing.T) {
Outputs: []facts.OutputFact{{Command: "im messages list", IsList: true, HasDefaultLimit: false, HasDecisionField: false}},
Commands: []facts.CommandFact{{Path: "docs fetch", NameConflictsExisting: true}},
Skills: []facts.SkillFact{{SourceFile: "skills/lark-doc/SKILL.md", Line: 3, ReferencesInvalidCommand: true}},
PublicContent: []facts.PublicContentFact{{Rule: "public_content_generic_credential", Action: "REJECT", File: "docs/public.md", Line: 4, Source: "metadata"}},
}
for _, tc := range []struct {
category string
@@ -251,6 +252,7 @@ func TestGatekeeperBlockerMatrix(t *testing.T) {
{"default_output", "facts.outputs[0]"},
{"naming", "facts.commands[0]"},
{"skill_quality", "facts.skills[0]"},
{"public_content_leakage", "facts.public_content[0]"},
} {
t.Run(tc.category, func(t *testing.T) {
r := Review{Findings: []Finding{{
@@ -268,6 +270,59 @@ func TestGatekeeperBlockerMatrix(t *testing.T) {
}
}
func TestGatekeeperDoesNotPromotePublicContentWarningsToBlockers(t *testing.T) {
f := facts.Facts{
SchemaVersion: 1,
PublicContent: []facts.PublicContentFact{{
Rule: "public_content_" + "pri" + "vate_ipv4",
Action: "WARNING",
File: "docs/network.md",
Line: 1,
Source: "file",
}},
}
review := Review{Findings: []Finding{{
Category: "public_content_leakage",
Severity: "minor",
Evidence: []string{"facts.public_content[0]"},
Message: "pri" + "vate network address appears in public docs",
SuggestedAction: "confirm the public docs do not expose pri" + "vate deployment details",
}}}
got := Decide(f, review, DefaultPolicy())
if len(got.Blockers) != 0 || len(got.Warnings) != 1 {
t.Fatalf("public content warning should not become a blocker: %#v", got)
}
if got.Warnings[0].ReviewAction != ReviewActionObserve {
t.Fatalf("review action = %q, want %q", got.Warnings[0].ReviewAction, ReviewActionObserve)
}
}
func TestGatekeeperAllowsPublicContentSemanticCandidatesAsBlockers(t *testing.T) {
f := facts.Facts{
SchemaVersion: 1,
PublicContent: []facts.PublicContentFact{{
Rule: "public_content_semantic_candidate",
Action: "WARNING",
File: "docs/public.md",
Line: 1,
Source: "file",
}},
}
review := Review{Findings: []Finding{{
Category: "public_content_leakage",
Severity: "major",
Evidence: []string{"facts.public_content[0]"},
Message: "semantic review found pri" + "vate rollout detail",
SuggestedAction: "remove pri" + "vate rollout detail from public docs",
}}}
got := Decide(f, review, DefaultPolicy())
if len(got.Blockers) != 1 {
t.Fatalf("semantic candidate should remain blockable, got %#v", got)
}
}
func TestGatekeeperSkillQualityOnlyBlocksInvalidCommandReferences(t *testing.T) {
f := facts.Facts{
SchemaVersion: 1,

View File

@@ -24,7 +24,7 @@ func BuildPrompt(f facts.Facts) []Message {
"Use only the provided JSON view.",
"The changed_summary may summarize broad changed surfaces; review only listed facts, not omitted summarized items.",
"Use fact_ref values exactly when writing finding evidence.",
"Only facts.commands, facts.skills, facts.errors, and facts.outputs fact_ref values may be blocker evidence.",
"Only facts.commands, facts.skills, facts.errors, facts.outputs, and facts.public_content fact_ref values may be blocker evidence.",
"Evidence entries must be exact fact_ref strings such as \"facts.commands[0]\" with no explanations, labels, or suffix text.",
"facts.examples and facts.skill_quality entries are context only.",
"Report an error_hint finding for any facts.errors item where boundary is true, required_hint is true, and hint_action_count is 0.",
@@ -38,6 +38,9 @@ func BuildPrompt(f facts.Facts) []Message {
"For naming findings, use category \"naming\" and evidence containing that facts.commands fact_ref.",
"Report a skill_quality finding for any facts.skills item where references_invalid_command is true.",
"For skill_quality findings, use category \"skill_quality\" and evidence containing that facts.skills fact_ref.",
"Review public content leakage findings and semantic candidates without private dictionaries.",
"Do not reveal internal rule lists when explaining public content leakage.",
"For public_content_leakage findings, preserve the deterministic finding source and excerpt.",
"Report each distinct issue as a separate finding.",
"The verdict value must be \"pass\" when findings is empty and \"warn\" when findings is non-empty; never use \"fail\".",
"Severity must be one of \"minor\", \"major\", or \"critical\"; never use \"error\", \"warning\", \"medium\", or \"high\".",

View File

@@ -23,7 +23,10 @@ func TestBuildPromptContainsSemanticReviewContract(t *testing.T) {
"A facts.outputs item with is_list true, has_default_limit false, and has_decision_field true must still produce a default_output finding.",
"Report a naming finding for any facts.commands item where name_conflicts_existing is true or flag_alias_conflict is true.",
"Report a skill_quality finding for any facts.skills item where references_invalid_command is true.",
"Only facts.commands, facts.skills, facts.errors, and facts.outputs fact_ref values may be blocker evidence.",
"Review public content leakage findings and semantic candidates without private dictionaries.",
"Do not reveal internal rule lists when explaining public content leakage.",
"For public_content_leakage findings, preserve the deterministic finding source and excerpt.",
"Only facts.commands, facts.skills, facts.errors, facts.outputs, and facts.public_content fact_ref values may be blocker evidence.",
"Evidence entries must be exact fact_ref strings such as \"facts.commands[0]\" with no explanations, labels, or suffix text.",
"facts.examples and facts.skill_quality entries are context only.",
"Report each distinct issue as a separate finding.",

View File

@@ -78,11 +78,11 @@ func DefaultPolicy() Policy {
return Policy{
SchemaVersion: 1,
DefaultEnforcement: "observe",
BlockCategories: []string{"error_hint", "default_output", "naming", "skill_quality"},
BlockCategories: []string{"error_hint", "default_output", "naming", "skill_quality", "public_content_leakage"},
RolloutGroups: []RolloutGroup{{
ID: "all",
Enforcement: "blocking",
Categories: []string{"error_hint", "default_output", "naming", "skill_quality"},
Categories: []string{"error_hint", "default_output", "naming", "skill_quality", "public_content_leakage"},
Owner: "test",
Reason: "default in-memory policy",
}},

View File

@@ -82,6 +82,15 @@ func factScope(f facts.Facts, kind string, idx int) (FactScope, bool) {
Source: item.Source,
CommandPath: item.Command,
}, true
case "public_content":
item := f.PublicContent[idx]
return FactScope{
FactKind: "public_content",
Changed: true,
Source: item.Source,
SourceFile: item.File,
Line: item.Line,
}, true
default:
return FactScope{}, false
}
@@ -195,7 +204,7 @@ func containsString(values []string, want string) bool {
func allowedFactKind(kind string) bool {
switch kind {
case "skill", "command", "error", "output":
case "skill", "command", "error", "output", "public_content":
return true
default:
return false

View File

@@ -81,6 +81,30 @@ func TestGatekeeperSkillQualityUsesSkillEvidence(t *testing.T) {
}
}
func TestGatekeeperUsesPublicContentEvidence(t *testing.T) {
f := facts.Facts{
SchemaVersion: 1,
PublicContent: []facts.PublicContentFact{{
Rule: "public_content_generic_credential",
Action: "REJECT",
File: "docs/public.md",
Line: 12,
Source: "metadata",
}},
}
review := Review{Findings: []Finding{{
Category: "public_content_leakage",
Severity: "critical",
Evidence: []string{"facts.public_content[0]"},
Message: "public content finding needs review",
SuggestedAction: "remove the sensitive public content",
}}}
got := Decide(f, review, DefaultPolicy())
if len(got.Blockers) != 1 || got.Blockers[0].RolloutGroups[0] != "all" {
t.Fatalf("expected public content blocker, got %#v", got)
}
}
func TestGatekeeperAppliesSharedWaiverID(t *testing.T) {
f := facts.Facts{
SchemaVersion: 1,

View File

@@ -13,27 +13,29 @@ import (
)
type InputView struct {
SchemaVersion int `json:"schema_version"`
ChangedSummary ChangedSummary `json:"changed_summary"`
RuleSummary []RuleSummaryItem `json:"rule_summary,omitempty"`
Commands []CommandInput `json:"commands,omitempty"`
Skills []SkillInput `json:"skills,omitempty"`
SkillQuality []SkillQualityInput `json:"skill_quality,omitempty"`
Errors []ErrorInput `json:"errors,omitempty"`
Outputs []OutputInput `json:"outputs,omitempty"`
Examples []ExampleInput `json:"examples,omitempty"`
Diagnostics []facts.DiagnosticFact `json:"diagnostics,omitempty"`
SchemaVersion int `json:"schema_version"`
ChangedSummary ChangedSummary `json:"changed_summary"`
RuleSummary []RuleSummaryItem `json:"rule_summary,omitempty"`
Commands []CommandInput `json:"commands,omitempty"`
Skills []SkillInput `json:"skills,omitempty"`
SkillQuality []SkillQualityInput `json:"skill_quality,omitempty"`
Errors []ErrorInput `json:"errors,omitempty"`
Outputs []OutputInput `json:"outputs,omitempty"`
Examples []ExampleInput `json:"examples,omitempty"`
PublicContentLeakage []PublicContentInput `json:"public_content_leakage,omitempty"`
Diagnostics []facts.DiagnosticFact `json:"diagnostics,omitempty"`
}
type ChangedSummary struct {
Commands int `json:"commands,omitempty"`
Skills int `json:"skills,omitempty"`
SkillQuality int `json:"skill_quality,omitempty"`
Errors int `json:"errors,omitempty"`
Outputs int `json:"outputs,omitempty"`
Examples int `json:"examples,omitempty"`
Domains []string `json:"domains,omitempty"`
Sources []string `json:"sources,omitempty"`
Commands int `json:"commands,omitempty"`
Skills int `json:"skills,omitempty"`
SkillQuality int `json:"skill_quality,omitempty"`
Errors int `json:"errors,omitempty"`
Outputs int `json:"outputs,omitempty"`
Examples int `json:"examples,omitempty"`
PublicContent int `json:"public_content,omitempty"`
Domains []string `json:"domains,omitempty"`
Sources []string `json:"sources,omitempty"`
}
type RuleSummaryItem struct {
@@ -86,6 +88,22 @@ type ExampleInput struct {
facts.CommandExample
}
type PublicContentInput struct {
FactRef string `json:"fact_ref"`
facts.PublicContentFact
}
func (v InputView) HasReviewableFacts() bool {
return len(v.Commands) > 0 ||
len(v.Skills) > 0 ||
len(v.SkillQuality) > 0 ||
len(v.Errors) > 0 ||
len(v.Outputs) > 0 ||
len(v.Examples) > 0 ||
len(v.PublicContentLeakage) > 0 ||
len(v.Diagnostics) > 0
}
func BuildInputView(f facts.Facts) InputView {
selected := newInputSelection(f)
selected.addChangedReviewCandidates()
@@ -104,16 +122,17 @@ func BuildInputView(f facts.Facts) InputView {
}
return InputView{
SchemaVersion: f.SchemaVersion,
ChangedSummary: changedSummary(f),
RuleSummary: ruleSummary(f.Diagnostics),
Commands: selected.commandInputs(),
Skills: selected.skillInputs(),
SkillQuality: selected.skillQualityInputs(),
Errors: selected.errorInputs(),
Outputs: selected.outputInputs(),
Examples: selected.exampleInputs(),
Diagnostics: viewDiagnostics,
SchemaVersion: f.SchemaVersion,
ChangedSummary: changedSummary(f),
RuleSummary: ruleSummary(f.Diagnostics),
Commands: selected.commandInputs(),
Skills: selected.skillInputs(),
SkillQuality: selected.skillQualityInputs(),
Errors: selected.errorInputs(),
Outputs: selected.outputInputs(),
Examples: selected.exampleInputs(),
PublicContentLeakage: selected.publicContentInputs(),
Diagnostics: viewDiagnostics,
}
}
@@ -138,6 +157,11 @@ func (s *inputSelection) addChangedReviewCandidates() {
s.outputs[i] = true
}
}
for i, item := range s.f.PublicContent {
if publicContentReviewCandidate(item) {
s.publicContent[i] = true
}
}
}
func commandReviewCandidate(cmd facts.CommandFact) bool {
@@ -157,25 +181,31 @@ func outputReviewCandidate(_ facts.OutputFact) bool {
return false
}
func publicContentReviewCandidate(item facts.PublicContentFact) bool {
return item.Rule == "public_content_semantic_candidate"
}
type inputSelection struct {
f facts.Facts
commands []bool
skills []bool
skillQuality []bool
errors []bool
outputs []bool
examples []bool
f facts.Facts
commands []bool
skills []bool
skillQuality []bool
errors []bool
outputs []bool
examples []bool
publicContent []bool
}
func newInputSelection(f facts.Facts) *inputSelection {
return &inputSelection{
f: f,
commands: make([]bool, len(f.Commands)),
skills: make([]bool, len(f.Skills)),
skillQuality: make([]bool, len(f.SkillQuality)),
errors: make([]bool, len(f.Errors)),
outputs: make([]bool, len(f.Outputs)),
examples: make([]bool, len(f.Examples)),
f: f,
commands: make([]bool, len(f.Commands)),
skills: make([]bool, len(f.Skills)),
skillQuality: make([]bool, len(f.SkillQuality)),
errors: make([]bool, len(f.Errors)),
outputs: make([]bool, len(f.Outputs)),
examples: make([]bool, len(f.Examples)),
publicContent: make([]bool, len(f.PublicContent)),
}
}
@@ -194,6 +224,8 @@ func (s *inputSelection) diagnosticContext(diag facts.DiagnosticFact) *inputSele
s.addDiagnosticExamples(out, diag)
case diag.Rule == "no_bare_helper_error":
s.addDiagnosticErrors(out, diag)
case strings.HasPrefix(diag.Rule, "public_content_"):
s.addDiagnosticPublicContent(out, diag)
}
return out
}
@@ -256,6 +288,15 @@ func (s *inputSelection) addDiagnosticExamples(out *inputSelection, diag facts.D
}
}
func (s *inputSelection) addDiagnosticPublicContent(out *inputSelection, diag facts.DiagnosticFact) {
for i, item := range s.f.PublicContent {
if diagnosticLocationMatches(diag.File, diag.Line, item.File, item.Line) ||
diag.Rule == item.Rule {
out.publicContent[i] = true
}
}
}
func includeDiagnosticInView(diag facts.DiagnosticFact, selected, context *inputSelection) bool {
if diag.Action == report.ActionReject {
return true
@@ -270,6 +311,7 @@ func (s *inputSelection) merge(other *inputSelection) {
mergeSelections(s.errors, other.errors)
mergeSelections(s.outputs, other.outputs)
mergeSelections(s.examples, other.examples)
mergeSelections(s.publicContent, other.publicContent)
}
func (s *inputSelection) intersects(other *inputSelection) bool {
@@ -278,7 +320,8 @@ func (s *inputSelection) intersects(other *inputSelection) bool {
selectionsIntersect(s.skillQuality, other.skillQuality) ||
selectionsIntersect(s.errors, other.errors) ||
selectionsIntersect(s.outputs, other.outputs) ||
selectionsIntersect(s.examples, other.examples)
selectionsIntersect(s.examples, other.examples) ||
selectionsIntersect(s.publicContent, other.publicContent)
}
func (s *inputSelection) commandInputs() []CommandInput {
@@ -351,6 +394,16 @@ func (s *inputSelection) exampleInputs() []ExampleInput {
return out
}
func (s *inputSelection) publicContentInputs() []PublicContentInput {
out := make([]PublicContentInput, 0, countSelected(s.publicContent))
for i, ok := range s.publicContent {
if ok {
out = append(out, PublicContentInput{FactRef: factRef("public_content", i), PublicContentFact: s.f.PublicContent[i]})
}
}
return out
}
func changedSummary(f facts.Facts) ChangedSummary {
domains := map[string]bool{}
sources := map[string]bool{}
@@ -402,6 +455,10 @@ func changedSummary(f facts.Facts) ChangedSummary {
addNonEmpty(domains, example.Domain)
addNonEmpty(sources, example.Source)
}
for _, item := range f.PublicContent {
out.PublicContent++
addNonEmpty(sources, item.Source)
}
out.Domains = sortedViewSetKeys(domains)
out.Sources = sortedViewSetKeys(sources)
return out
@@ -434,7 +491,8 @@ func semanticDiagnosticRule(rule string) bool {
strings.HasPrefix(rule, "default_output") ||
strings.HasPrefix(rule, "skill_") ||
strings.HasPrefix(rule, "example_dry_run") ||
rule == "no_bare_helper_error"
rule == "no_bare_helper_error" ||
strings.HasPrefix(rule, "public_content_")
}
func diagnosticCommandMatches(diag facts.DiagnosticFact, values ...string) bool {

View File

@@ -77,6 +77,122 @@ func TestInputViewKeepsChangedReviewCandidatesWithOriginalRefs(t *testing.T) {
}
}
func TestInputViewIncludesPublicContentLeakage(t *testing.T) {
f := facts.Facts{
SchemaVersion: 1,
PublicContent: []facts.PublicContentFact{{
Rule: "public_content_generic_credential",
Action: report.ActionReject,
File: "docs/public.md",
Line: 4,
Excerpt: "api_key = <redacted>",
Message: "generic credential assignment",
}},
Diagnostics: []facts.DiagnosticFact{{
Rule: "public_content_generic_credential",
Action: report.ActionReject,
File: "docs/public.md",
Line: 4,
Message: "generic credential assignment",
}},
}
view := BuildInputView(f)
if len(view.PublicContentLeakage) != 1 {
t.Fatalf("public content leakage len = %d, want 1", len(view.PublicContentLeakage))
}
if got := view.PublicContentLeakage[0].FactRef; got != "facts.public_content[0]" {
t.Fatalf("public content fact ref = %q", got)
}
if len(view.Diagnostics) != 1 {
t.Fatalf("diagnostics len = %d, want 1", len(view.Diagnostics))
}
}
func TestInputViewIncludesPublicContentSemanticCandidatesWithoutDiagnostics(t *testing.T) {
f := facts.Facts{
SchemaVersion: 1,
PublicContent: []facts.PublicContentFact{{
Rule: "public_content_semantic_candidate",
Action: report.ActionWarning,
File: "docs/public.md",
Line: 1,
Source: "file",
Excerpt: "public prose that needs semantic review",
Message: "public contribution contains text for semantic public content review",
}},
}
view := BuildInputView(f)
if len(view.PublicContentLeakage) != 1 {
t.Fatalf("semantic candidate len = %d, want 1", len(view.PublicContentLeakage))
}
if got := view.PublicContentLeakage[0].FactRef; got != "facts.public_content[0]" {
t.Fatalf("semantic candidate fact ref = %q", got)
}
if len(view.Diagnostics) != 0 {
t.Fatalf("semantic candidate should not require diagnostics, got %#v", view.Diagnostics)
}
}
func TestPromptIncludesSanitizedPublicContentExcerpt(t *testing.T) {
scopeText := "pri" + "vate rollout"
f := facts.Facts{
SchemaVersion: 1,
PublicContent: []facts.PublicContentFact{{
Rule: "public_content_semantic_candidate",
Action: report.ActionWarning,
File: "docs/public.md",
Line: 1,
Source: "file",
Excerpt: `semantic signals: pri` + `vate_scope,roadmap_detail; excerpt: "` + scopeText + ` token=<redacted>"`,
Message: "public contribution contains text for semantic public content review",
}},
}
view := BuildInputView(f)
if len(view.PublicContentLeakage) != 1 {
t.Fatalf("semantic candidate len = %d, want 1", len(view.PublicContentLeakage))
}
if got := view.PublicContentLeakage[0].Excerpt; !strings.Contains(got, scopeText) || !strings.Contains(got, "token=<redacted>") {
t.Fatalf("semantic candidate excerpt missing from view: %q", got)
}
messages := BuildPrompt(f)
if len(messages) != 2 {
t.Fatalf("messages len = %d, want 2", len(messages))
}
if !strings.Contains(messages[1].Content, scopeText) || !strings.Contains(messages[1].Content, "redacted") {
t.Fatalf("prompt missing sanitized public content excerpt: %s", messages[1].Content)
}
if strings.Contains(messages[1].Content, "real-"+"secret-value") {
t.Fatalf("prompt leaked raw sensitive value %q", messages[1].Content)
}
}
func TestInputViewExcludesPublicContentWarningsWithoutSemanticCandidate(t *testing.T) {
f := facts.Facts{
SchemaVersion: 1,
PublicContent: []facts.PublicContentFact{{
Rule: "public_content_" + "pri" + "vate_ipv4",
Action: report.ActionWarning,
File: "docs/network.md",
Line: 1,
Source: "file",
Excerpt: "192.168." + "0.10",
Message: "public contribution contains a pri" + "vate-network IP address",
}},
}
view := BuildInputView(f)
if len(view.PublicContentLeakage) != 0 {
t.Fatalf("warning-only public content should not enter semantic view: %#v", view.PublicContentLeakage)
}
if len(view.Diagnostics) != 0 {
t.Fatalf("warning-only public content should not add diagnostics: %#v", view.Diagnostics)
}
}
func TestInputViewSummarizesBroadChangedCommandSurface(t *testing.T) {
f := broadChangedFacts(434, 44)

View File

@@ -138,6 +138,10 @@ func parseWaiver(parts []string, lineNo int) (Waiver, error) {
if item.SourceFile == "" || item.Line == 0 {
return Waiver{}, fmt.Errorf("%s:%d: %s waiver requires source_file and line", waiverPath, lineNo, item.FactKind)
}
case "public_content":
if item.SourceFile == "" || item.Line == 0 || item.CommandPath != "" {
return Waiver{}, fmt.Errorf("%s:%d: public_content waiver requires source_file and line only", waiverPath, lineNo)
}
case "command", "output":
if item.CommandPath == "" {
return Waiver{}, fmt.Errorf("%s:%d: %s waiver requires command_path", waiverPath, lineNo, item.FactKind)

View File

@@ -21,24 +21,27 @@ func TestLoadWaivers(t *testing.T) {
writeSemanticFile(t, repo, "waivers.txt", "# waiver_id\tcategory\tfact_kind\tsource_file\tline\tcommand_path\towner\treason\tadded_at\texpires_at\n"+
"wiki-move-202606\tskill_quality\tskill\tskills/lark-wiki/SKILL.md\t30\t\twiki-owner\tmigration\t2026-06-08\t2026-07-15\n"+
"wiki-move-202606\tskill_quality\tskill\tskills/lark-wiki/references/move.md\t12\t\twiki-owner\tmigration\t2026-06-08\t2026-07-15\n")
"wiki-move-202606\tskill_quality\tskill\tskills/lark-wiki/references/move.md\t12\t\twiki-owner\tmigration\t2026-06-08\t2026-07-15\n"+
"public-doc-202606\tpublic_content_leakage\tpublic_content\tdocs/public.md\t4\t\tsecurity-owner\treviewed false positive\t2026-06-08\t2026-07-15\n")
w, diags, err = LoadWaivers(repo, now)
if err != nil {
t.Fatalf("LoadWaivers() error = %v", err)
}
if len(diags) != 0 || len(w.Items) != 2 {
if len(diags) != 0 || len(w.Items) != 3 {
t.Fatalf("LoadWaivers() = %#v %#v", w, diags)
}
for name, body := range map[string]string{
"bad columns": "one\ttoo-few\n",
"bad id": "BAD\terror_hint\terror\tcmd/root.go\t1\t\to\tr\t2026-06-08\t2026-07-15\n",
"bad fact kind": "id1\terror_hint\tskill_quality\tcmd/root.go\t1\t\to\tr\t2026-06-08\t2026-07-15\n",
"missing owner": "id1\terror_hint\terror\tcmd/root.go\t1\t\t\tr\t2026-06-08\t2026-07-15\n",
"missing line": "id1\terror_hint\terror\tcmd/root.go\t\t\to\tr\t2026-06-08\t2026-07-15\n",
"missing command": "id1\tdefault_output\toutput\t\t\t\to\tr\t2026-06-08\t2026-07-15\n",
"bad source path": "id1\terror_hint\terror\t../cmd/root.go\t1\t\to\tr\t2026-06-08\t2026-07-15\n",
"bad date format": "id1\terror_hint\terror\tcmd/root.go\t1\t\to\tr\t20260608\t2026-07-15\n",
"bad columns": "one\ttoo-few\n",
"bad id": "BAD\terror_hint\terror\tcmd/root.go\t1\t\to\tr\t2026-06-08\t2026-07-15\n",
"bad fact kind": "id1\terror_hint\tskill_quality\tcmd/root.go\t1\t\to\tr\t2026-06-08\t2026-07-15\n",
"missing owner": "id1\terror_hint\terror\tcmd/root.go\t1\t\t\tr\t2026-06-08\t2026-07-15\n",
"missing line": "id1\terror_hint\terror\tcmd/root.go\t\t\to\tr\t2026-06-08\t2026-07-15\n",
"missing command": "id1\tdefault_output\toutput\t\t\t\to\tr\t2026-06-08\t2026-07-15\n",
"public content missing line": "id1\tpublic_content_leakage\tpublic_content\tdocs/public.md\t\t\to\tr\t2026-06-08\t2026-07-15\n",
"public content command selector": "id1\tpublic_content_leakage\tpublic_content\t\t\tcmd/foo\to\tr\t2026-06-08\t2026-07-15\n",
"bad source path": "id1\terror_hint\terror\t../cmd/root.go\t1\t\to\tr\t2026-06-08\t2026-07-15\n",
"bad date format": "id1\terror_hint\terror\tcmd/root.go\t1\t\to\tr\t20260608\t2026-07-15\n",
} {
t.Run(name, func(t *testing.T) {
writeSemanticFile(t, repo, "waivers.txt", body)

View File

@@ -45,6 +45,10 @@ async function publishTargetStillCurrent(github, context, core, target, phase =
repo: context.repo.repo,
pull_number: target.pr,
});
if (pr.state !== "open") {
core.notice(`PR quality summary skipped: PR is no longer open before ${phase}`);
return false;
}
if (pr.head.sha !== target.headSha) {
core.notice(`PR quality summary skipped: PR head changed before ${phase}`);
return false;

View File

@@ -152,6 +152,25 @@ describe("ci-quality-summary-publish", () => {
});
});
it("does not publish a summary when the PR closes before comment creation", async () => {
await withPublishTempDir(async ({ calls }) => {
await publish({
github: fakeGithub(calls, {
jobs: [{ name: "unit-test", conclusion: "failure", html_url: "https://github.example/jobs/1" }],
pullResponses: [
currentPullResponse(),
currentPullResponse({ state: "closed" }),
],
}),
context: workflowRunContext({ conclusion: "failure" }),
core: silentCore(calls),
});
assert.equal(calls.comments.length, 0);
assert.match(calls.notices.join("\n"), /PR is no longer open/);
});
});
it("does not delete an existing summary when the PR base changes before cleanup", async () => {
await withPublishTempDir(async ({ calls }) => {
await publish({
@@ -338,6 +357,7 @@ function fakeGithub(calls, options = {}) {
function currentPullResponse(overrides = {}) {
return {
data: {
state: overrides.state || "open",
head: { sha: overrides.headSha || process.env.CI_QUALITY_SUMMARY_HEAD_SHA },
base: {
sha: overrides.baseSha || process.env.CI_QUALITY_SUMMARY_BASE_SHA,

View File

@@ -5,26 +5,42 @@
set -euo pipefail
workflow=".github/workflows/ci.yml"
job_section() {
local job="$1"
awk -v job="$job" '
$0 == " " job ":" { in_job = 1; print; next }
in_job && /^ [A-Za-z0-9_-]+:/ { exit }
in_job { print }
' "$workflow"
}
workflow_permissions="$(awk '
/^permissions:/ { in_permissions = 1; print; next }
in_permissions && /^[^[:space:]]/ { exit }
in_permissions { print }
' "$workflow")"
fast_gate_section="$(job_section fast-gate)"
unit_test_section="$(job_section unit-test)"
lint_section="$(awk '
/^ lint:/ { in_job = 1 }
in_job { print }
/^ deterministic-gate:/ { exit }
/^ script-test:/ { exit }
' "$workflow")"
script_test_section="$(job_section script-test)"
deterministic_section="$(awk '
/^ deterministic-gate:/ { in_job = 1 }
in_job { print }
/^ coverage:/ { exit }
' "$workflow")"
coverage_job_section="$(job_section coverage)"
deadcode_section="$(job_section deadcode)"
dry_run_section="$(job_section e2e-dry-run)"
section="$(awk '
/^ e2e-live:/ { in_job = 1 }
in_job { print }
/^ security:/ { exit }
' "$workflow")"
security_section="$(job_section security)"
license_header_section="$(job_section license-header)"
results_section="$(awk '
/^ results:/ { in_job = 1 }
in_job { print }
@@ -98,13 +114,94 @@ if ! grep -Fq "make quality-gate" <<<"$deterministic_section"; then
exit 1
fi
if ! grep -Fq "Write public content metadata" <<<"$deterministic_section"; then
echo "deterministic-gate should write PR title/body metadata before quality-gate"
exit 1
fi
if ! grep -Fq "types: [opened, synchronize, reopened, edited]" "$workflow"; then
echo "CI pull_request trigger should include edited so PR title/body changes are rescanned"
exit 1
fi
if ! grep -Fq "script-test:" <<<"$script_test_section"; then
echo "CI should run make script-test so workflow and publisher contract tests are not local-only"
exit 1
fi
if ! grep -Fq "make script-test" <<<"$script_test_section"; then
echo "script-test job should invoke make script-test"
exit 1
fi
if ! grep -Fq "actions/setup-node" <<<"$script_test_section"; then
echo "script-test job should install Node for JavaScript workflow tests"
exit 1
fi
if grep -Fq '${{ secrets.' <<<"$script_test_section"; then
echo "script-test must not reference secrets"
exit 1
fi
if grep -Fq "metadata-gate:" "$workflow"; then
echo "metadata-gate should not run alongside deterministic-gate because both would upload the same facts artifact"
exit 1
fi
if grep -Fq "github.event.action != 'edited'" <<<"$fast_gate_section"; then
echo "fast-gate must run on pull_request edited events so title/body edits cannot replace failed CI with a light success"
exit 1
fi
for full_job in \
"$unit_test_section" \
"$lint_section" \
"$script_test_section" \
"$deterministic_section" \
"$coverage_job_section" \
"$dry_run_section" \
"$security_section"; do
if grep -Fq "github.event.action != 'edited'" <<<"$full_job"; then
echo "full CI jobs must run on pull_request edited events; do not skip title/body-only edits"
exit 1
fi
done
for pull_request_job in "$deadcode_section" "$license_header_section"; do
if grep -Fq "github.event.action != 'edited'" <<<"$pull_request_job"; then
echo "pull_request-only CI jobs must run on edited events"
exit 1
fi
done
if grep -Fq '${{ secrets.' <<<"$deterministic_section"; then
echo "deterministic-gate must not reference secrets"
exit 1
fi
if ! grep -Fq "PUBLIC_CONTENT_METADATA=" <<<"$deterministic_section"; then
echo "deterministic-gate should pass public content metadata into make quality-gate"
exit 1
fi
if ! grep -Fq "PR_BRANCH:" <<<"$deterministic_section"; then
echo "deterministic-gate should pass the pull request branch into public content metadata"
exit 1
fi
if ! grep -Fq "name: quality-gate-facts-\${{ github.event.pull_request.base.sha }}-\${{ github.event.pull_request.head.sha }}" <<<"$deterministic_section"; then
echo "deterministic-gate should upload base/head-bound quality-gate-facts for semantic review"
exit 1
fi
if ! grep -Fq "needs: [unit-test, lint, deterministic-gate]" "$workflow"; then
echo "E2E jobs should wait for deterministic-gate"
if ! grep -Fq "needs: [unit-test, lint, script-test, deterministic-gate]" "$workflow"; then
echo "E2E jobs should wait for script-test and deterministic-gate"
exit 1
fi
if ! grep -Fq "script-test" <<<"$results_section"; then
echo "results job should include script-test"
exit 1
fi
@@ -210,6 +307,11 @@ if ! grep -Fq "go run ./internal/qualitygate/cmd/manifest-export" <<<"$make_outp
exit 1
fi
if ! grep -Fq -- "--public-content-metadata .tmp/quality-gate/public-content-metadata.json" <<<"$make_output"; then
echo "quality-gate check should consume public content metadata"
exit 1
fi
if ! grep -Fq -- "--manifest .tmp/quality-gate/command-manifest.json" <<<"$make_output" ||
! grep -Fq -- "--command-index .tmp/quality-gate/command-index.json" <<<"$make_output"; then
echo "quality-gate check should consume both exported command snapshots"

View File

@@ -175,7 +175,7 @@ function inlineCode(value) {
}
function parseEvidenceRef(ref) {
const match = /^facts\.(commands|skills|errors|outputs)\[(\d+)\]$/.exec(String(ref || ""));
const match = /^facts\.(commands|skills|errors|outputs|public_content)\[(\d+)\]$/.exec(String(ref || ""));
if (!match) {
return null;
}
@@ -230,6 +230,20 @@ function evidenceLocation(facts, ref) {
return { kind: parsed.kind, command: item.path, label: item.path };
}
return null;
case "public_content":
if (item.file && Number.isInteger(item.line) && item.line > 0) {
const label = `${item.file}:${item.line}`;
if (item.file === "branch" || item.file === "pull_request_metadata" || String(item.file).startsWith("commit:")) {
return { kind: parsed.kind, label };
}
return {
kind: parsed.kind,
path: item.file,
line: item.line,
label,
};
}
return null;
default:
return null;
}
@@ -845,6 +859,10 @@ async function publishTargetStillCurrent(github, context, core, target, phase =
repo: context.repo.repo,
pull_number: target.pr,
});
if (pr.state !== "open") {
core.notice(`semantic review skipped: PR is no longer open before ${phase}`);
return false;
}
if (pr.head.sha !== target.headSha) {
core.notice(`semantic review skipped: PR head changed before ${phase}`);
return false;

View File

@@ -202,6 +202,100 @@ describe("semantic-review-publish", () => {
assert.equal(selectInlineTarget({ evidence: ["facts.errors[0]"] }, facts, changedLineIndex), null);
});
it("maps public content evidence to changed files but not virtual metadata", () => {
const restrictedScope = "pri" + "vate";
const facts = {
public_content: [
{
rule: "public_content_semantic_candidate",
action: "WARNING",
file: "docs/public-roadmap.md",
line: 4,
source: "file",
},
{
rule: "public_content_semantic_candidate",
action: "WARNING",
file: "pull_request_metadata",
line: 1,
source: "metadata",
},
{
rule: "public_content_automation_branch",
action: "WARNING",
file: "branch",
line: 1,
source: "branch",
},
{
rule: "public_content_change_id_trailer",
action: "REJECT",
file: "commit:1234abc",
line: 3,
source: "commit",
},
],
};
const changedLineIndex = buildChangedLineIndex([{
filename: "docs/public-roadmap.md",
patch: [
"@@ -3,2 +3,3 @@",
" context",
"+Specific " + restrictedScope + " roadmap detail",
].join("\n"),
}]);
assert.deepEqual(
selectInlineTarget({ evidence: ["facts.public_content[0]"] }, facts, changedLineIndex),
{ path: "docs/public-roadmap.md", line: 4 },
);
assert.equal(selectInlineTarget({ evidence: ["facts.public_content[1]"] }, facts, changedLineIndex), null);
assert.equal(selectInlineTarget({ evidence: ["facts.public_content[2]"] }, facts, changedLineIndex), null);
assert.equal(selectInlineTarget({ evidence: ["facts.public_content[3]"] }, facts, changedLineIndex), null);
const markdown = buildSummaryMarkdown({
block_mode: true,
blockers: [{
category: "public_content_leakage",
severity: "major",
review_action: "must_fix",
evidence: ["facts.public_content[1]"],
fingerprint: "public-content-metadata",
message: "PR metadata contains " + restrictedScope + " rollout detail",
suggested_action: "Move " + restrictedScope + " detail to an internal channel.",
}],
warnings: [],
}, facts);
assert.match(markdown, /pull_request_metadata:1/);
const virtualMarkdown = buildSummaryMarkdown({
block_mode: true,
blockers: [
{
category: "public_content_leakage",
severity: "major",
review_action: "must_fix",
evidence: ["facts.public_content[2]"],
fingerprint: "public-content-branch",
message: "Branch name looks automation-owned.",
suggested_action: "Use a maintainer-owned public branch name.",
},
{
category: "public_content_leakage",
severity: "major",
review_action: "must_fix",
evidence: ["facts.public_content[3]"],
fingerprint: "public-content-commit",
message: "Commit trailer contains " + restrictedScope + " review metadata.",
suggested_action: "Remove " + restrictedScope + " review metadata from commits.",
},
],
warnings: [],
}, facts);
assert.match(virtualMarkdown, /branch:1/);
assert.match(virtualMarkdown, /commit:1234abc:3/);
});
it("builds finding markers from stable fingerprints and evidence identity", () => {
const factsA = {
skills: [{
@@ -615,6 +709,35 @@ describe("semantic-review-publish", () => {
});
});
it("skips publishing when the PR closes after verification", async () => {
await withPublishTempDir(async ({ calls }) => {
fs.writeFileSync("decision.json", JSON.stringify({
block_mode: true,
blockers: [],
warnings: [],
}), "utf8");
await publish({
github: fakeGithub(calls, {
currentPullRequest: {
state: "closed",
head: { sha: "0123456789abcdef0123456789abcdef01234567" },
base: {
sha: "fedcba9876543210fedcba9876543210fedcba98",
repo: { id: 123 },
},
},
}),
context: workflowRunContext(),
core: silentCore(calls),
});
assert.equal(calls.checks.length, 0);
assert.equal(calls.comments.length, 0);
assert.match(calls.notices[0], /PR is no longer open before publishing/);
});
});
it("rejects publishing when the PR base repo changed after verification", async () => {
await withPublishTempDir(async ({ calls }) => {
fs.writeFileSync("decision.json", JSON.stringify({
@@ -2223,8 +2346,8 @@ function fakeGithub(calls, options = {}) {
},
},
pulls: {
get: async () => ({
data: Array.isArray(options.currentPullRequests)
get: async () => {
const pull = Array.isArray(options.currentPullRequests)
? options.currentPullRequests[Math.min(pullGetCount++, options.currentPullRequests.length - 1)]
: options.currentPullRequest || {
head: { sha: process.env.SEMANTIC_REVIEW_HEAD_SHA },
@@ -2232,8 +2355,9 @@ function fakeGithub(calls, options = {}) {
sha: process.env.SEMANTIC_REVIEW_BASE_SHA,
repo: { id: 123 },
},
},
}),
};
return { data: { state: "open", ...pull } };
},
listFiles() {},
listReviewComments() {},
createReviewComment: async (args) => {

View File

@@ -229,6 +229,36 @@ function requireSafePath(value, path) {
return file;
}
function requirePublicContentFile(value, path) {
const file = requireString(value, path);
if (file === "branch" || file === "pull_request_metadata" || /^commit:[0-9a-f]{7,40}$/.test(file)) {
return file;
}
if (file.startsWith("commit:")) {
throw new Error(`facts JSON ${path} must be a valid public content location`);
}
requireSafePath(file, path);
if (
file === "" ||
file === "." ||
file.startsWith("./") ||
file.includes("\\") ||
file.includes("\0") ||
file.split("/").includes(".git") ||
/^[A-Za-z][A-Za-z0-9+.-]*:/.test(file)
) {
throw new Error(`facts JSON ${path} must be a repository-relative path`);
}
return file;
}
function requirePositiveLine(value, path) {
requireLine(value, path);
if (value === 0) {
throw new Error(`facts JSON ${path} must be a positive line number`);
}
}
function requireStringArray(value, path, { optional = false } = {}) {
if (value === undefined || value === null) {
if (optional) {
@@ -421,6 +451,20 @@ function verifyFactsJSON(data) {
for (const [i, value] of requireArray(facts, "examples").entries()) {
verifyCommandExample(value, `examples[${i}]`);
}
for (const [i, value] of requireArray(facts, "public_content").entries()) {
const item = requireObject(value, `public_content[${i}]`);
requireString(item.rule, `public_content[${i}].rule`);
const action = requireString(item.action, `public_content[${i}].action`);
if (!VALID_ACTIONS.has(action)) {
throw new Error(`facts JSON public_content[${i}].action is invalid`);
}
requirePublicContentFile(item.file, `public_content[${i}].file`);
requirePositiveLine(item.line, `public_content[${i}].line`);
requireString(item.source, `public_content[${i}].source`, { optional: true });
requireString(item.excerpt, `public_content[${i}].excerpt`, { optional: true });
requireString(item.message, `public_content[${i}].message`, { optional: true });
requireString(item.suggestion, `public_content[${i}].suggestion`, { optional: true });
}
for (const [i, value] of requireArray(facts, "diagnostics").entries()) {
const item = requireObject(value, `diagnostics[${i}]`);
requireString(item.rule, `diagnostics[${i}].rule`);

View File

@@ -67,7 +67,43 @@ describe("verifyZipEntries", () => {
const dir = fs.mkdtempSync(path.join(os.tmpdir(), "semantic-review-zip-"));
const zipPath = path.join(dir, "facts.zip");
const outPath = path.join(dir, "facts.json");
const facts = Buffer.from('{"schema_version":1}\n');
const restrictedScope = "pri" + "vate";
const facts = Buffer.from(JSON.stringify({
schema_version: 1,
public_content: [
{
rule: "public_content_semantic_candidate",
action: "WARNING",
file: "pull_request_metadata",
line: 1,
source: "metadata",
excerpt: "public release notes mention an internal rollout plan",
message: "public contribution may contain sensitive implementation detail",
suggestion: "move internal detail to " + restrictedScope + " discussion",
},
{
rule: "public_content_change_id_trailer",
action: "REJECT",
file: "commit:1234abc",
line: 3,
source: "commit",
},
{
rule: "public_content_automation_branch",
action: "WARNING",
file: "branch",
line: 1,
source: "branch",
},
{
rule: "public_content_" + "pri" + "vate_ipv4",
action: "WARNING",
file: "docs/public-network.md",
line: 7,
source: "file",
},
],
}) + "\n");
const zip = makeZip([{ fileName: "facts.json", data: facts, mode: 0o100644 }]);
fs.writeFileSync(zipPath, zip);
@@ -103,6 +139,19 @@ describe("verifyZipEntries", () => {
["bad-error-path", Buffer.from('{"schema_version":1,"errors":[{"file":"../x.go","line":1,"boundary":true,"uses_structured_error":false,"has_hint":false,"hint_action_count":0,"required_hint":true,"retryable":false}]}'), /errors\[0\]\.file/],
["bad-example-dry-run", Buffer.from('{"schema_version":1,"examples":[{"raw":"lark-cli docs +fetch","source_file":"skills/lark-doc/SKILL.md","line":3,"executable":true,"dry_run":{"method":"GET","url":"/open-apis/docx","query":{"page_size":["20",1]}}}]}'), /examples\[0\]\.dry_run\.query\.page_size\[1\]/],
["bad-output-field", Buffer.from(JSON.stringify({ schema_version: 1, outputs: [{ command: "drive files list", fields: ["ok", "x".repeat(9000)] }] })), /outputs\[0\]\.fields\[1\]/],
["non-array-public-content", Buffer.from('{"schema_version":1,"public_content":{}}'), /public_content must be an array/],
["bad-public-content-item", Buffer.from('{"schema_version":1,"public_content":["not-object"]}'), /public_content\[0\]/],
["bad-public-content-action", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_semantic_candidate","action":"BLOCK","file":"pull_request_metadata","line":1}]}'), /public_content\[0\]\.action/],
["bad-public-content-path", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_semantic_candidate","action":"WARNING","file":"../x","line":1}]}'), /public_content\[0\]\.file/],
["dot-slash-public-content-path", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_semantic_candidate","action":"WARNING","file":"./foo","line":1}]}'), /public_content\[0\]\.file/],
["empty-public-content-path", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_semantic_candidate","action":"WARNING","file":"","line":1}]}'), /public_content\[0\]\.file/],
["dot-public-content-path", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_semantic_candidate","action":"WARNING","file":".","line":1}]}'), /public_content\[0\]\.file/],
["url-public-content-path", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_semantic_candidate","action":"WARNING","file":"https://example.invalid/x","line":1}]}'), /public_content\[0\]\.file/],
["dotgit-public-content-path", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_semantic_candidate","action":"WARNING","file":".git/config","line":1}]}'), /public_content\[0\]\.file/],
["windows-public-content-path", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_semantic_candidate","action":"WARNING","file":"C:\\\\tmp\\\\x","line":1}]}'), /public_content\[0\]\.file/],
["bad-public-content-commit-ref", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_change_id_trailer","action":"REJECT","file":"commit:notasha","line":1}]}'), /public_content\[0\]\.file/],
["bad-public-content-line", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_semantic_candidate","action":"WARNING","file":"pull_request_metadata","line":"1"}]}'), /public_content\[0\]\.line/],
["zero-public-content-line", Buffer.from('{"schema_version":1,"public_content":[{"rule":"public_content_semantic_candidate","action":"WARNING","file":"pull_request_metadata","line":0}]}'), /public_content\[0\]\.line/],
["bad-diagnostic-action", Buffer.from('{"schema_version":1,"diagnostics":[{"rule":"r","action":"BLOCK","file":"x.go","line":1,"message":"m"}]}'), /diagnostics.*action/],
["long-message", Buffer.from(JSON.stringify({ schema_version: 1, diagnostics: [{ rule: "r", action: "REJECT", file: "x.go", line: 1, message: "x".repeat(9000) }] })), /too long/],
]) {

View File

@@ -184,6 +184,10 @@ require_in_step "$summary_verify_step" 'eventHeadSha && eventHeadSha.toLowerCase
require_in_step "$summary_verify_step" 'factsArtifactPattern' "PR quality summary should use the base-bound facts artifact name when available"
require_in_step "$summary_verify_step" 'const baseSha = artifactBaseSha || eventBaseSha || pr.base.sha' "PR quality summary must prefer the CI-time artifact base SHA"
require_in_step "$summary_verify_step" 'core.setOutput("artifact_error"' "PR quality summary must expose artifact binding failures"
require_in_step "$summary_verify_step" 'state: "all"' "PR quality summary fallback must inspect closed PRs before failing"
require_in_step "$summary_verify_step" 'candidate.state === "open"' "PR quality summary fallback must still prefer open PRs"
require_in_step "$summary_verify_step" 'workflow_run target PR is no longer open' "PR quality summary must skip stale workflow_run events after PR closure"
require_in_step "$summary_verify_step" 'pr.state !== "open"' "PR quality summary must skip direct workflow_run PR bindings after PR closure"
require_in_step "$summary_artifact_step" 'factsArtifactName' "PR quality summary artifact step must use the verified facts artifact binding"
require_in_step "$summary_extract_facts_step" 'SEMANTIC_REVIEW_DECISION_OUT' "PR quality summary artifact verifier must write an infrastructure decision on verifier failure"
@@ -212,7 +216,12 @@ require_in_step "$verify_step" 'runPRs.length > 1' "semantic-review must fail cl
require_in_step "$verify_step" 'listPullRequestsAssociatedWithCommit' "semantic-review must resolve fork workflow_run PRs when pull_requests is empty"
require_in_step "$verify_step" 'commit_sha: targetHeadSha' "semantic-review fallback must resolve PRs by the workflow_run PR head SHA"
require_in_step "$verify_step" 'github.rest.pulls.list' "semantic-review must have a pull-list fallback when commit association is empty"
require_in_step "$verify_step" 'candidatePRs.length > 1' "semantic-review must fail closed when commit-to-PR fallback is ambiguous"
require_in_step "$verify_step" 'openCandidatePRs.length > 1' "semantic-review must fail closed when commit-to-PR fallback is ambiguous"
require_in_step "$verify_step" 'state: "all"' "semantic-review fallback must inspect closed PRs before failing"
require_in_step "$verify_step" 'candidate.state === "open"' "semantic-review fallback must still prefer open PRs"
require_in_step "$verify_step" 'workflow_run target PR is no longer open' "semantic-review must skip stale workflow_run events after PR closure"
require_in_step "$verify_step" 'pr.state !== "open"' "semantic-review must skip direct workflow_run PR bindings after PR closure"
require_in_step "$verify_step" '!pr.head.repo' "semantic-review must skip unavailable PR head repositories before reading owner/repo"
require_in_step "$verify_step" 'pr.head.sha !== targetHeadSha' "semantic-review must skip stale PR heads"
require_in_step "$verify_step" 'eventBaseSha && parsedBaseSha.toLowerCase() !== eventBaseSha.toLowerCase()' "semantic-review should tolerate mutable workflow_run PR base metadata"
require_in_step "$verify_step" 'const baseSha = artifactBaseSha || eventBaseSha || pr.base.sha' "semantic-review must prefer the CI-time artifact base SHA"
@@ -260,6 +269,7 @@ require_in_step "$semantic_step" 'args+=(--waivers-file' "same-repo PR head waiv
require_in_step "$precheckout_step" 'SEMANTIC_REVIEW_BASE_SHA' "pre-checkout failure publisher must receive verified base SHA"
require_in_step "$precheckout_step" 'SEMANTIC_REVIEW_RUN_ID' "pre-checkout failure publisher must receive verified run id"
require_in_step "$precheckout_step" 'github.rest.pulls.get' "pre-checkout failure publisher must recheck PR target before writing"
require_in_step "$precheckout_step" 'pull.state !== "open"' "pre-checkout failure publisher must skip closed PRs before writing"
require_in_step "$precheckout_step" 'pull.head.sha !== headSha' "pre-checkout failure publisher must skip stale PR heads"
require_in_step "$precheckout_step" 'pull.base.sha !== baseSha' "pre-checkout failure publisher must skip stale PR bases"