fix(ci): reduce public content false positives

This commit is contained in:
Public Content Screenshot
2026-06-25 20:07:11 +08:00
committed by HanShaoshuai-k
parent 7346de30b1
commit d69761e205
3 changed files with 305 additions and 3 deletions

View File

@@ -63,7 +63,7 @@ func isPlaceholderValue(value string) bool {
func namedPlaceholderValue(value string) bool {
switch value {
case "placeholder", "redacted", "<redacted>", "xxxx":
case "...", "placeholder", "redacted", "<redacted>", "xxxx", "test-secret":
return true
}
return strings.Contains(value, "cli_example") || allXPlaceholder(value)
@@ -284,10 +284,14 @@ func anglePlaceholderIdentifier(value string) bool {
switch value {
case "token",
"id",
"userid",
"openid",
"key",
"secret",
"password",
"api-key",
"user-id",
"open-id",
"client-secret",
"access-token",
"refresh-token",
@@ -353,8 +357,24 @@ func resourceTokenPlaceholderValue(value string) bool {
"drive_route_token":
return true
default:
return minuteTokenFixturePlaceholder(normalized)
}
}
// minuteTokenFixturePlaceholder reports whether value is one of the minute
// token fixtures: the literal "minute_no_meta", or "minute_" followed by one
// or more ASCII digits (for example "minute_1"). A bare "minute_" prefix and
// any non-digit suffix are rejected.
func minuteTokenFixturePlaceholder(value string) bool {
	const prefix = "minute_"
	switch {
	case value == "minute_no_meta":
		return true
	case !strings.HasPrefix(value, prefix):
		return false
	case len(value) == len(prefix):
		// Prefix with nothing after it is not a fixture name.
		return false
	}
	notDigit := func(r rune) bool { return r < '0' || r > '9' }
	return strings.IndexFunc(value[len(prefix):], notDigit) < 0
}
func provenanceMarker(line string) bool {

View File

@@ -52,6 +52,7 @@ func scanText(file, source, text string, detectorFile bool) []Finding {
keyName, _ := normalizedCredentialAssignmentKey(match[0])
if value == "" ||
isNonSecretLiteralValue(value) ||
isBenignCodeCredentialExpression(file, value) ||
isPlaceholderValue(value) ||
isResourceTokenPlaceholderAssignment(keyName, value) {
continue
@@ -62,6 +63,9 @@ func scanText(file, source, text string, detectorFile bool) []Finding {
out = append(out, newFinding("public_content_generic_credential", file, lineNo, source, redactAssignment(match[0])))
}
for _, match := range jwtLikeRE.FindAllString(line, -1) {
if isSchemaDottedIdentifier(line, match) {
continue
}
out = append(out, newFinding("public_content_jwt_like_token", file, lineNo, source, redactToken(match)))
}
for range bearerHeaderRE.FindAllString(line, -1) {
@@ -251,12 +255,149 @@ func isResourceTokenField(key string) bool {
}
func isResourceTokenPlaceholderAssignment(key, value string) bool {
return key == "token" && resourceTokenPlaceholderValue(value)
switch {
case key == "client_token" && idempotencyTokenPlaceholderValue(value):
return true
case key == "retry_without_token" && numericStringPlaceholderValue(value):
return true
case tokenLikePlaceholderKey(key):
return tokenLikePlaceholderValue(value)
default:
return false
}
}
// tokenLikePlaceholderKey reports whether key names a token field: exactly
// "token", or any key ending in "_token" or "-token". The comparison is
// case-sensitive.
func tokenLikePlaceholderKey(key string) bool {
	if key == "token" {
		return true
	}
	for _, suffix := range []string{"_token", "-token"} {
		if strings.HasSuffix(key, suffix) {
			return true
		}
	}
	return false
}
// tokenLikePlaceholderValue reports whether value, assigned to a token-like
// key, reads as a placeholder rather than a real token.
//
// The value is compared after stripping surrounding quote characters and
// lower-casing. Empty values and values that credentialShapedIdentifier
// recognises are never placeholders. Otherwise the value qualifies when the
// resource-token or general placeholder checks accept it, or when it is the
// word "token", ends in "_token", starts with ".", or contains "...", "xxx"
// or "_or_".
func tokenLikePlaceholderValue(value string) bool {
	lowered := strings.ToLower(strings.Trim(value, `"'`))
	if lowered == "" {
		return false
	}
	if credentialShapedIdentifier(lowered) {
		return false
	}
	// The shared placeholder helpers receive the raw, untrimmed value.
	if resourceTokenPlaceholderValue(value) || isPlaceholderValue(value) {
		return true
	}
	if lowered == "token" {
		return true
	}
	for _, marker := range []string{"...", "xxx", "_or_"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	if strings.HasSuffix(lowered, "_token") {
		return true
	}
	return strings.HasPrefix(lowered, ".")
}
// idempotencyTokenPlaceholderValue reports whether value has the shape of an
// idempotency key example: either an all-digit string or a UUID string.
func idempotencyTokenPlaceholderValue(value string) bool {
	if numericStringPlaceholderValue(value) {
		return true
	}
	return uuidStringPlaceholderValue(value)
}
// uuidStringPlaceholderValue reports whether value, after stripping leading
// and trailing quote characters, is laid out like a UUID: five hyphen-separated
// groups of 8, 4, 4, 4 and 12 hexadecimal digits (either letter case).
func uuidStringPlaceholderValue(value string) bool {
	groupLens := [...]int{8, 4, 4, 4, 12}
	groups := strings.Split(strings.Trim(value, `"'`), "-")
	if len(groups) != len(groupLens) {
		return false
	}
	for idx, group := range groups {
		if len(group) != groupLens[idx] {
			return false
		}
		for _, ch := range group {
			switch {
			case '0' <= ch && ch <= '9':
			case 'a' <= ch && ch <= 'f':
			case 'A' <= ch && ch <= 'F':
			default:
				return false
			}
		}
	}
	return true
}
// numericStringPlaceholderValue reports whether value, after stripping leading
// and trailing quote characters, is a non-empty run of ASCII digits. Signs,
// decimal points and non-ASCII digits are rejected.
func numericStringPlaceholderValue(value string) bool {
	digits := strings.Trim(value, `"'`)
	if len(digits) == 0 {
		return false
	}
	notDigit := func(r rune) bool { return r < '0' || r > '9' }
	return strings.IndexFunc(digits, notDigit) == -1
}
// isBenignCodeCredentialExpression reports whether value, the right-hand side
// of a credential-looking assignment found in file, is ordinary code rather
// than a secret.
//
// A value beginning with "regexp.MustCompile(" is accepted for any file. All
// other values are accepted only when file is a recognised source-code file,
// the value is not a quoted literal, it is not credential-shaped, and the
// trimmed value looks like a code reference expression.
func isBenignCodeCredentialExpression(file, value string) bool {
	expr := strings.TrimSpace(value)
	if strings.HasPrefix(expr, "regexp.MustCompile(") {
		return true
	}
	if !sourceCodeFile(file) {
		return false
	}
	// These two checks look at the raw value, not the trimmed expression.
	if quotedLiteral(value) || credentialShapedValue(value) {
		return false
	}
	return codeReferenceExpression(expr)
}
// sourceCodeFile reports whether file has a ".go" or ".py" extension. The
// extension comparison is case-sensitive.
func sourceCodeFile(file string) bool {
	ext := filepath.Ext(file)
	return ext == ".go" || ext == ".py"
}
// quotedLiteral reports whether value, ignoring surrounding whitespace, is at
// least two characters long and both starts and ends with the same quote
// character (double or single).
func quotedLiteral(value string) bool {
	trimmed := strings.TrimSpace(value)
	if len(trimmed) < 2 {
		return false
	}
	first, last := trimmed[0], trimmed[len(trimmed)-1]
	if first != last {
		return false
	}
	return first == '"' || first == '\''
}
// codeReferenceExpression reports whether value looks like a code expression
// that refers to something else instead of spelling out a literal.
//
// Any value containing one of the characters . ( ) [ ] { qualifies. Otherwise
// the value must be a plain identifier that does not itself contain a
// credential-related word. The empty string never qualifies.
func codeReferenceExpression(value string) bool {
	if len(value) == 0 {
		return false
	}
	if strings.ContainsAny(value, ".()[]{") {
		return true
	}
	if !codeIdentifier(value) {
		return false
	}
	return !credentialNameFragment(value)
}
// codeIdentifier reports whether value consists solely of ASCII letters,
// digits and underscores, with the first character restricted to a letter.
// The empty string passes vacuously because there is no character to reject.
func codeIdentifier(value string) bool {
	for pos, ch := range value {
		isLetter := (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
		if isLetter {
			continue
		}
		// Underscores and digits are only allowed after the first character.
		isTail := ch == '_' || (ch >= '0' && ch <= '9')
		if pos > 0 && isTail {
			continue
		}
		return false
	}
	return true
}
// credentialNameFragment reports whether value contains, case-insensitively,
// any of the words "secret", "token", "password", "passwd" or "key" as a
// substring.
func credentialNameFragment(value string) bool {
	lowered := strings.ToLower(value)
	switch {
	case strings.Contains(lowered, "secret"),
		strings.Contains(lowered, "token"),
		strings.Contains(lowered, "password"),
		strings.Contains(lowered, "passwd"),
		strings.Contains(lowered, "key"):
		return true
	}
	return false
}
// isSchemaDottedIdentifier reports whether match, a candidate found on line,
// should be treated as a dotted schema path (for example
// "mail.user_mailbox.sent_messages") rather than a JWT-like token.
//
// The line must contain "schema " and the match must contain an underscore.
// An underscore alone cannot rule out a real token, because '_' is part of the
// base64url alphabet used by JWT segments. Matches that begin with "eyJ" (the
// base64url encoding of a JSON object header opening with `{"` and a letter)
// are therefore never classified as schema identifiers, so a real token on a
// line that happens to mention "schema " is still reported.
func isSchemaDottedIdentifier(line, match string) bool {
	if strings.HasPrefix(match, "eyJ") {
		return false
	}
	return strings.Contains(line, "schema ") && strings.Contains(match, "_")
}
func isNonSecretLiteralValue(value string) bool {
switch strings.ToLower(strings.TrimSpace(strings.Trim(value, `"'`))) {
case "true", "false", "null", "nil":
case "true", "false", "null", "nil", "{", "[":
return true
default:
return false

View File

@@ -700,6 +700,8 @@ func TestScanFileAllowsBenignJSONTokenFields(t *testing.T) {
`{"drive_route_token":"route-example"}`,
`{"token":"<wiki_token>"}`,
`{"token":"wiki_token"}`,
`{"token":"minute_1"}`,
`{"token":"minute_no_meta"}`,
`{"token_url":"https://example.com/oauth/token"}`,
`{"token_endpoint":"https://example.com/oauth/token"}`,
`{"token_format":"Bearer"}`,
@@ -722,6 +724,145 @@ func TestScanFileAllowsBenignJSONTokenFields(t *testing.T) {
}
}
// TestScanFileAllowsTestFixtureSecretValues checks that a struct literal using
// the fixture value "test-secret" for AppSecret does not produce a generic
// credential finding.
func TestScanFileAllowsTestFixtureSecretValues(t *testing.T) {
	const path = "shortcuts/calendar/calendar_meeting_test.go"
	content := `AppID: "test-app", AppSecret: "test-secret", Brand: core.BrandFeishu,` + "\n"

	findings := ScanFile(path, []byte(content))
	for _, finding := range findings {
		if finding.Rule != "public_content_generic_credential" {
			continue
		}
		t.Fatalf("test fixture secret should not be credential finding: %#v", findings)
	}
}
// TestScanFileAllowsRegexpTokenValidators checks that assigning a compiled
// regular expression to a token-named variable does not produce a generic
// credential finding.
func TestScanFileAllowsRegexpTokenValidators(t *testing.T) {
	content := "var validMinuteTokenDetail = regexp.MustCompile(`^[a-z0-9]+$`)\n"

	findings := ScanFile("shortcuts/minutes/minutes_detail.go", []byte(content))
	for _, finding := range findings {
		if finding.Rule != "public_content_generic_credential" {
			continue
		}
		t.Fatalf("regexp token validator should not be credential finding: %#v", findings)
	}
}
// TestScanFileAllowsBenignSourceCodeCredentialExpressions checks that Go
// source lines assigning identifiers, selector expressions or call results to
// credential-named fields do not produce generic credential findings.
func TestScanFileAllowsBenignSourceCodeCredentialExpressions(t *testing.T) {
	lines := []string{
		"AppSecret: stored,",
		"AccessToken: result.Token.AccessToken,",
		`token := runtime.Str("token")`,
	}
	content := strings.Join(lines, "\n") + "\n"

	findings := ScanFile("cmd/config/binder.go", []byte(content))
	for _, finding := range findings {
		if finding.Rule != "public_content_generic_credential" {
			continue
		}
		t.Fatalf("source code credential expressions should not be credential findings: %#v", findings)
	}
}
// TestScanFileAllowsPythonArgumentTokens checks that Python code using "token"
// as a variable or parameter name does not produce generic credential
// findings.
func TestScanFileAllowsPythonArgumentTokens(t *testing.T) {
	lines := []string{
		"def normalize_token(value: str) -> str:",
		" token = rest[index]",
		" next_token = rest[index + 1] if index + 1 < len(rest) else None",
		"def append_unique(target: list[str], token: str) -> None:",
		` fail(f"invalid range token: {trimmed}")`,
	}
	content := strings.Join(lines, "\n") + "\n"

	findings := ScanFile("skills/lark-slides/scripts/iconpark_tool.py", []byte(content))
	for _, finding := range findings {
		if finding.Rule != "public_content_generic_credential" {
			continue
		}
		t.Fatalf("python token variables should not be credential findings: %#v", findings)
	}
}
// TestScanFileAllowsEllipsisCredentialPlaceholders checks that markup
// attributes whose token value is the ellipsis placeholder "..." do not
// produce generic credential findings.
func TestScanFileAllowsEllipsisCredentialPlaceholders(t *testing.T) {
	lines := []string{
		`<img token="..." url="https://..." width="..." height="..."/>`,
		`<sheet token="..." sheet-id="...">`,
	}
	content := strings.Join(lines, "\n") + "\n"

	findings := ScanFile("skills/lark-doc/references/lark-doc-fetch.md", []byte(content))
	for _, finding := range findings {
		if finding.Rule != "public_content_generic_credential" {
			continue
		}
		t.Fatalf("ellipsis placeholders should not be credential findings: %#v", findings)
	}
}
// TestScanFileAllowsSchemaDottedIdentifiers checks that a dotted schema path
// on a "lark-cli schema" command line does not produce a JWT-like token
// finding.
func TestScanFileAllowsSchemaDottedIdentifiers(t *testing.T) {
	content := "lark-cli schema mail.user_mailbox.sent_messages.get_recall_detail\n"

	findings := ScanFile("skills/lark-mail/references/lark-mail-recall.md", []byte(content))
	for _, finding := range findings {
		if finding.Rule != "public_content_jwt_like_token" {
			continue
		}
		t.Fatalf("schema dotted identifier should not be jwt finding: %#v", findings)
	}
}
// TestScanFileAllowsClientTokenIdempotencyExamples checks that client_token
// examples holding an all-digit value or a UUID do not produce generic
// credential findings.
func TestScanFileAllowsClientTokenIdempotencyExamples(t *testing.T) {
	lines := []string{
		`{"client_token":"1704067200"}`,
		`{"client_token":"fe599b60-450f-46ff-b2ef-9f6675625b97"}`,
	}
	content := strings.Join(lines, "\n") + "\n"

	findings := ScanFile("skills/idempotency.md", []byte(content))
	for _, finding := range findings {
		if finding.Rule != "public_content_generic_credential" {
			continue
		}
		t.Fatalf("client_token idempotency examples should not be credential findings: %#v", findings)
	}
}
// TestScanFileDetectsCredentialShapedClientTokenValues checks that client_token
// values shaped like real credentials are still reported: both input lines
// must each yield one generic credential finding.
func TestScanFileDetectsCredentialShapedClientTokenValues(t *testing.T) {
	// Assembled from two pieces; presumably so the full literal never appears
	// contiguously in this source file.
	stripeLike := "sk_" + "live_1234567890abcdef"
	lines := []string{
		`{"client_token":"` + stripeLike + `"}`,
		`{"client_token":"real-client-secret-value"}`,
	}
	content := strings.Join(lines, "\n") + "\n"

	findings := ScanFile("skills/idempotency.md", []byte(content))
	hits := 0
	for _, finding := range findings {
		if finding.Rule == "public_content_generic_credential" {
			hits++
		}
	}
	if hits == 2 {
		return
	}
	t.Fatalf("credential-shaped client_token findings = %d, want 2: %#v", hits, findings)
}
// TestScanFileAllowsTokenLikePlaceholderExamples checks that a range of
// documentation-style placeholder values on token-like keys do not produce
// generic credential findings.
func TestScanFileAllowsTokenLikePlaceholderExamples(t *testing.T) {
	lines := []string{
		`{ "block_token": "boardXXXX" }`,
		`{ "resource_token": "doc_token_or_url" }`,
		`{ "token": "canonical_token" }`,
		`{ "target_parent_token": "wikcparent_xxx" }`,
		`{ "mention_token": "<userId>" }`,
		`{ "22-doc_token_xxx": { "objType": 22 } }`,
		`{ "token": "12101..." }`,
		`{ token: .token }`,
		`retry_without_token = 0`,
	}
	content := strings.Join(lines, "\n") + "\n"

	findings := ScanFile("skills/placeholders.md", []byte(content))
	for _, finding := range findings {
		if finding.Rule != "public_content_generic_credential" {
			continue
		}
		t.Fatalf("token-like placeholders should not be credential findings: %#v", findings)
	}
}
// TestScanFileDetectsCredentialShapedTokenLikePlaceholderValues checks that
// credential-shaped values on token-like keys are still reported: both input
// lines must each yield one generic credential finding.
func TestScanFileDetectsCredentialShapedTokenLikePlaceholderValues(t *testing.T) {
	// Assembled from two pieces; presumably so the full literal never appears
	// contiguously in this source file.
	stripeLike := "sk_" + "live_1234567890abcdef"
	lines := []string{
		`{ "resource_token": "` + stripeLike + `" }`,
		`{ "block_token": "real-client-secret-value" }`,
	}
	content := strings.Join(lines, "\n") + "\n"

	findings := ScanFile("skills/placeholders.md", []byte(content))
	hits := 0
	for _, finding := range findings {
		if finding.Rule == "public_content_generic_credential" {
			hits++
		}
	}
	if hits == 2 {
		return
	}
	t.Fatalf("credential-shaped token-like placeholders findings = %d, want 2: %#v", hits, findings)
}
// TestScanFileDetectsNonFixtureMinuteTokenValues checks that a "minute_" token
// value which is not one of the accepted fixture forms is still reported as a
// generic credential finding.
func TestScanFileDetectsNonFixtureMinuteTokenValues(t *testing.T) {
	content := `{"token":"minute_real_secret"}` + "\n"

	findings := ScanFile("shortcuts/minutes/minutes_search_test.go", []byte(content))
	rules := findingRules(findings)
	if rules["public_content_generic_credential"] {
		return
	}
	t.Fatalf("non-fixture minute token should be credential finding: %#v", findings)
}
func TestScanFileAllowsBenignUnquotedTokenFields(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte(strings.Join([]string{
"tokens: 128",