diff --git a/internal/qualitygate/publiccontent/scan.go b/internal/qualitygate/publiccontent/scan.go
index c2376c77..577697a5 100644
--- a/internal/qualitygate/publiccontent/scan.go
+++ b/internal/qualitygate/publiccontent/scan.go
@@ -7,6 +7,7 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"math"
 	"path/filepath"
 	"sort"
 	"strings"
@@ -78,12 +79,15 @@ func scanText(file, source, text string, detectorFile bool) []Finding {
 			out = append(out, newFinding("public_content_bearer_header", file, lineNo, source, "Authorization: Bearer "))
 		}
 		for _, match := range credentialURLRE.FindAllString(line, -1) {
-			if isPlaceholderCredentialURL(match) {
+			if isPlaceholderCredentialURL(file, match) {
 				continue
 			}
 			out = append(out, newFinding("public_content_credential_url", file, lineNo, source, redactCredentialURL(match)))
 		}
 		for _, match := range privateIPv4RE.FindAllString(line, -1) {
+			if !warnForPrivateIPv4(file) {
+				continue
+			}
 			out = append(out, newFinding("public_content_private_ipv4", file, lineNo, source, match))
 		}
 		if source == "branch" && automationBranchRE.MatchString(line) {
@@ -130,6 +134,9 @@ func isCredentialAssignmentMatch(match string) bool {
 	if isBenignTokenField(name) && !credentialShapedValue(value) {
 		return false
 	}
+	if isWeakTokenCredentialKey(name) && !weakTokenValueLooksCredentialLike(value) {
+		return false
+	}
 	return isExplicitCredentialKey(name)
 }
 
@@ -284,6 +291,9 @@ func tokenLikePlaceholderValue(key, value string) bool {
 	if normalized == "" || credentialShapedIdentifier(normalized) {
 		return false
 	}
+	if authCredentialTokenKey(key) {
+		return false
+	}
 	return resourceTokenPlaceholderValue(value) ||
 		maskedTokenFixturePlaceholderValue(key, normalized) ||
 		isPlaceholderValue(value) ||
@@ -313,11 +323,130 @@ func maskedTokenFixturePlaceholderValue(key, value string) bool {
 	return stars >= 6 && alnum > 0
 }
 
+// isWeakTokenCredentialKey reports whether key is a generic token name
+// ("token", or anything ending in "_token" or "-token") that is neither an
+// auth credential token key nor a strong token credential key.
+// NOTE(review): the suffix checks are case-sensitive while the helpers called
+// here lower-case their input; presumably key is already lower-cased by the
+// caller - confirm.
+func isWeakTokenCredentialKey(key string) bool {
+	if authCredentialTokenKey(key) || isStrongTokenCredentialKey(key) {
+		return false
+	}
+	return key == "token" ||
+		strings.HasSuffix(key, "_token") ||
+		strings.HasSuffix(key, "-token")
+}
+
+// isStrongTokenCredentialKey reports whether the lower-cased key, with "-"
+// replaced by "_", contains one of the listed two-word token phrases
+// according to hasAdjacentCredentialParts.
+func isStrongTokenCredentialKey(key string) bool {
+	parts := credentialKeyParts(strings.ReplaceAll(strings.ToLower(key), "-", "_"))
+	for _, phrase := range [][2]string{
+		{"access", "token"},
+		{"refresh", "token"},
+		{"auth", "token"},
+		{"bearer", "token"},
+		{"session", "token"},
+		{"service", "token"},
+		{"bot", "token"},
+		{"api", "token"},
+		{"secret", "token"},
+	} {
+		if hasAdjacentCredentialParts(parts, phrase[0], phrase[1]) {
+			return true
+		}
+	}
+	return false
+}
+
+// weakTokenValueLooksCredentialLike reports whether value shows positive
+// credential evidence: a credential-shaped identifier, a high-entropy string,
+// a credential-like command substitution, or a URL with a credential-like
+// remainder. Empty, non-secret literal, and placeholder values return false.
+func weakTokenValueLooksCredentialLike(value string) bool {
+	normalized := strings.ToLower(strings.Trim(value, `"'<>`))
+	if normalized == "" ||
+		isNonSecretLiteralValue(value) ||
+		isPlaceholderValue(value) {
+		return false
+	}
+	candidate := unwrapCredentialValue(normalized)
+	return credentialShapedIdentifier(candidate) ||
+		highEntropyCredentialValue(candidate) ||
+		commandSubstitutionLooksCredentialLike(normalized) ||
+		(strings.Contains(normalized, "://") &&
+			urlRemainderLooksCredentialLike(removeAnglePlaceholders(normalized)))
+}
+
+// unwrapCredentialValue strips surrounding quotes and angle brackets, a
+// ${{ ... }} wrapper, a leading "$", and surrounding "%" from value.
+func unwrapCredentialValue(value string) string {
+	value = strings.TrimSpace(strings.Trim(value, `"'<>`))
+	if strings.HasPrefix(value, "${{") && strings.HasSuffix(value, "}}") {
+		value = strings.TrimSpace(strings.TrimSuffix(strings.TrimPrefix(value, "${{"), "}}"))
+	}
+	value = strings.TrimPrefix(value, "$")
+	value = strings.Trim(value, "%")
+	return strings.TrimSpace(value)
+}
+
+// highEntropyCredentialValue reports whether value is at least 32 bytes long,
+// consists only of a-z, 0-9, and "_-.=", contains both a letter and a digit,
+// and has a Shannon entropy of at least 3.5 bits per character.
+func highEntropyCredentialValue(value string) bool {
+	if len(value) < 32 {
+		return false
+	}
+	var hasLetter, hasDigit bool
+	for _, r := range value {
+		switch {
+		case r >= 'a' && r <= 'z':
+			hasLetter = true
+		case r >= '0' && r <= '9':
+			hasDigit = true
+		case r == '_' || r == '-' || r == '.' || r == '=':
+		default:
+			return false
+		}
+	}
+	return hasLetter && hasDigit && shannonEntropy(value) >= 3.5
+}
+
+// shannonEntropy returns the Shannon entropy of value in bits per rune, or 0
+// for the empty string.
+func shannonEntropy(value string) float64 {
+	if value == "" {
+		return 0
+	}
+	counts := map[rune]int{}
+	for _, r := range value {
+		counts[r]++
+	}
+	var entropy float64
+	length := float64(len([]rune(value)))
+	for _, count := range counts {
+		p := float64(count) / length
+		entropy -= p * log2(p)
+	}
+	return entropy
+}
+
+// log2 returns the binary logarithm of value.
+func log2(value float64) float64 {
+	return math.Log2(value)
+}
+
 func authCredentialTokenKey(key string) bool {
 	switch strings.ReplaceAll(strings.ToLower(key), "-", "_") {
 	case "access_token",
+		"api_token",
+		"bot_token",
 		"refresh_token",
+		"secret_token",
 		"session_token",
+		"service_token",
 		"bearer_token",
 		"auth_token",
 		"authorization_token",
@@ -844,7 +973,7 @@ func looksLikeEqualityComparison(value string) bool {
 	return strings.HasPrefix(strings.TrimSpace(value), "=")
 }
 
-func isPlaceholderCredentialURL(raw string) bool {
+func isPlaceholderCredentialURL(file, raw string) bool {
 	userInfo, ok := credentialURLUserInfo(raw)
 	if !ok {
 		return false
@@ -853,7 +982,8 @@ func isPlaceholderCredentialURL(raw string) bool {
 	if !ok {
 		return false
 	}
-	return credentialURLPasswordPlaceholder(password)
+	return credentialURLPasswordPlaceholder(password) ||
+		(sourceOrTestFixtureFile(file) && credentialURLPasswordFixture(password))
 }
 
 func credentialURLPasswordPlaceholder(password string) bool {
@@ -867,6 +997,53 @@ func credentialURLPasswordPlaceholder(password string) bool {
 	return angleWrappedPlaceholder(decoded) || percentWrappedPlaceholder(decoded)
 }
 
+// credentialURLPasswordFixture reports whether password, with surrounding
+// quotes trimmed and lower-cased, is one of the listed fixture passwords.
+func credentialURLPasswordFixture(password string) bool {
+	normalized := strings.ToLower(strings.Trim(password, `"'`))
+	switch normalized {
+	case "p",
+		"pass",
+		"password",
+		"pat_abc",
+		"pw",
+		"s3cret",
+		"secret",
+		"t":
+		return true
+	default:
+		return false
+	}
+}
+
+// sourceOrTestFixtureFile reports whether file is accepted by sourceCodeFile
+// or sits under a testdata/ or fixtures/ directory at any depth.
+func sourceOrTestFixtureFile(file string) bool {
+	normalized := filepath.ToSlash(file)
+	return sourceCodeFile(normalized) ||
+		strings.HasPrefix(normalized, "testdata/") ||
+		strings.HasPrefix(normalized, "fixtures/") ||
+		strings.Contains(normalized, "/testdata/") ||
+		strings.Contains(normalized, "/fixtures/")
+}
+
+// warnForPrivateIPv4 reports whether private IPv4 matches in file should be
+// reported. Source and fixture files never are; other files are only when
+// they have a listed document/config extension or live under docs/ or skills/.
+func warnForPrivateIPv4(file string) bool {
+	normalized := filepath.ToSlash(file)
+	if sourceOrTestFixtureFile(normalized) {
+		return false
+	}
+	switch filepath.Ext(normalized) {
+	case ".md", ".mdx", ".txt", ".json", ".yaml", ".yml", ".toml", ".env":
+		return true
+	default:
+		return strings.HasPrefix(normalized, "docs/") ||
+			strings.HasPrefix(normalized, "skills/")
+	}
+}
+
 func credentialURLUserInfo(raw string) (string, bool) {
 	schemeIdx := strings.Index(raw, "://")
 	if schemeIdx < 0 {
diff --git a/internal/qualitygate/publiccontent/scan_test.go b/internal/qualitygate/publiccontent/scan_test.go
index 4dbb18b6..ad882597 100644
--- a/internal/qualitygate/publiccontent/scan_test.go
+++ b/internal/qualitygate/publiccontent/scan_test.go
@@ -61,6 +61,19 @@ func TestScanFileWarnsForPrivateIPv4Examples(t *testing.T) {
 	}
 }
 
+func TestScanFileAllowsPrivateIPv4SourceFixtures(t *testing.T) {
+	got := ScanFile("internal/transport/warn_test.go", []byte(strings.Join([]string{
+		`proxy := "http://user:pass@10.0.0.1:3128"`,
+		`target := "socks5://admin:secret@172.16.0.1:1080"`,
+		`host := "192.168.0.10"`,
+	}, "\n")+"\n"))
+	for _, item := range got {
+		if item.Rule == "public_content_private_ipv4" {
+			t.Fatalf("private IPv4 source fixtures should not be public content findings: %#v", got)
+		}
+	}
+}
+
 func TestSemanticCandidateRequiresSpecificRiskSignals(t *testing.T) {
 	benign := semanticCandidate("docs/network.md", "file", "For a local lab, use RFC1918 example host 192.168."+"0.10 only.", 1)
 	if len(benign) != 0 {
@@ -632,6 +645,45 @@ func TestScanFileAllowsCredentialURLPlaceholders(t *testing.T) {
 	}
 }
 
+func TestScanFileAllowsCredentialURLFixtures(t *testing.T) {
+	got := ScanFile("fixtures/network_test.go", []byte(strings.Join([]string{
+		`proxy := "http://user:pass@proxy:8080"`,
+		`repo := "https://u:t@h/r.git"`,
+		`target := "https://attacker:pw@open.feishu.cn"`,
+		`proxy := "http://admin:s3cret@127.0.0.1:3128"`,
+		`repo := "http://x-token:PAT_abc@git.host/app_x.git"`,
+	}, "\n")+"\n"))
+	for _, item := range got {
+		if item.Rule == "public_content_credential_url" {
+			t.Fatalf("credential URL fixtures should not be credential URL findings: %#v", got)
+		}
+	}
+}
+
+func TestScanFileAllowsRootCredentialURLFixtures(t *testing.T) {
+	got := ScanFile("fixtures/network.md", []byte(strings.Join([]string{
+		`proxy: http://user:pass@proxy:8080`,
+		`repo: https://u:t@h/r.git`,
+	}, "\n")+"\n"))
+	for _, item := range got {
+		if item.Rule == "public_content_credential_url" {
+			t.Fatalf("root credential URL fixtures should not be credential URL findings: %#v", got)
+		}
+	}
+}
+
+func TestScanFileAllowsRootPrivateIPv4Fixtures(t *testing.T) {
+	got := ScanFile("testdata/network.md", []byte(strings.Join([]string{
+		`endpoint: http://10.0.0.1:8080`,
+		`redis: 192.168.1.10:6379`,
+	}, "\n")+"\n"))
+	for _, item := range got {
+		if item.Rule == "public_content_private_ipv4" {
+			t.Fatalf("root private IPv4 fixtures should not be private IPv4 findings: %#v", got)
+		}
+	}
+}
+
 func TestScanFileDetectsCredentialURLsWithRedactedSubstringPasswords(t *testing.T) {
 	got := ScanFile("docs/config.yaml", []byte("DATABASE_URL=postgres://user:notredactedreal@example.invalid/db\n"))
 	for _, item := range got {
@@ -648,6 +700,7 @@ func TestScanFileDetectsCredentialURLsWithPlaceholderUserAndRealPassword(t *test
 		"DATABASE_URL=postgres://:real-secret@example.invalid/db",
 		"DATABASE_URL=postgres://:" + stripeLike + "@example.invalid/db",
 		"URL=https://:real-secret@example.invalid/path",
+		"REPO=https://x-token:" + stripeLike + "@git.host/app.git",
 	}, "\n")+"\n"))
 	var count int
 	for _, item := range got {
@@ -661,8 +714,8 @@ func TestScanFileDetectsCredentialURLsWithPlaceholderUserAndRealPassword(t *test
 			}
 		}
 	}
-	if count != 3 {
-		t.Fatalf("placeholder-user credential URL findings = %d, want 3: %#v", count, got)
+	if count != 4 {
+		t.Fatalf("placeholder-user credential URL findings = %d, want 4: %#v", count, got)
 	}
 }
 
@@ -724,6 +777,68 @@ func TestScanFileAllowsBenignJSONTokenFields(t *testing.T) {
 	}
 }
 
+func TestScanFileAllowsWeakTokenFieldsWithoutCredentialEvidence(t *testing.T) {
+	got := ScanFile("docs/resource-tokens.md", []byte(strings.Join([]string{
+		`{"token":"img_abc123"}`,
+		`{"token":"img_live_secret"}`,
+		`{"token":"img_prod_key"}`,
+		`token=ab********cd`,
+		`{"image_token":"img_live_secret"}`,
+		`{"data_mail_token":"mail_abc123"}`,
+		`{"whiteboard_token":"board_v3_example"}`,
+		`{"want_token":"token from callback"}`,
+	}, "\n")+"\n"))
+	for _, item := range got {
+		if item.Rule == "public_content_generic_credential" {
+			t.Fatalf("weak token fields without credential evidence should not be credential findings: %#v", got)
+		}
+	}
+}
+
+func TestScanFileDetectsWeakTokenFieldsWithHighConfidenceCredentialValues(t *testing.T) {
+	githubToken := "ghp_" + "1234567890abcdef1234567890abcdef1234"
+	stripeToken := "sk_" + "live_1234567890abcdef"
+	randomToken := strings.Join([]string{
+		"a1b2c3d4",
+		"e5f6g7h8",
+		"i9j0k1l2",
+		"m3n4p5q6",
+	}, "")
+	got := ScanFile("docs/config.md", []byte(strings.Join([]string{
+		`{"token":"` + githubToken + `"}`,
+		`token=` + stripeToken,
+		`{"image_token":"` + githubToken + `"}`,
+		`{"token":"` + randomToken + `"}`,
+	}, "\n")+"\n"))
+	var count int
+	for _, item := range got {
+		if item.Rule == "public_content_generic_credential" {
+			count++
+		}
+	}
+	if count != 4 {
+		t.Fatalf("high-confidence weak token credential findings = %d, want 4: %#v", count, got)
+	}
+}
+
+func TestScanFileDetectsStrongAuthTokenKeysWithFixtureLikeValues(t *testing.T) {
+	got := ScanFile("docs/config.md", []byte(strings.Join([]string{
+		`{"access_token":"img_abc123"}`,
+		`{"api_token":"img_live_secret"}`,
+		`{"service_token":"ab********cd"}`,
+		`{"bot_token":"board_v3_example"}`,
+	}, "\n")+"\n"))
+	var count int
+	for _, item := range got {
+		if item.Rule == "public_content_generic_credential" {
+			count++
+		}
+	}
+	if count != 4 {
+		t.Fatalf("strong auth token key findings = %d, want 4: %#v", count, got)
+	}
+}
+
 func TestScanFileAllowsTestFixtureSecretValues(t *testing.T) {
 	got := ScanFile("fixtures/calendar_meeting_test.go", []byte(`AppID: "test-app", AppSecret: "test-secret", Brand: core.BrandFeishu,`+"\n"))
 	for _, item := range got {
@@ -1052,10 +1167,12 @@ func TestScanFileDetectsCredentialShapedTokenLikePlaceholderValues(t *testing.T)
 	}
 }
 
-func TestScanFileDetectsNonFixtureMinuteTokenValues(t *testing.T) {
+func TestScanFileAllowsNonFixtureResourceTokenValues(t *testing.T) {
 	got := ScanFile("fixtures/minutes_search_test.go", []byte(`{"token":"minute_real_secret"}`+"\n"))
-	if !findingRules(got)["public_content_generic_credential"] {
-		t.Fatalf("non-fixture minute token should be credential finding: %#v", got)
+	for _, item := range got {
+		if item.Rule == "public_content_generic_credential" {
+			t.Fatalf("resource-like bare token value should not be credential finding: %#v", got)
+		}
 	}
 }
 