fix: reduce public content token false positives

This commit is contained in:
HanShaoshuai-k
2026-07-02 15:42:20 +08:00
committed by HanShaoshuai-k
parent d0cde9a414
commit 440867f1b4
2 changed files with 274 additions and 8 deletions

View File

@@ -7,6 +7,7 @@ import (
"encoding/base64"
"encoding/json"
"fmt"
"math"
"path/filepath"
"sort"
"strings"
@@ -78,12 +79,15 @@ func scanText(file, source, text string, detectorFile bool) []Finding {
out = append(out, newFinding("public_content_bearer_header", file, lineNo, source, "Authorization: Bearer <redacted>"))
}
for _, match := range credentialURLRE.FindAllString(line, -1) {
if isPlaceholderCredentialURL(match) {
if isPlaceholderCredentialURL(file, match) {
continue
}
out = append(out, newFinding("public_content_credential_url", file, lineNo, source, redactCredentialURL(match)))
}
for _, match := range privateIPv4RE.FindAllString(line, -1) {
if !warnForPrivateIPv4(file) {
continue
}
out = append(out, newFinding("public_content_private_ipv4", file, lineNo, source, match))
}
if source == "branch" && automationBranchRE.MatchString(line) {
@@ -130,6 +134,9 @@ func isCredentialAssignmentMatch(match string) bool {
if isBenignTokenField(name) && !credentialShapedValue(value) {
return false
}
if isWeakTokenCredentialKey(name) && !weakTokenValueLooksCredentialLike(value) {
return false
}
return isExplicitCredentialKey(name)
}
@@ -284,6 +291,9 @@ func tokenLikePlaceholderValue(key, value string) bool {
if normalized == "" || credentialShapedIdentifier(normalized) {
return false
}
if authCredentialTokenKey(key) {
return false
}
return resourceTokenPlaceholderValue(value) ||
maskedTokenFixturePlaceholderValue(key, normalized) ||
isPlaceholderValue(value) ||
@@ -313,11 +323,109 @@ func maskedTokenFixturePlaceholderValue(key, value string) bool {
return stars >= 6 && alnum > 0
}
func isWeakTokenCredentialKey(key string) bool {
if authCredentialTokenKey(key) || isStrongTokenCredentialKey(key) {
return false
}
return key == "token" ||
strings.HasSuffix(key, "_token") ||
strings.HasSuffix(key, "-token")
}
func isStrongTokenCredentialKey(key string) bool {
parts := credentialKeyParts(strings.ReplaceAll(strings.ToLower(key), "-", "_"))
for _, phrase := range [][2]string{
{"access", "token"},
{"refresh", "token"},
{"auth", "token"},
{"bearer", "token"},
{"session", "token"},
{"service", "token"},
{"bot", "token"},
{"api", "token"},
{"secret", "token"},
} {
if hasAdjacentCredentialParts(parts, phrase[0], phrase[1]) {
return true
}
}
return false
}
func weakTokenValueLooksCredentialLike(value string) bool {
normalized := strings.ToLower(strings.Trim(value, `"'<>`))
if normalized == "" ||
isNonSecretLiteralValue(value) ||
isPlaceholderValue(value) {
return false
}
candidate := unwrapCredentialValue(normalized)
return credentialShapedIdentifier(candidate) ||
highEntropyCredentialValue(candidate) ||
commandSubstitutionLooksCredentialLike(normalized) ||
(strings.Contains(normalized, "://") &&
urlRemainderLooksCredentialLike(removeAnglePlaceholders(normalized)))
}
func unwrapCredentialValue(value string) string {
value = strings.TrimSpace(strings.Trim(value, `"'<>`))
if strings.HasPrefix(value, "${{") && strings.HasSuffix(value, "}}") {
value = strings.TrimSpace(strings.TrimSuffix(strings.TrimPrefix(value, "${{"), "}}"))
}
value = strings.TrimPrefix(value, "$")
value = strings.Trim(value, "%")
return strings.TrimSpace(value)
}
func highEntropyCredentialValue(value string) bool {
if len(value) < 32 {
return false
}
var hasLetter, hasDigit bool
for _, r := range value {
switch {
case r >= 'a' && r <= 'z':
hasLetter = true
case r >= '0' && r <= '9':
hasDigit = true
case r == '_' || r == '-' || r == '.' || r == '=':
default:
return false
}
}
return hasLetter && hasDigit && shannonEntropy(value) >= 3.5
}
func shannonEntropy(value string) float64 {
if value == "" {
return 0
}
counts := map[rune]int{}
for _, r := range value {
counts[r]++
}
var entropy float64
length := float64(len([]rune(value)))
for _, count := range counts {
p := float64(count) / length
entropy -= p * log2(p)
}
return entropy
}
func log2(value float64) float64 {
return math.Log(value) / math.Ln2
}
func authCredentialTokenKey(key string) bool {
switch strings.ReplaceAll(strings.ToLower(key), "-", "_") {
case "access_token",
"api_token",
"bot_token",
"refresh_token",
"secret_token",
"session_token",
"service_token",
"bearer_token",
"auth_token",
"authorization_token",
@@ -844,7 +952,7 @@ func looksLikeEqualityComparison(value string) bool {
return strings.HasPrefix(strings.TrimSpace(value), "=")
}
func isPlaceholderCredentialURL(raw string) bool {
func isPlaceholderCredentialURL(file, raw string) bool {
userInfo, ok := credentialURLUserInfo(raw)
if !ok {
return false
@@ -853,7 +961,8 @@ func isPlaceholderCredentialURL(raw string) bool {
if !ok {
return false
}
return credentialURLPasswordPlaceholder(password)
return credentialURLPasswordPlaceholder(password) ||
(sourceOrTestFixtureFile(file) && credentialURLPasswordFixture(password))
}
func credentialURLPasswordPlaceholder(password string) bool {
@@ -867,6 +976,46 @@ func credentialURLPasswordPlaceholder(password string) bool {
return angleWrappedPlaceholder(decoded) || percentWrappedPlaceholder(decoded)
}
func credentialURLPasswordFixture(password string) bool {
normalized := strings.ToLower(strings.Trim(password, `"'`))
switch normalized {
case "p",
"pass",
"password",
"pat_abc",
"pw",
"s3cret",
"secret",
"t":
return true
default:
return false
}
}
func sourceOrTestFixtureFile(file string) bool {
normalized := filepath.ToSlash(file)
return sourceCodeFile(normalized) ||
strings.HasPrefix(normalized, "testdata/") ||
strings.HasPrefix(normalized, "fixtures/") ||
strings.Contains(normalized, "/testdata/") ||
strings.Contains(normalized, "/fixtures/")
}
func warnForPrivateIPv4(file string) bool {
normalized := filepath.ToSlash(file)
if sourceOrTestFixtureFile(normalized) {
return false
}
switch filepath.Ext(normalized) {
case ".md", ".mdx", ".txt", ".json", ".yaml", ".yml", ".toml", ".env":
return true
default:
return strings.HasPrefix(normalized, "docs/") ||
strings.HasPrefix(normalized, "skills/")
}
}
func credentialURLUserInfo(raw string) (string, bool) {
schemeIdx := strings.Index(raw, "://")
if schemeIdx < 0 {

View File

@@ -61,6 +61,19 @@ func TestScanFileWarnsForPrivateIPv4Examples(t *testing.T) {
}
}
func TestScanFileAllowsPrivateIPv4SourceFixtures(t *testing.T) {
got := ScanFile("internal/transport/warn_test.go", []byte(strings.Join([]string{
`proxy := "http://user:pass@10.0.0.1:3128"`,
`target := "socks5://admin:secret@172.16.0.1:1080"`,
`host := "192.168.0.10"`,
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_private_ipv4" {
t.Fatalf("private IPv4 source fixtures should not be public content findings: %#v", got)
}
}
}
func TestSemanticCandidateRequiresSpecificRiskSignals(t *testing.T) {
benign := semanticCandidate("docs/network.md", "file", "For a local lab, use RFC1918 example host 192.168."+"0.10 only.", 1)
if len(benign) != 0 {
@@ -632,6 +645,45 @@ func TestScanFileAllowsCredentialURLPlaceholders(t *testing.T) {
}
}
func TestScanFileAllowsCredentialURLFixtures(t *testing.T) {
got := ScanFile("fixtures/network_test.go", []byte(strings.Join([]string{
`proxy := "http://user:pass@proxy:8080"`,
`repo := "https://u:t@h/r.git"`,
`target := "https://attacker:pw@open.feishu.cn"`,
`proxy := "http://admin:s3cret@127.0.0.1:3128"`,
`repo := "http://x-token:PAT_abc@git.host/app_x.git"`,
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_credential_url" {
t.Fatalf("credential URL fixtures should not be credential URL findings: %#v", got)
}
}
}
func TestScanFileAllowsRootCredentialURLFixtures(t *testing.T) {
got := ScanFile("fixtures/network.md", []byte(strings.Join([]string{
`proxy: http://user:pass@proxy:8080`,
`repo: https://u:t@h/r.git`,
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_credential_url" {
t.Fatalf("root credential URL fixtures should not be credential URL findings: %#v", got)
}
}
}
func TestScanFileAllowsRootPrivateIPv4Fixtures(t *testing.T) {
got := ScanFile("testdata/network.md", []byte(strings.Join([]string{
`endpoint: http://10.0.0.1:8080`,
`redis: 192.168.1.10:6379`,
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_private_ipv4" {
t.Fatalf("root private IPv4 fixtures should not be private IPv4 findings: %#v", got)
}
}
}
func TestScanFileDetectsCredentialURLsWithRedactedSubstringPasswords(t *testing.T) {
got := ScanFile("docs/config.yaml", []byte("DATABASE_URL=postgres://user:notredactedreal@example.invalid/db\n"))
for _, item := range got {
@@ -648,6 +700,7 @@ func TestScanFileDetectsCredentialURLsWithPlaceholderUserAndRealPassword(t *test
"DATABASE_URL=postgres://<user>:real-secret@example.invalid/db",
"DATABASE_URL=postgres://<user>:" + stripeLike + "@example.invalid/db",
"URL=https://<user>:real-secret@example.invalid/path",
"REPO=https://x-token:" + stripeLike + "@git.host/app.git",
}, "\n")+"\n"))
var count int
for _, item := range got {
@@ -661,8 +714,8 @@ func TestScanFileDetectsCredentialURLsWithPlaceholderUserAndRealPassword(t *test
}
}
}
if count != 3 {
t.Fatalf("placeholder-user credential URL findings = %d, want 3: %#v", count, got)
if count != 4 {
t.Fatalf("placeholder-user credential URL findings = %d, want 4: %#v", count, got)
}
}
@@ -724,6 +777,68 @@ func TestScanFileAllowsBenignJSONTokenFields(t *testing.T) {
}
}
func TestScanFileAllowsWeakTokenFieldsWithoutCredentialEvidence(t *testing.T) {
got := ScanFile("docs/resource-tokens.md", []byte(strings.Join([]string{
`{"token":"img_abc123"}`,
`{"token":"img_live_secret"}`,
`{"token":"img_prod_key"}`,
`token=ab********cd`,
`{"image_token":"img_live_secret"}`,
`{"data_mail_token":"mail_abc123"}`,
`{"whiteboard_token":"board_v3_example"}`,
`{"want_token":"token from callback"}`,
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("weak token fields without credential evidence should not be credential findings: %#v", got)
}
}
}
func TestScanFileDetectsWeakTokenFieldsWithHighConfidenceCredentialValues(t *testing.T) {
githubToken := "ghp_" + "1234567890abcdef1234567890abcdef1234"
stripeToken := "sk_" + "live_1234567890abcdef"
randomToken := strings.Join([]string{
"a1b2c3d4",
"e5f6g7h8",
"i9j0k1l2",
"m3n4p5q6",
}, "")
got := ScanFile("docs/config.md", []byte(strings.Join([]string{
`{"token":"` + githubToken + `"}`,
`token=` + stripeToken,
`{"image_token":"` + githubToken + `"}`,
`{"token":"` + randomToken + `"}`,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 4 {
t.Fatalf("high-confidence weak token credential findings = %d, want 4: %#v", count, got)
}
}
func TestScanFileDetectsStrongAuthTokenKeysWithFixtureLikeValues(t *testing.T) {
got := ScanFile("docs/config.md", []byte(strings.Join([]string{
`{"access_token":"img_abc123"}`,
`{"api_token":"img_live_secret"}`,
`{"service_token":"ab********cd"}`,
`{"bot_token":"board_v3_example"}`,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 4 {
t.Fatalf("strong auth token key findings = %d, want 4: %#v", count, got)
}
}
func TestScanFileAllowsTestFixtureSecretValues(t *testing.T) {
got := ScanFile("fixtures/calendar_meeting_test.go", []byte(`AppID: "test-app", AppSecret: "test-secret", Brand: core.BrandFeishu,`+"\n"))
for _, item := range got {
@@ -1052,10 +1167,12 @@ func TestScanFileDetectsCredentialShapedTokenLikePlaceholderValues(t *testing.T)
}
}
func TestScanFileDetectsNonFixtureMinuteTokenValues(t *testing.T) {
func TestScanFileAllowsNonFixtureResourceTokenValues(t *testing.T) {
got := ScanFile("fixtures/minutes_search_test.go", []byte(`{"token":"minute_real_secret"}`+"\n"))
if !findingRules(got)["public_content_generic_credential"] {
t.Fatalf("non-fixture minute token should be credential finding: %#v", got)
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("resource-like bare token value should not be credential finding: %#v", got)
}
}
}