fix: reduce public content credential false positives (#1700)

This commit is contained in:
HanShaoshuai-k
2026-07-01 17:46:33 +08:00
committed by GitHub
parent 3bda9e17de
commit e8bfbab4a5
3 changed files with 462 additions and 18 deletions

View File

@@ -52,6 +52,9 @@ func isPlaceholderValue(value string) bool {
normalized := strings.ToLower(trimmed)
if normalized == "" ||
normalized == "=" ||
printfPlaceholderValue(normalized) ||
htmlEntityAnglePlaceholder(normalized) ||
starMaskedPlaceholder(normalized) ||
percentWrappedPlaceholder(normalized) ||
angleWrappedPlaceholder(normalized) ||
urlWithAnglePlaceholder(normalized) ||
@@ -61,9 +64,28 @@ func isPlaceholderValue(value string) bool {
return namedPlaceholderValue(normalized)
}
func htmlEntityAnglePlaceholder(value string) bool {
if !strings.HasPrefix(value, "<") || !strings.HasSuffix(value, ">") {
return false
}
return anglePlaceholderIdentifier(strings.TrimSuffix(strings.TrimPrefix(value, "<"), ">"))
}
func starMaskedPlaceholder(value string) bool {
var stars int
for _, r := range value {
if r == '*' {
stars++
continue
}
return false
}
return stars >= 3
}
func namedPlaceholderValue(value string) bool {
switch value {
case "...", "placeholder", "redacted", "<redacted>", "xxxx", "test-secret":
case "...", "***", "****", "placeholder", "redacted", "<redacted>", "xxxx", "test-secret", "test-token", "dry-run", "dry_run":
return true
}
return strings.Contains(value, "cli_example") ||
@@ -71,6 +93,15 @@ func namedPlaceholderValue(value string) bool {
conventionalNamedPlaceholderValue(value)
}
func printfPlaceholderValue(value string) bool {
switch value {
case "%d", "%s", "%q", "%v", "%w", "%x", "%T":
return true
default:
return false
}
}
func allXPlaceholder(value string) bool {
if len(value) < 4 {
return false

View File

@@ -54,8 +54,9 @@ func scanText(file, source, text string, detectorFile bool) []Finding {
keyName, _ := normalizedCredentialAssignmentKey(match[0])
if value == "" ||
isNonSecretLiteralValue(value) ||
isBenignCodeCredentialExpression(file, value) ||
isBenignCodeCredentialExpression(file, line, match[0], value) ||
isPlaceholderValue(value) ||
isPermissionScopeIdentifierAssignment(keyName, value) ||
isResourceTokenPlaceholderAssignment(keyName, value) {
continue
}
@@ -266,7 +267,7 @@ func isResourceTokenPlaceholderAssignment(key, value string) bool {
case key == "retry_without_token" && numericStringPlaceholderValue(value):
return true
case tokenLikePlaceholderKey(key):
return tokenLikePlaceholderValue(value)
return tokenLikePlaceholderValue(key, value)
default:
return false
}
@@ -278,12 +279,13 @@ func tokenLikePlaceholderKey(key string) bool {
strings.HasSuffix(key, "-token")
}
func tokenLikePlaceholderValue(value string) bool {
func tokenLikePlaceholderValue(key, value string) bool {
normalized := strings.ToLower(strings.Trim(value, `"'`))
if normalized == "" || credentialShapedIdentifier(normalized) {
return false
}
return resourceTokenPlaceholderValue(value) ||
maskedTokenFixturePlaceholderValue(key, normalized) ||
isPlaceholderValue(value) ||
normalized == "token" ||
strings.Contains(normalized, "...") ||
@@ -293,6 +295,51 @@ func tokenLikePlaceholderValue(value string) bool {
strings.HasPrefix(normalized, ".")
}
func maskedTokenFixturePlaceholderValue(key, value string) bool {
if authCredentialTokenKey(key) {
return false
}
var stars, alnum int
for _, r := range value {
switch {
case r == '*':
stars++
case (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9'):
alnum++
default:
return false
}
}
return stars >= 6 && alnum > 0
}
func authCredentialTokenKey(key string) bool {
switch strings.ReplaceAll(strings.ToLower(key), "-", "_") {
case "access_token",
"refresh_token",
"session_token",
"bearer_token",
"auth_token",
"authorization_token",
"id_token":
return true
default:
return false
}
}
func isPermissionScopeIdentifierAssignment(key, value string) bool {
if !strings.HasSuffix(key, "_token") {
return false
}
switch strings.ToLower(strings.Trim(value, `"',;`)) {
case "read", "write", "modify", "readonly", "get_as_user":
return true
default:
return false
}
}
func idempotencyTokenPlaceholderValue(value string) bool {
return numericStringPlaceholderValue(value) || uuidStringPlaceholderValue(value)
}
@@ -333,20 +380,87 @@ func numericStringPlaceholderValue(value string) bool {
return true
}
func isBenignCodeCredentialExpression(file, value string) bool {
func isBenignCodeCredentialExpression(file, line, match, value string) bool {
normalized := strings.TrimSpace(value)
if strings.HasPrefix(normalized, "regexp.MustCompile(") {
return true
}
if !sourceCodeFile(file) || quotedLiteral(value) || credentialShapedValue(value) {
if !sourceCodeFile(file) || credentialShapedValue(value) {
return false
}
if rhs, ok := sourceCodeTypedCredentialRHS(line, match); ok {
return isBenignTypedCredentialRHS(rhs)
}
rawValueQuoted := credentialAssignmentRawValueQuoted(match)
if sourceCodeLiteralLooksNonSecret(normalized, !rawValueQuoted) {
return true
}
if sourceCodeFormatStringLiteral(normalized) && sourceCodeFormatArgumentContext(line, match) {
return true
}
if strings.Contains(match, "+") {
return true
}
if rawValueQuoted {
return false
}
if quotedLiteral(value) {
return sourceCodeLiteralLooksNonSecret(value, false)
}
return codeReferenceExpression(normalized)
}
func sourceCodeTypedCredentialRHS(line, match string) (string, bool) {
idx := strings.Index(line, match)
if idx < 0 {
return "", false
}
key, ok := credentialAssignmentKey(match)
if !ok {
return "", false
}
rest := strings.TrimSpace(line[idx+len(key):])
if !strings.HasPrefix(rest, ":") {
return "", false
}
typeAndRHS := strings.TrimSpace(strings.TrimPrefix(rest, ":"))
assignmentIdx := strings.Index(typeAndRHS, "=")
if assignmentIdx < 0 {
return "", false
}
return strings.TrimSpace(typeAndRHS[assignmentIdx+1:]), true
}
func isBenignTypedCredentialRHS(value string) bool {
value = strings.TrimRight(strings.TrimSpace(value), ",;")
if value == "" || isNonSecretLiteralValue(value) || isPlaceholderValue(value) {
return true
}
if credentialShapedValue(value) {
return false
}
if sourceCodeLiteralLooksNonSecret(value, !quotedLiteral(value)) {
return true
}
if quotedLiteral(value) {
return false
}
return codeReferenceExpression(value)
}
func credentialAssignmentRawValueQuoted(match string) bool {
key, ok := credentialAssignmentKey(match)
if !ok {
return false
}
rest := strings.TrimSpace(strings.TrimPrefix(match[len(key):], ":"))
rest = strings.TrimSpace(strings.TrimPrefix(rest, "="))
return strings.HasPrefix(rest, `"`) || strings.HasPrefix(rest, `'`)
}
func sourceCodeFile(file string) bool {
switch filepath.Ext(file) {
case ".go", ".py":
case ".go", ".js", ".jsx", ".py", ".ts", ".tsx":
return true
default:
return false
@@ -360,7 +474,147 @@ func quotedLiteral(value string) bool {
(strings.HasPrefix(normalized, `'`) && strings.HasSuffix(normalized, `'`)))
}
func sourceCodeLiteralLooksNonSecret(value string, allowNumeric bool) bool {
literal := strings.Trim(strings.TrimSpace(value), `"'`)
if strings.HasPrefix(literal, "/") {
return true
}
return (allowNumeric && numericStringPlaceholderValue(literal)) ||
sourceCodeEnvVarNameLiteral(literal) ||
sourceCodeAttributeNameLiteral(literal) ||
sourceCodeFakeOrPlaceholderLiteral(literal) ||
sourceCodeCredentialTermLiteral(literal) ||
sourceCodeCredentialPrefixLiteral(literal) ||
sourceCodeVocabularyLiteral(literal) ||
sourceCodeSchemaTypeLiteral(literal) ||
benignCredentialStatusLiteral(literal)
}
func sourceCodeFormatArgumentContext(line, match string) bool {
idx := strings.Index(line, match)
if idx < 0 {
return false
}
prefix := line[:idx]
if semicolon := strings.LastIndex(prefix, ";"); semicolon >= 0 {
prefix = prefix[semicolon+1:]
}
return strings.Contains(prefix, "fmt.") ||
strings.Contains(prefix, "log.") ||
strings.Contains(prefix, "printf(") ||
strings.Contains(prefix, "Printf(") ||
strings.Contains(prefix, "Errorf(") ||
strings.Contains(prefix, "Fprintf(")
}
func sourceCodeFormatStringLiteral(value string) bool {
for i := 0; i < len(value)-1; i++ {
if value[i] != '%' {
continue
}
if value[i+1] == '%' {
i++
continue
}
j := i + 1
for j < len(value) && strings.ContainsRune("#+- 0.0123456789", rune(value[j])) {
j++
}
if j < len(value) && strings.ContainsRune("vTtbcdoOqxXUeEfFgGspw", rune(value[j])) {
return true
}
}
return false
}
func sourceCodeEnvVarNameLiteral(value string) bool {
if value == "" || !strings.Contains(value, "_") {
return false
}
var hasCredentialMarker bool
for _, r := range value {
switch {
case r >= 'A' && r <= 'Z':
case r >= '0' && r <= '9':
case r == '_':
default:
return false
}
}
for _, marker := range []string{"TOKEN", "SECRET", "KEY", "PASSWORD", "PASSWD"} {
if strings.Contains(value, marker) {
hasCredentialMarker = true
break
}
}
return hasCredentialMarker
}
func sourceCodeAttributeNameLiteral(value string) bool {
normalized := strings.ToLower(value)
return strings.HasPrefix(normalized, "data-") && delimitedPlaceholderIdentifier(normalized)
}
func sourceCodeFakeOrPlaceholderLiteral(value string) bool {
normalized := strings.ToLower(value)
return strings.HasPrefix(normalized, "fake_") ||
strings.HasPrefix(normalized, "fake-") ||
strings.Contains(normalized, "placeholder") ||
(strings.Contains(normalized, "<") && strings.Contains(normalized, ">"))
}
func sourceCodeCredentialTermLiteral(value string) bool {
normalized := strings.ToLower(strings.ReplaceAll(value, "-", "_"))
return conventionalCredentialPlaceholderName(normalized)
}
func sourceCodeCredentialPrefixLiteral(value string) bool {
switch strings.ToLower(value) {
case "appsecret:":
return true
default:
return false
}
}
func sourceCodeVocabularyLiteral(value string) bool {
switch strings.ToLower(value) {
case "bot", "tenant", "user":
return true
default:
return false
}
}
func sourceCodeSchemaTypeLiteral(value string) bool {
normalized := strings.ToLower(value)
return normalized == "string" || strings.HasPrefix(normalized, "string(")
}
func benignCredentialStatusLiteral(value string) bool {
normalized := strings.ToLower(strings.ReplaceAll(value, "-", "_"))
if !delimitedPlaceholderIdentifier(normalized) {
return false
}
for _, marker := range []string{
"bad_fmt",
"expired",
"format",
"invalid",
"missing",
"permission",
"status",
"type",
} {
if strings.Contains(normalized, marker) {
return true
}
}
return false
}
func codeReferenceExpression(value string) bool {
value = strings.TrimRight(strings.TrimSpace(value), ";")
if value == "" {
return false
}
@@ -369,7 +623,10 @@ func codeReferenceExpression(value string) bool {
return true
}
}
return codeIdentifier(value) && !credentialNameFragment(value)
if !codeIdentifier(value) {
return false
}
return codeIdentifier(value)
}
func codeIdentifier(value string) bool {
@@ -386,16 +643,6 @@ func codeIdentifier(value string) bool {
return true
}
func credentialNameFragment(value string) bool {
normalized := strings.ToLower(value)
for _, marker := range []string{"secret", "token", "password", "passwd", "key"} {
if strings.Contains(normalized, marker) {
return true
}
}
return false
}
func isNonSecretLiteralValue(value string) bool {
switch strings.ToLower(strings.TrimSpace(strings.Trim(value, `"'`))) {
case "true", "false", "null", "nil", "{", "[":

View File

@@ -770,6 +770,172 @@ func TestScanFileAllowsPythonArgumentTokens(t *testing.T) {
}
}
func TestScanFileAllowsPythonCredentialTypeAnnotations(t *testing.T) {
got := ScanFile("fixtures/doc_word_stat.py", []byte(strings.Join([]string{
"class Counter:",
" def __init__(self) -> None:",
" self._token_kind: TokenKind | None = None",
" self.access_token: AccessToken | None = None",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("python credential-shaped type annotations should not be credential findings: %#v", got)
}
}
}
func TestScanFileAllowsSourceCodeCredentialNonSecretLiterals(t *testing.T) {
got := ScanFile("fixtures/auth_paths.go", []byte(strings.Join([]string{
`const PathOAuthTokenV2 = "/open-apis/authen/v2/oauth/token"`,
`return fmt.Errorf("failed to remove token: %v", err)`,
`const LarkErrTokenMissing = "token_missing"`,
`const LarkErrTokenExpired = 99991677`,
`const CliAppSecret = "LARKSUITE_CLI_APP_SECRET"`,
`const LargeAttachmentTokenAttr = "data-mail-token"`,
`const fakeOfficeTokenPrefix = "fake_office_"`,
`fmt.Fprintf(w, " - token=%s filename=%s\n", att.Token, att.FileName)`,
`tokenTypeHint := "access_token"`,
`const TokenTenant Token = "tenant"`,
`const secretKeyPrefix = "appsecret:"`,
`output.PrintJson(out, map[string]interface{}{"appSecret": "****"})`,
`return &credential.TokenResult{Token: "test-token"}, nil`,
`fmt.Fprintf(w, "password=%s\n", pat)`,
`text += "(img_token:" + imgToken + ")"`,
`map[string]interface{}{"token": "string(optional, from inspect)"}`,
`this.token = token;`,
`// AppSecret: "appsecret:<appId>"`,
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("source code non-secret literals should not be credential findings: %#v", got)
}
}
}
func TestScanFileAllowsCredentialLikePublicPlaceholders(t *testing.T) {
got := ScanFile("fixtures/placeholders.md", []byte(strings.Join([]string{
`app_secret=***`,
`{"token":"&lt;wiki_token&gt;"}`,
`{"token":"Pgrrwvr***********UnRb"}`,
`"scope_name": "auth:user_access_token:read"`,
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("public placeholders and scope identifiers should not be credential findings: %#v", got)
}
}
}
func TestScanFileDetectsPartiallyMaskedCredentialValues(t *testing.T) {
got := ScanFile("fixtures/config.md", []byte(strings.Join([]string{
"client_secret=realprefix***realsuffix",
"client_secret=ab********cd",
"access_token=ab********cd",
"refresh_token=realprefix********realsuffix",
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 4 {
t.Fatalf("partially masked credential findings = %d, want 4: %#v", count, got)
}
}
func TestScanFileAllowsDryRunCredentialPlaceholders(t *testing.T) {
got := ScanFile("fixtures/ci.yml", []byte(strings.Join([]string{
"LARKSUITE_CLI_APP_SECRET=dry-run",
"client_secret: dry_run",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("dry-run credential placeholders should not be credential findings: %#v", got)
}
}
}
func TestScanFileDetectsTypedCredentialAssignmentsWithSecretRHS(t *testing.T) {
cases := []struct {
name string
file string
text string
}{
{
name: "typescript simple secret",
file: "fixtures/source_secret.ts",
text: `const clientSecret: string = "real-client-secret-value"`,
},
{
name: "typescript numeric password",
file: "fixtures/source_secret.ts",
text: `const password: string = "12345678901234567890"`,
},
{
name: "typescript union secret",
file: "fixtures/source_secret.ts",
text: `const clientSecret: string | undefined = "real-client-secret-value"`,
},
{
name: "python simple secret",
file: "fixtures/source_secret.py",
text: `self.client_secret: str = "real-client-secret-value"`,
},
{
name: "python union secret",
file: "fixtures/source_secret.py",
text: `self.client_secret: str | None = "real-client-secret-value"`,
},
{
name: "python optional secret",
file: "fixtures/source_secret.py",
text: `self.client_secret: Optional[str] = "real-client-secret-value"`,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
got := ScanFile(tc.file, []byte(tc.text+"\n"))
if !findingRules(got)["public_content_generic_credential"] {
t.Fatalf("typed credential assignment should be reported: %#v", got)
}
})
}
}
func TestScanFileDetectsCredentialShapedSourceCodeLiterals(t *testing.T) {
githubToken := "ghp_" + "1234567890abcdef1234567890abcdef1234"
got := ScanFile("fixtures/source_secret.go", []byte(strings.Join([]string{
`const ClientSecret = "real-client-secret-value"`,
`const GithubToken = "` + githubToken + `"`,
`const Password = "12345678901234567890"`,
`const ClientSecretNumber = "12345678901234567890"`,
`const ClientSecretFormat = "abc%sdefreal"`,
`fmt.Println("done"); const ClientSecret = "abc%sdefreal"`,
}, "\n")+"\n"))
var count int
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
count++
}
}
if count != 6 {
t.Fatalf("source code credential-shaped literal findings = %d, want 6: %#v", count, got)
}
}
func TestScanFileAllowsPrintfCredentialPlaceholders(t *testing.T) {
got := ScanFile("fixtures/placeholders.md", []byte(strings.Join([]string{
"client_secret=%s",
"access_token=%v",
}, "\n")+"\n"))
for _, item := range got {
if item.Rule == "public_content_generic_credential" {
t.Fatalf("printf placeholders should not be credential findings: %#v", got)
}
}
}
func TestScanFileAllowsEllipsisCredentialPlaceholders(t *testing.T) {
got := ScanFile("fixtures/lark-doc-fetch.md", []byte(strings.Join([]string{
`<img token="..." url="https://..." width="..." height="..."/>`,