mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 14:02:58 +08:00
fix(skillopt-sleep): redact secrets before persisting cycle diagnostics
PR #92 added a per-cycle diagnostics.json that surfaces backend stderr, optimizer replies, and task responses so a 0.0 night is self-diagnosing. Those free-text fields can carry credentials (e.g. a codex 401 stderr dump containing an auth token), so persisting them verbatim was a new on-disk leak surface. - Add a shared redact_secrets() in staging.py and route diagnostics.json's call_error / reflect_raw_head / holdout_detail through it before writing. - Redact the codex and Claude auth-error log lines too (a secondary sink when a file log handler is attached); last_call_error stays raw in memory so _AUTH_MARKERS matching is unaffected. - Centralize _SECRET_PATTERNS in staging.py (harvest_codex now reuses them) and extend coverage to AWS / GitHub / Slack / Google / JWT token shapes. - Tests: secret-shape coverage, private-key blocks, recursive/scalar passthrough, no over-redaction of plain prose, fail-fast auth-error log redaction, and an end-to-end check that diagnostics.json has no secret. Observability-only; the gate and learning algorithm are unchanged. Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -582,9 +582,10 @@ class ClaudeCliBackend(CliBackend):
|
||||
combined = check_stdout + "\n" + stderr
|
||||
for marker in self._CLI_ERROR_MARKERS:
|
||||
if marker in combined:
|
||||
from skillopt_sleep.staging import redact_secrets
|
||||
logging.getLogger("skillopt_sleep").warning(
|
||||
"Claude CLI returned a likely auth error: %s",
|
||||
combined[:200].replace("\n", " "),
|
||||
redact_secrets(combined[:200].replace("\n", " ")),
|
||||
)
|
||||
self.last_call_error = combined[:500]
|
||||
return
|
||||
@@ -843,8 +844,10 @@ class CodexCliBackend(CliBackend):
|
||||
return out
|
||||
err = self.last_call_error or ""
|
||||
if any(m in err for m in self._AUTH_MARKERS):
|
||||
from skillopt_sleep.staging import redact_secrets
|
||||
logging.getLogger("skillopt_sleep").error(
|
||||
"codex auth error — re-login required (`codex login`): %s", err[:200]
|
||||
"codex auth error — re-login required (`codex login`): %s",
|
||||
redact_secrets(err[:200]),
|
||||
)
|
||||
break # fail fast: retrying a 401 just burns calls
|
||||
if attempt < retries - 1:
|
||||
|
||||
@@ -21,6 +21,7 @@ from skillopt_sleep.harvest_sources import harvest_for_config
|
||||
from skillopt_sleep.memory import ensure_skill_scaffold
|
||||
from skillopt_sleep.mine import mine
|
||||
from skillopt_sleep.staging import adopt as adopt_staging
|
||||
from skillopt_sleep.staging import redact_secrets
|
||||
from skillopt_sleep.staging import write_staging
|
||||
from skillopt_sleep.state import SleepState, _now_iso
|
||||
from skillopt_sleep.types import SessionDigest, SleepReport, TaskRecord
|
||||
@@ -281,6 +282,9 @@ def run_sleep_cycle(
|
||||
# cycle previously captured none of this, making the gate a black box (#learning-stall).
|
||||
try:
|
||||
import json as _json
|
||||
# Backend stderr / optimizer replies / task responses can carry
|
||||
# credentials (e.g. a codex 401 stderr dump), so scrub secret-looking
|
||||
# substrings before persisting them to the on-disk diagnostics.
|
||||
with open(os.path.join(staging_dir, "diagnostics.json"), "w", encoding="utf-8") as _fh:
|
||||
_json.dump({
|
||||
"night": night,
|
||||
@@ -292,9 +296,11 @@ def run_sleep_cycle(
|
||||
"accepted": result.accepted,
|
||||
"n_applied_edits": len(result.applied_edits),
|
||||
"n_rejected_edits": len(result.rejected_edits),
|
||||
"call_error": getattr(result, "call_error", ""),
|
||||
"reflect_raw_head": (getattr(result, "reflect_raw", "") or "")[:1200],
|
||||
"holdout_detail": getattr(result, "holdout_detail", []),
|
||||
"call_error": redact_secrets(getattr(result, "call_error", "")),
|
||||
"reflect_raw_head": redact_secrets(
|
||||
(getattr(result, "reflect_raw", "") or "")[:1200]
|
||||
),
|
||||
"holdout_detail": redact_secrets(getattr(result, "holdout_detail", [])),
|
||||
}, _fh, indent=2)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -16,29 +16,9 @@ from skillopt_sleep.harvest import (
|
||||
_iter_jsonl,
|
||||
_project_matches,
|
||||
)
|
||||
from skillopt_sleep.staging import _SECRET_PATTERNS
|
||||
from skillopt_sleep.types import SessionDigest
|
||||
|
||||
_SECRET_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
|
||||
(re.compile(r"sk-[A-Za-z0-9_-]{10,}"), "[REDACTED_OPENAI_KEY]"),
|
||||
(re.compile(r"(?i)(Authorization:\s*Bearer\s+)[^\s\"']+"), r"\1[REDACTED]"),
|
||||
(re.compile(r"(?i)(Authorization:\s*Basic\s+)[^\s\"']+"), r"\1[REDACTED]"),
|
||||
(
|
||||
re.compile(r"(?i)\b(api[_-]?key|token|password|secret)\b(\s*[:=]\s*)[^\s\"']+"),
|
||||
r"\1\2[REDACTED]",
|
||||
),
|
||||
(
|
||||
re.compile(r"(?i)\b(api[_-]?key|token|password|secret)\b(\s+)[^\s\"']+"),
|
||||
r"\1\2[REDACTED]",
|
||||
),
|
||||
(
|
||||
re.compile(
|
||||
r"-----BEGIN [A-Z ]*PRIVATE KEY-----.*?-----END [A-Z ]*PRIVATE KEY-----",
|
||||
re.DOTALL,
|
||||
),
|
||||
"[REDACTED_PRIVATE_KEY]",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _payload(rec: Dict[str, Any]) -> Dict[str, Any]:
|
||||
payload = rec.get("payload")
|
||||
|
||||
@@ -9,12 +9,68 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import time
|
||||
from typing import List, Optional
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from skillopt_sleep.types import SleepReport
|
||||
|
||||
# Secret patterns scrubbed from any free-text we persist to the staging dir
|
||||
# (diagnostics, reports). Kept here so every on-disk artifact shares one
|
||||
# redaction pass; harvest_codex reuses these for session text too.
|
||||
_SECRET_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
|
||||
(re.compile(r"sk-[A-Za-z0-9_-]{10,}"), "[REDACTED_OPENAI_KEY]"),
|
||||
# Distinctive vendor token prefixes (low false-positive: these prefixes do
|
||||
# not occur in normal diagnostic prose).
|
||||
(re.compile(r"\bAKIA[0-9A-Z]{16}\b"), "[REDACTED_AWS_KEY]"),
|
||||
(re.compile(r"\bgh[pousr]_[A-Za-z0-9]{20,}\b"), "[REDACTED_GITHUB_TOKEN]"),
|
||||
(re.compile(r"\bxox[baprs]-[A-Za-z0-9-]{10,}\b"), "[REDACTED_SLACK_TOKEN]"),
|
||||
(re.compile(r"\bAIza[0-9A-Za-z_-]{20,}\b"), "[REDACTED_GOOGLE_KEY]"),
|
||||
# Bare JWT (three base64url segments) — e.g. a leaked bearer body without
|
||||
# the "Authorization:" prefix.
|
||||
(re.compile(r"\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\b"),
|
||||
"[REDACTED_JWT]"),
|
||||
(re.compile(r"(?i)(Authorization:\s*Bearer\s+)[^\s\"']+"), r"\1[REDACTED]"),
|
||||
(re.compile(r"(?i)(Authorization:\s*Basic\s+)[^\s\"']+"), r"\1[REDACTED]"),
|
||||
(
|
||||
re.compile(r"(?i)\b(api[_-]?key|token|password|secret)\b(\s*[:=]\s*)[^\s\"']+"),
|
||||
r"\1\2[REDACTED]",
|
||||
),
|
||||
(
|
||||
re.compile(r"(?i)\b(api[_-]?key|token|password|secret)\b(\s+)[^\s\"']+"),
|
||||
r"\1\2[REDACTED]",
|
||||
),
|
||||
(
|
||||
re.compile(
|
||||
r"-----BEGIN [A-Z ]*PRIVATE KEY-----.*?-----END [A-Z ]*PRIVATE KEY-----",
|
||||
re.DOTALL,
|
||||
),
|
||||
"[REDACTED_PRIVATE_KEY]",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def redact_secrets(value: Any) -> Any:
|
||||
"""Scrub secret-looking substrings (API keys, bearer tokens, private keys)
|
||||
from a string, or recursively from the string leaves of a list/dict.
|
||||
|
||||
Used before writing backend stderr / optimizer replies / task responses to
|
||||
on-disk diagnostics: those are surfaced for debugging, but the underlying
|
||||
text (e.g. a codex 401 stderr dump) can carry credentials. Non-string
|
||||
scalars pass through unchanged.
|
||||
"""
|
||||
if isinstance(value, str):
|
||||
out = value
|
||||
for pattern, replacement in _SECRET_PATTERNS:
|
||||
out = pattern.sub(replacement, out)
|
||||
return out
|
||||
if isinstance(value, list):
|
||||
return [redact_secrets(v) for v in value]
|
||||
if isinstance(value, dict):
|
||||
return {k: redact_secrets(v) for k, v in value.items()}
|
||||
return value
|
||||
|
||||
|
||||
def _ts_dir() -> str:
|
||||
return time.strftime("%Y%m%d-%H%M%S", time.localtime())
|
||||
|
||||
@@ -1160,5 +1160,127 @@ class TestVerifierDiscipline(unittest.TestCase):
|
||||
self.assertGreater(len(res.rejected_edits), 0)
|
||||
self.assertIn("placeholder", res.rejected_edits[0].content)
|
||||
|
||||
class TestDiagnosticsRedaction(unittest.TestCase):
|
||||
"""diagnostics.json surfaces backend stderr / optimizer replies / task
|
||||
responses for debugging — but those can carry credentials (e.g. a codex 401
|
||||
stderr dump). redact_secrets() must scrub them before anything is persisted."""
|
||||
|
||||
def test_redacts_common_secret_shapes(self):
|
||||
from skillopt_sleep.staging import redact_secrets
|
||||
cases = [
|
||||
("error: used sk-ABCDEFGHIJ1234567890 to call", "sk-ABCDEFGHIJ1234567890"),
|
||||
("Authorization: Bearer eyJhbGciOi.JIUzI1Ni.qwerty", "eyJhbGciOi.JIUzI1Ni.qwerty"),
|
||||
("config api_key=super-secret-value here", "super-secret-value"),
|
||||
("token: abc123def456ghi", "abc123def456ghi"),
|
||||
("aws AKIAIOSFODNN7EXAMPLE creds", "AKIAIOSFODNN7EXAMPLE"),
|
||||
("github ghp_AbCdEf0123456789AbCdEf0123 push", "ghp_AbCdEf0123456789AbCdEf0123"),
|
||||
("jwt eyJhbGci0123.eyJzdWIi4567.SflKxwRJ89 here", "eyJhbGci0123.eyJzdWIi4567.SflKxwRJ89"),
|
||||
]
|
||||
for text, secret in cases:
|
||||
out = redact_secrets(text)
|
||||
self.assertNotIn(secret, out, f"secret leaked: {text!r} -> {out!r}")
|
||||
self.assertIn("REDACTED", out, f"no redaction marker in {out!r}")
|
||||
|
||||
def test_does_not_over_redact_plain_prose(self):
|
||||
"""Redaction must not mangle ordinary diagnostic prose that happens to
|
||||
mention security words without an actual secret value attached."""
|
||||
from skillopt_sleep.staging import redact_secrets
|
||||
for benign in (
|
||||
"the gate rejected the edit",
|
||||
"response was empty, judge scored 0.0",
|
||||
"held-out 1.000 -> 0.000 reject",
|
||||
):
|
||||
self.assertEqual(redact_secrets(benign), benign, f"over-redacted: {benign!r}")
|
||||
|
||||
def test_redacts_private_key_block(self):
|
||||
from skillopt_sleep.staging import redact_secrets
|
||||
blob = (
|
||||
"-----BEGIN RSA PRIVATE KEY-----\n"
|
||||
"MIIEowIBAAKCAQEA...secret...\n"
|
||||
"-----END RSA PRIVATE KEY-----"
|
||||
)
|
||||
out = redact_secrets("leaked:\n" + blob)
|
||||
self.assertNotIn("MIIEowIBAAKCAQEA", out)
|
||||
self.assertIn("[REDACTED_PRIVATE_KEY]", out)
|
||||
|
||||
def test_redacts_recursively_in_lists_and_dicts(self):
|
||||
from skillopt_sleep.staging import redact_secrets
|
||||
payload = {
|
||||
"call_error": "exit 1: api_key=leaked-key-123",
|
||||
"holdout_detail": [
|
||||
{"id": "t1", "response_head": "uses sk-DEADBEEF0001cafe", "hard": 0.0},
|
||||
],
|
||||
"n_tasks": 3, # non-string scalars pass through untouched
|
||||
"accepted": False,
|
||||
}
|
||||
out = redact_secrets(payload)
|
||||
self.assertNotIn("leaked-key-123", out["call_error"])
|
||||
self.assertNotIn("sk-DEADBEEF0001cafe", out["holdout_detail"][0]["response_head"])
|
||||
self.assertEqual(out["n_tasks"], 3)
|
||||
self.assertIs(out["accepted"], False)
|
||||
|
||||
def test_non_string_scalars_unchanged(self):
|
||||
from skillopt_sleep.staging import redact_secrets
|
||||
self.assertEqual(redact_secrets(42), 42)
|
||||
self.assertEqual(redact_secrets(0.5), 0.5)
|
||||
self.assertIsNone(redact_secrets(None))
|
||||
|
||||
def test_diagnostics_json_on_disk_has_no_secret(self):
|
||||
"""End-to-end: a codex-style 401 stderr captured in call_error must not
|
||||
reach diagnostics.json verbatim once written to the staging dir."""
|
||||
import json
|
||||
from skillopt_sleep.staging import redact_secrets
|
||||
# Mirror exactly what cycle.py writes (the fields that carry free text).
|
||||
secret_stderr = (
|
||||
"codex exec exited 1: ERROR 401 Unauthorized "
|
||||
"Authorization: Bearer sk-LEAKED99887766abcdef refresh_token_reused"
|
||||
)
|
||||
diag = {
|
||||
"night": 1,
|
||||
"accepted": False,
|
||||
"call_error": redact_secrets(secret_stderr),
|
||||
"reflect_raw_head": redact_secrets("optimizer said api_key=should-not-persist"),
|
||||
"holdout_detail": redact_secrets(
|
||||
[{"id": "v1", "response_head": "sk-ANOTHERLEAK1234567", "hard": 0.0}]
|
||||
),
|
||||
}
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
p = os.path.join(tmp, "diagnostics.json")
|
||||
with open(p, "w", encoding="utf-8") as fh:
|
||||
json.dump(diag, fh, indent=2)
|
||||
with open(p, encoding="utf-8") as fh:
|
||||
on_disk = fh.read()
|
||||
for leak in ("sk-LEAKED99887766abcdef", "should-not-persist", "sk-ANOTHERLEAK1234567"):
|
||||
self.assertNotIn(leak, on_disk, f"secret {leak!r} leaked to diagnostics.json")
|
||||
# The diagnostic value is still there (we scrub, not drop).
|
||||
self.assertIn("401 Unauthorized", on_disk)
|
||||
self.assertIn("REDACTED", on_disk)
|
||||
|
||||
def test_codex_auth_error_log_is_redacted(self):
|
||||
"""The codex auth-error log line (a secondary on-disk sink when a file
|
||||
log handler is attached) must not emit the raw stderr token verbatim."""
|
||||
import logging
|
||||
from skillopt_sleep.backend import CodexCliBackend
|
||||
be = CodexCliBackend.__new__(CodexCliBackend) # no __init__ side effects
|
||||
be.timeout = 1
|
||||
be._AUTH_MARKERS = CodexCliBackend._AUTH_MARKERS
|
||||
secret = "sk-LOGLEAK0011223344aa"
|
||||
calls = {"n": 0}
|
||||
|
||||
def _fake_once(prompt, *, max_tokens=1024):
|
||||
calls["n"] += 1
|
||||
be.last_call_error = f"401 Unauthorized Authorization: Bearer {secret}"
|
||||
return ""
|
||||
|
||||
be._call_once = _fake_once
|
||||
with self.assertLogs("skillopt_sleep", level="ERROR") as cm:
|
||||
out = be._call("p", retries=3)
|
||||
self.assertEqual(out, "")
|
||||
self.assertEqual(calls["n"], 1, "auth error must fail fast, not retry")
|
||||
joined = "\n".join(cm.output)
|
||||
self.assertNotIn(secret, joined, "raw token leaked into the log line")
|
||||
self.assertIn("REDACTED", joined)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(verbosity=2)
|
||||
|
||||
Reference in New Issue
Block a user