diff --git a/skillopt/utils/json_utils.py b/skillopt/utils/json_utils.py index 0fcc4a0..f1fab6e 100644 --- a/skillopt/utils/json_utils.py +++ b/skillopt/utils/json_utils.py @@ -71,6 +71,25 @@ def _top_level_brace_objects(text: str) -> list[str]: return spans +def _looks_json_like(span: str) -> bool: + """Heuristic: does ``span`` look like an intended JSON object (vs. prose)? + + A genuine JSON object's first non-space character after ``{`` is either ``"`` + (a string key) or ``}`` (an empty object). Prose pseudo-objects that the + repair pass would otherwise fabricate into bogus dicts — ``{op: delete}``, + ``{x: 1}`` quoted in single quotes or backticks, etc. — start with a bare + word and are rejected. This complements the string-aware scan, which only + skips *double*-quoted prose; single-quoted / backticked / unquoted prose + braces are caught here instead. Legitimate repair targets (trailing commas, + unescaped quotes inside string values) all begin with ``"`` and pass. + """ + inner = span.strip() + if not (inner.startswith("{") and inner.endswith("}")): + return False + after_brace = inner[1:].lstrip() + return after_brace[:1] in ('"', '}') + + def extract_json(text: str) -> dict | None: """Extract a JSON object from LLM response text. @@ -111,6 +130,12 @@ def extract_json(text: str) -> dict | None: # 0 or >1 top-level objects → too ambiguous to repair safely → None if not candidate: return None + # Final guard: only repair spans that actually look like an intended JSON + # object. Prose pseudo-objects in single quotes / backticks / bare text + # (e.g. `{op: delete}`) reach here because the scan only skips double-quoted + # prose; repairing them would fabricate a wrong dict (worse than None). + if not _looks_json_like(candidate): + return None try: from json_repair import repair_json except ModuleNotFoundError: diff --git a/tests/test_json_utils.py b/tests/test_json_utils.py index 1fa98c5..286efd7 100644 --- a/tests/test_json_utils.py +++ b/tests/test_json_utils.py @@ -94,6 +94,32 @@ class TestExtractJsonTolerantFallback: text = 'The literal string "{op: delete}" appears in prose, not as JSON.' assert extract_json(text) is None + def test_single_quoted_and_backticked_prose_returns_none(self) -> None: + """Regression: pseudo-JSON in single quotes / backticks / bare prose must + not be repaired into a bogus dict (the string-aware scan only skips + double-quoted prose; the JSON-like guard catches the rest).""" + for text in ( + "The literal string '{op: delete}' appears in prose, not JSON.", + "The inline code `{op: delete}` appears in prose, not JSON.", + "The literal string 'set it to {x: 1}' appears in prose.", + "A bare mapping {op: delete} written in prose.", + ): + assert extract_json(text) is None, text + + def test_json_string_values_with_quotes_still_repair(self) -> None: + """The JSON-like guard must NOT reject legitimate objects whose string + values contain single quotes or backticks.""" + pytest.importorskip("json_repair") + assert extract_json('{"msg": "it\'s a test",}') == {"msg": "it's a test"} + assert extract_json('{"code": "use `backtick` here",}') == {"code": "use `backtick` here"} + + def test_no_warning_on_quoted_prose(self, recwarn: pytest.WarningsRecorder) -> None: + """Prose pseudo-JSON (no real candidate) must not warn even without + json_repair installed — the JSON-like guard returns None before import.""" + assert extract_json("The inline code `{op: delete}` appears in prose.") is None + assert extract_json("A bare mapping {op: delete} in prose.") is None + assert [w for w in recwarn.list if issubclass(w.category, RuntimeWarning)] == [] + def test_no_warning_on_plain_text(self, recwarn: pytest.WarningsRecorder) -> None: """No json_repair warning for ordinary no-JSON replies (no candidate).""" assert extract_json("Just plain text without JSON.") is None