test: add unit test suite for core utility modules

Add initial test infrastructure covering:

- skillopt/utils/scoring.py (compute_score, skill_hash)
- skillopt/utils/json_utils.py (extract_json, extract_json_array)
- skillopt/types.py (Edit, Patch dataclass serialization)

All tested functions are pure/deterministic with no LLM dependencies.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-03 14:02:58 +08:00 · 2026-06-01 02:04:22 +08:00
parent 8ebede0efd
commit dd8cd993b5
4 changed files with 467 additions and 0 deletions
--- a/tests/__init__.py
+++ b/tests/__init__.py
--- a/tests/test_json_utils.py
+++ b/tests/test_json_utils.py
@@ -0,0 +1,112 @@
+"""Tests for skillopt.utils.json_utils."""
+from __future__ import annotations
+
+import pytest
+
+from skillopt.utils.json_utils import extract_json, extract_json_array
+
+
+class TestExtractJson:
+    """Behaviour of ``extract_json`` on fenced, bare, and invalid input."""
+
+    def test_code_fence_json(self) -> None:
+        """A fenced object is returned; the prose around the fence is ignored."""
+        response = 'Some text\n```json\n{"key": "value", "num": 42}\n```\nmore text'
+        assert extract_json(response) == {"key": "value", "num": 42}
+
+    def test_bare_json_object(self) -> None:
+        """An unfenced object embedded in a sentence is returned."""
+        parsed = extract_json('The result is {"answer": "yes", "score": 0.95}.')
+        assert parsed == {"answer": "yes", "score": 0.95}
+
+    def test_code_fence_takes_precedence(self) -> None:
+        """A parseable fenced object wins over a bare object later in the text."""
+        fenced = '```json\n{"source": "fence"}\n```\n'
+        bare = 'Then also {"source": "bare"}'
+        assert extract_json(fenced + bare) == {"source": "fence"}
+
+    def test_broken_fence_falls_back_to_bare(self) -> None:
+        """Unparseable fence content falls back to the bare ``{...}`` match."""
+        # The fence body deliberately has no braces, so a greedy bare-object
+        # match cannot start inside it and swallow the valid object.
+        broken_fence = '```json\nnot json at all\n```\n'
+        bare = 'Answer: {"fallback": "yes"}'
+        assert extract_json(broken_fence + bare) == {"fallback": "yes"}
+
+    def test_nested_json(self) -> None:
+        """Nested objects and arrays inside the fence come back intact."""
+        expected = {"outer": {"inner": [1, 2, 3]}}
+        assert extract_json('```json\n{"outer": {"inner": [1, 2, 3]}}\n```') == expected
+
+    def test_no_json_returns_none(self) -> None:
+        """Text containing no JSON object gives ``None``."""
+        outcome = extract_json("Just plain text without JSON.")
+        assert outcome is None
+
+    def test_empty_string_returns_none(self) -> None:
+        """The empty string gives ``None``."""
+        outcome = extract_json("")
+        assert outcome is None
+
+    def test_malformed_json_returns_none(self) -> None:
+        """An unterminated object gives ``None``."""
+        outcome = extract_json("{broken")
+        assert outcome is None
+
+    def test_empty_json_object(self) -> None:
+        """An empty nested object is preserved as an empty dict."""
+        parsed = extract_json('{"empty": {}}')
+        assert parsed == {"empty": {}}
+
+    def test_json_with_escaped_chars(self) -> None:
+        """An escaped newline in a JSON string is decoded to a real newline."""
+        raw = '{"message": "hello\\nworld"}'
+        assert extract_json(raw) == {"message": "hello\nworld"}
+
+    def test_only_fence_with_no_json_syntax(self) -> None:
+        """A code fence whose content is not JSON gives ``None``."""
+        fence_only = "```\nplain code block\n```"
+        assert extract_json(fence_only) is None
+
+
+class TestExtractJsonArray:
+    """Behaviour of ``extract_json_array`` on fenced, bare, and invalid input."""
+
+    def test_code_fence_array(self) -> None:
+        """An array inside a json code fence is returned."""
+        parsed = extract_json_array('```json\n["a", "b", "c"]\n```')
+        assert parsed == ["a", "b", "c"]
+
+    def test_bare_array(self) -> None:
+        """An unfenced array embedded in a sentence is returned."""
+        parsed = extract_json_array("The items are [1, 2, 3].")
+        assert parsed == [1, 2, 3]
+
+    def test_code_fence_takes_precedence(self) -> None:
+        """A parseable fenced array wins over a bare array later in the text."""
+        fenced = '```json\n["from_fence"]\n```\n'
+        bare = 'also ["from_bare"]'
+        assert extract_json_array(fenced + bare) == ["from_fence"]
+
+    def test_broken_fence_falls_back_to_bare(self) -> None:
+        """Unparseable fence content falls back to the bare array in the text."""
+        broken_fence = '```json\nnot json at all\n```\n'
+        bare = 'values: [42]'
+        assert extract_json_array(broken_fence + bare) == [42]
+
+    def test_nested_array(self) -> None:
+        """Arrays nested inside the fenced array come back intact."""
+        parsed = extract_json_array('```json\n[[1, 2], [3, 4]]\n```')
+        assert parsed == [[1, 2], [3, 4]]
+
+    def test_no_array_returns_none(self) -> None:
+        """Text without any brackets gives ``None``."""
+        outcome = extract_json_array("no brackets here")
+        assert outcome is None
+
+    def test_empty_string_returns_none(self) -> None:
+        """The empty string gives ``None``."""
+        outcome = extract_json_array("")
+        assert outcome is None
+
+    def test_malformed_array_returns_none(self) -> None:
+        """An unterminated array gives ``None``."""
+        outcome = extract_json_array("[1, 2, ")
+        assert outcome is None
+
+    def test_empty_json_array(self) -> None:
+        """An empty array is returned as an empty list, not ``None``."""
+        parsed = extract_json_array("[]")
+        assert parsed == []
+
+    def test_array_of_objects(self) -> None:
+        """Objects nested inside a bare array are parsed as dicts."""
+        parsed = extract_json_array('[{"x": 1}, {"x": 2}]')
+        assert parsed == [{"x": 1}, {"x": 2}]
+
+    def test_object_not_confused_with_array(self) -> None:
+        """A bare JSON object on its own is not reported as an array."""
+        object_only = '{"this is an object": true}'
+        assert extract_json_array(object_only) is None
--- a/tests/test_scoring.py
+++ b/tests/test_scoring.py
@@ -0,0 +1,106 @@
+"""Tests for skillopt.utils.scoring."""
+from __future__ import annotations
+
+import pytest
+
+from skillopt.utils.scoring import compute_score, skill_hash
+
+
+class _ResultObject:
+    """Stand-in episode result exposing ``hard`` and ``soft`` as attributes.
+
+    Used by the tests to pass attribute-style results to ``compute_score``
+    alongside (or instead of) plain dicts.
+    """
+
+    def __init__(self, hard: float, soft: float) -> None:
+        self.hard, self.soft = hard, soft
+
+
+class TestComputeScore:
+    """Tests for ``compute_score`` over a list of episode results.
+
+    Non-empty cases unpack the return value as a ``(hard, soft)`` pair.
+    Computed means are compared with ``pytest.approx`` rather than ``==`` so
+    the tests do not depend on how the implementation orders or rounds its
+    floating-point arithmetic.
+    """
+
+    def test_empty_list_returns_zeros(self) -> None:
+        """An empty result list yields exactly ``(0.0, 0.0)``."""
+        assert compute_score([]) == (0.0, 0.0)
+
+    def test_dict_results_happy_path(self) -> None:
+        """Dict results are averaged per key."""
+        results = [
+            {"hard": 1, "soft": 0.8},
+            {"hard": 0, "soft": 0.5},
+            {"hard": 1, "soft": 0.9},
+        ]
+        hard, soft = compute_score(results)
+        assert hard == pytest.approx(2 / 3)
+        assert soft == pytest.approx((0.8 + 0.5 + 0.9) / 3)
+
+    def test_object_results(self) -> None:
+        """Results may be objects exposing ``hard``/``soft`` attributes."""
+        results = [
+            _ResultObject(1.0, 0.75),
+            _ResultObject(0.0, 0.25),
+        ]
+        hard, soft = compute_score(results)
+        assert hard == pytest.approx(0.5)
+        assert soft == pytest.approx(0.5)
+
+    def test_mixed_dict_and_object_results(self) -> None:
+        """Dicts and attribute-style objects can be mixed in one list."""
+        results = [
+            {"hard": 1, "soft": 1.0},
+            _ResultObject(0, 0.0),
+        ]
+        hard, soft = compute_score(results)
+        assert hard == pytest.approx(0.5)
+        assert soft == pytest.approx(0.5)
+
+    def test_missing_keys_default_to_zero(self) -> None:
+        """A dict lacking ``hard`` or ``soft`` contributes 0 for that key."""
+        results = [
+            {"hard": 1},
+            {},
+        ]
+        hard, soft = compute_score(results)
+        assert hard == pytest.approx(0.5)
+        assert soft == pytest.approx(0.0)
+
+    def test_single_result(self) -> None:
+        """With one result the scores equal that result's own values."""
+        results = [{"hard": 1, "soft": 0.95}]
+        hard, soft = compute_score(results)
+        assert hard == pytest.approx(1.0)
+        assert soft == pytest.approx(0.95)
+
+    def test_continuous_hard_values(self) -> None:
+        """Hard may be continuous 0.0-1.0 when using smoothed reward."""
+        results = [
+            {"hard": 0.75, "soft": 0.6},
+            {"hard": 0.25, "soft": 0.4},
+        ]
+        hard, soft = compute_score(results)
+        assert hard == pytest.approx(0.5)
+        assert soft == pytest.approx(0.5)
+
+
+class TestSkillHash:
+    """Tests for ``skill_hash``: a short, deterministic hash of skill content.
+
+    The assertions pin the observable contract only: equal input gives an
+    equal digest, and the digest is a 16-character lowercase-hex ``str``.
+    """
+
+    def test_deterministic(self) -> None:
+        """Hashing the same content twice gives the same digest."""
+        assert skill_hash("hello") == skill_hash("hello")
+
+    def test_different_input_produces_different_hash(self) -> None:
+        """Two different inputs give different digests."""
+        assert skill_hash("hello") != skill_hash("world")
+
+    def test_empty_string(self) -> None:
+        """The empty string still hashes to a 16-character string."""
+        h = skill_hash("")
+        assert isinstance(h, str)
+        assert len(h) == 16
+
+    def test_output_length(self) -> None:
+        """The digest length is 16 for ordinary content."""
+        h = skill_hash("some skill content here")
+        assert len(h) == 16
+
+    def test_hex_characters(self) -> None:
+        """Every digest character is a lowercase hexadecimal digit."""
+        h = skill_hash("any content")
+        assert all(c in "0123456789abcdef" for c in h)
+
+    def test_unicode_content(self) -> None:
+        """Non-ASCII content hashes deterministically and is not flattened to ASCII."""
+        # Written with escapes so the literals stay non-ASCII regardless of how
+        # this file is encoded or transcribed.
+        accented = "caf\u00e9"
+        cjk = "\u65e5\u672c\u8a9e"
+        for content in (accented, cjk):
+            h = skill_hash(content)
+            assert h == skill_hash(content)
+            assert len(h) == 16
+        # A lossy encoding step would make the accented form collide with its
+        # ASCII look-alike.
+        assert skill_hash(accented) != skill_hash("cafe")
+
+    def test_multiline_content(self) -> None:
+        """Content containing newlines hashes to a 16-character string."""
+        content = "line1\nline2\nline3"
+        h = skill_hash(content)
+        assert len(h) == 16
+        assert isinstance(h, str)
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -0,0 +1,249 @@
+"""Tests for skillopt.types — Edit and Patch dataclass serialization."""
+from __future__ import annotations
+
+import pytest
+
+from skillopt.types import Edit, Patch
+
+
+# ── Edit ────────────────────────────────────────────────────────────────────
+
+
+class TestEditCreation:
+    """Constructing ``Edit`` instances directly."""
+
+    def test_minimal_edit(self) -> None:
+        """Only ``op`` is supplied; every other field takes its default."""
+        edit = Edit(op="append")
+        assert edit.op == "append"
+        # Text fields default to the empty string ...
+        assert (edit.content, edit.target) == ("", "")
+        assert (edit.update_origin, edit.update_target) == ("", "")
+        # ... while the optional metadata fields default to None.
+        assert edit.support_count is None
+        assert edit.source_type is None
+        assert edit.merge_level is None
+
+    def test_full_edit(self) -> None:
+        """Every field passed to the constructor is stored unchanged."""
+        supplied = {
+            "op": "replace",
+            "content": "new content",
+            "target": "old content",
+            "support_count": 5,
+            "source_type": "failure",
+            "merge_level": 2,
+            "update_origin": "reflect",
+            "update_target": "skill",
+        }
+        edit = Edit(**supplied)
+        for field_name, expected in supplied.items():
+            assert getattr(edit, field_name) == expected
+
+    def test_insert_after_op(self) -> None:
+        """An ``insert_after`` edit keeps both its content and its anchor."""
+        edit = Edit(op="insert_after", content="insertion", target="anchor")
+        assert (edit.op, edit.content, edit.target) == (
+            "insert_after",
+            "insertion",
+            "anchor",
+        )
+
+    def test_delete_op(self) -> None:
+        """A ``delete`` edit can be built from ``op`` and ``target`` alone."""
+        edit = Edit(op="delete", target="thing_to_remove")
+        assert (edit.op, edit.target) == ("delete", "thing_to_remove")
+
+
+class TestEditRoundTrip:
+    """``Edit.to_dict()`` followed by ``Edit.from_dict()`` restores the edit."""
+
+    def test_round_trip_minimal(self) -> None:
+        """An edit with only ``op`` set survives the round trip."""
+        original = Edit(op="append")
+        assert Edit.from_dict(original.to_dict()) == original
+
+    def test_round_trip_full(self) -> None:
+        """An edit with every field populated survives the round trip."""
+        original = Edit(
+            op="replace",
+            content="new content",
+            target="old content",
+            support_count=3,
+            source_type="success",
+            merge_level=1,
+            update_origin="meta_reflect",
+            update_target="system_prompt",
+        )
+        assert Edit.from_dict(original.to_dict()) == original
+
+    def test_round_trip_delete_without_content(self) -> None:
+        """A ``delete`` edit that carries no content survives the round trip."""
+        original = Edit(op="delete", target="obsolete_line")
+        assert Edit.from_dict(original.to_dict()) == original
+
+    def test_optional_fields_omitted_when_default(self) -> None:
+        """Fields left at their defaults are not written to the dict."""
+        payload = Edit(op="append").to_dict()
+        assert payload == {"op": "append", "content": ""}
+        # Implied by the equality above, but states each omission explicitly.
+        omitted_keys = (
+            "support_count",
+            "source_type",
+            "merge_level",
+            "target",
+            "update_origin",
+            "update_target",
+        )
+        for key in omitted_keys:
+            assert key not in payload
+
+    def test_from_dict_with_defaults(self) -> None:
+        """Keys missing from the dict fall back to the field defaults."""
+        edit = Edit.from_dict({"op": "replace", "content": "abc"})
+        assert (edit.op, edit.content, edit.target) == ("replace", "abc", "")
+        assert edit.support_count is None
+        assert edit.source_type is None
+
+    def test_from_dict_with_extra_keys(self) -> None:
+        """Extra keys in dict should be ignored."""
+        payload = {"op": "append", "content": "", "unknown_field": 42}
+        edit = Edit.from_dict(payload)
+        assert edit.op == "append"
+        assert not hasattr(edit, "unknown_field")
+
+
+class TestEditEdgeCases:
+    """Falsy-but-meaningful field values on ``Edit``."""
+
+    def test_support_count_zero(self) -> None:
+        """0 is a valid support_count and should be serialized."""
+        payload = Edit(op="append", support_count=0).to_dict()
+        assert payload["support_count"] == 0
+        assert Edit.from_dict(payload).support_count == 0
+
+    def test_merge_level_zero(self) -> None:
+        """A ``merge_level`` of 0 is serialized and restored, not dropped."""
+        payload = Edit(op="replace", merge_level=0).to_dict()
+        assert payload["merge_level"] == 0
+        assert Edit.from_dict(payload).merge_level == 0
+
+    def test_empty_target_stays_empty(self) -> None:
+        """An explicitly empty ``target`` is omitted from the dict."""
+        payload = Edit(op="append", target="").to_dict()
+        assert "target" not in payload
+
+
+# ── Patch ───────────────────────────────────────────────────────────────────
+
+
+class TestPatchCreation:
+    """Constructing ``Patch`` instances directly."""
+
+    def test_empty_patch(self) -> None:
+        """A patch built with no arguments has empty/None defaults."""
+        patch = Patch()
+        assert patch.edits == []
+        assert patch.reasoning == ""
+        assert patch.ranking_details is None
+
+    def test_patch_with_edits(self) -> None:
+        """Edits and reasoning passed to the constructor are stored."""
+        patch = Patch(
+            edits=[
+                Edit(op="append", content="step 1"),
+                Edit(op="append", content="step 2"),
+            ],
+            reasoning="Added two steps",
+        )
+        assert len(patch.edits) == 2
+        assert patch.reasoning == "Added two steps"
+
+    def test_patch_with_ranking_details(self) -> None:
+        """A ranking-details dict passed to the constructor is stored."""
+        details = {"score": 0.95, "rank": 1}
+        patch = Patch(ranking_details={"score": 0.95, "rank": 1})
+        assert patch.ranking_details == details
+
+
+class TestPatchRoundTrip:
+    """``Patch.to_dict()`` followed by ``Patch.from_dict()`` restores the patch."""
+
+    def test_round_trip_empty(self) -> None:
+        """A default patch comes back with the same empty/None fields."""
+        restored = Patch.from_dict(Patch().to_dict())
+        assert restored.edits == []
+        assert restored.reasoning == ""
+        assert restored.ranking_details is None
+
+    def test_round_trip_with_edits(self) -> None:
+        """Nested edits are rebuilt as equal ``Edit`` instances."""
+        source = Patch(
+            edits=[
+                Edit(op="insert_after", content="new step", target="existing step"),
+                Edit(op="replace", content="updated", target="old"),
+            ],
+            reasoning="Batch update",
+        )
+        restored = Patch.from_dict(source.to_dict())
+        assert len(restored.edits) == 2
+        for before, after in zip(source.edits, restored.edits):
+            assert isinstance(after, Edit)
+            assert before == after
+        assert restored.reasoning == "Batch update"
+        assert restored.ranking_details is None
+
+    def test_round_trip_with_ranking_details(self) -> None:
+        """The ranking-details dict survives the round trip unchanged."""
+        details = {"strategy": "rouge", "scores": [0.9, 0.8, 0.7]}
+        source = Patch(
+            edits=[Edit(op="append", content="a")],
+            reasoning="selected best",
+            ranking_details=details,
+        )
+        restored = Patch.from_dict(source.to_dict())
+        assert restored.ranking_details == details
+
+    def test_to_dict_contains_reasoning_and_edits(self) -> None:
+        """The dict has ``reasoning`` and ``edits`` keys, with edits as a list."""
+        source = Patch(edits=[Edit(op="append", content="test")], reasoning="reason")
+        payload = source.to_dict()
+        for key in ("reasoning", "edits"):
+            assert key in payload
+        assert isinstance(payload["edits"], list)
+
+    def test_from_dict_preserves_edit_order(self) -> None:
+        """Edits come back in the order they were given."""
+        source = Patch(
+            edits=[
+                Edit(op="append", content="first"),
+                Edit(op="insert_after", content="second", target="first"),
+                Edit(op="append", content="third"),
+            ],
+            reasoning="ordered",
+        )
+        restored = Patch.from_dict(source.to_dict())
+        for position, expected in enumerate(("first", "second", "third")):
+            assert restored.edits[position].content == expected
+
+
+class TestPatchEdgeCases:
+    """Empty values and raw-dict input for ``Patch``."""
+
+    def test_reasoning_empty_string(self) -> None:
+        """An empty ``reasoning`` is still written to the dict."""
+        payload = Patch(reasoning="").to_dict()
+        assert payload["reasoning"] == ""
+
+    def test_zero_edits(self) -> None:
+        """Patch with explicitly empty edit list."""
+        payload = Patch(edits=[]).to_dict()
+        assert payload["edits"] == []
+
+    def test_nested_edit_from_dict_handles_dicts(self) -> None:
+        """from_dict should accept dicts in the 'edits' list."""
+        raw = {
+            "reasoning": "test",
+            "edits": [{"op": "append", "content": "hello"}],
+        }
+        patch = Patch.from_dict(raw)
+        assert len(patch.edits) == 1
+        only_edit = patch.edits[0]
+        assert isinstance(only_edit, Edit)
+        assert (only_edit.op, only_edit.content) == ("append", "hello")