test: add unit test suite for core utility modules

Add initial test infrastructure covering:
- skillopt/utils/scoring.py (compute_score, skill_hash)
- skillopt/utils/json_utils.py (extract_json, extract_json_array)
- skillopt/types.py (Edit, Patch dataclass serialization)

All tested functions are pure/deterministic with no LLM dependencies.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Claude Code Agent
2026-06-01 02:04:22 +08:00
parent 8ebede0efd
commit dd8cd993b5
4 changed files with 467 additions and 0 deletions

0
tests/__init__.py Normal file
View File

112
tests/test_json_utils.py Normal file
View File

@@ -0,0 +1,112 @@
"""Tests for skillopt.utils.json_utils."""
from __future__ import annotations
import pytest
from skillopt.utils.json_utils import extract_json, extract_json_array
class TestExtractJson:
    """Exercise ``extract_json`` against typical LLM response shapes."""

    def test_code_fence_json(self) -> None:
        """A fenced ``json`` block embedded in prose yields its object."""
        response = 'Some text\n```json\n{"key": "value", "num": 42}\n```\nmore text'
        parsed = extract_json(response)
        assert parsed == {"key": "value", "num": 42}

    def test_bare_json_object(self) -> None:
        """An un-fenced object inside a sentence is still found."""
        response = 'The result is {"answer": "yes", "score": 0.95}.'
        parsed = extract_json(response)
        assert parsed == {"answer": "yes", "score": 0.95}

    def test_code_fence_takes_precedence(self) -> None:
        """A parseable fenced object wins over a later bare object."""
        fenced = '```json\n{"source": "fence"}\n```\n'
        trailing = 'Then also {"source": "bare"}'
        parsed = extract_json(fenced + trailing)
        assert parsed == {"source": "fence"}

    def test_broken_fence_falls_back_to_bare(self) -> None:
        """An unparseable fence body makes the bare ``{...}`` match the result."""
        # The fence body deliberately has no braces: the bare-object regex is
        # described as greedy, so a brace inside the fence would make it
        # swallow more than the valid trailing object.
        broken_fence = '```json\nnot json at all\n```\n'
        trailing = 'Answer: {"fallback": "yes"}'
        parsed = extract_json(broken_fence + trailing)
        assert parsed == {"fallback": "yes"}

    def test_nested_json(self) -> None:
        """Nested objects and arrays survive extraction intact."""
        response = '```json\n{"outer": {"inner": [1, 2, 3]}}\n```'
        parsed = extract_json(response)
        assert parsed == {"outer": {"inner": [1, 2, 3]}}

    def test_no_json_returns_none(self) -> None:
        """Plain prose produces ``None``."""
        parsed = extract_json("Just plain text without JSON.")
        assert parsed is None

    def test_empty_string_returns_none(self) -> None:
        """The empty string produces ``None``."""
        parsed = extract_json("")
        assert parsed is None

    def test_malformed_json_returns_none(self) -> None:
        """An unterminated object produces ``None``."""
        parsed = extract_json("{broken")
        assert parsed is None

    def test_empty_json_object(self) -> None:
        """An object whose value is an empty object is returned unchanged."""
        parsed = extract_json('{"empty": {}}')
        assert parsed == {"empty": {}}

    def test_json_with_escaped_chars(self) -> None:
        """JSON escape sequences are decoded (``\\n`` becomes a newline)."""
        response = '{"message": "hello\\nworld"}'
        parsed = extract_json(response)
        assert parsed == {"message": "hello\nworld"}

    def test_only_fence_with_no_json_syntax(self) -> None:
        """A code fence holding non-JSON text must not produce a match."""
        response = "```\nplain code block\n```"
        parsed = extract_json(response)
        assert parsed is None
class TestExtractJsonArray:
    """Exercise ``extract_json_array`` against typical LLM response shapes."""

    def test_code_fence_array(self) -> None:
        """A fenced ``json`` array is parsed."""
        response = '```json\n["a", "b", "c"]\n```'
        parsed = extract_json_array(response)
        assert parsed == ["a", "b", "c"]

    def test_bare_array(self) -> None:
        """An un-fenced array inside a sentence is still found."""
        response = "The items are [1, 2, 3]."
        parsed = extract_json_array(response)
        assert parsed == [1, 2, 3]

    def test_code_fence_takes_precedence(self) -> None:
        """A parseable fenced array wins over a later bare array."""
        fenced = '```json\n["from_fence"]\n```\n'
        trailing = 'also ["from_bare"]'
        parsed = extract_json_array(fenced + trailing)
        assert parsed == ["from_fence"]

    def test_broken_fence_falls_back_to_bare(self) -> None:
        """An unparseable fence body makes the bare ``[...]`` match the result."""
        broken_fence = '```json\nnot json at all\n```\n'
        trailing = 'values: [42]'
        parsed = extract_json_array(broken_fence + trailing)
        assert parsed == [42]

    def test_nested_array(self) -> None:
        """Arrays of arrays survive extraction intact."""
        response = '```json\n[[1, 2], [3, 4]]\n```'
        parsed = extract_json_array(response)
        assert parsed == [[1, 2], [3, 4]]

    def test_no_array_returns_none(self) -> None:
        """Text without brackets produces ``None``."""
        parsed = extract_json_array("no brackets here")
        assert parsed is None

    def test_empty_string_returns_none(self) -> None:
        """The empty string produces ``None``."""
        parsed = extract_json_array("")
        assert parsed is None

    def test_malformed_array_returns_none(self) -> None:
        """An unterminated array produces ``None``."""
        parsed = extract_json_array("[1, 2, ")
        assert parsed is None

    def test_empty_json_array(self) -> None:
        """``[]`` is returned as an empty list, not ``None``."""
        parsed = extract_json_array("[]")
        assert parsed == []

    def test_array_of_objects(self) -> None:
        """An array whose elements are objects is returned as a list of dicts."""
        response = '[{"x": 1}, {"x": 2}]'
        parsed = extract_json_array(response)
        assert parsed == [{"x": 1}, {"x": 2}]

    def test_object_not_confused_with_array(self) -> None:
        """A bare JSON object must not be reported as an array."""
        response = '{"this is an object": true}'
        parsed = extract_json_array(response)
        assert parsed is None

106
tests/test_scoring.py Normal file
View File

@@ -0,0 +1,106 @@
"""Tests for skillopt.utils.scoring."""
from __future__ import annotations
import pytest
from skillopt.utils.scoring import compute_score, skill_hash
class _ResultObject:
"""Minimal object with hard/soft attrs (duck-typing path)."""
def __init__(self, hard: float, soft: float) -> None:
self.hard = hard
self.soft = soft
class TestComputeScore:
    """compute_score — hard/soft accuracy from a list of episode results.

    Every expectation on a computed mean goes through ``pytest.approx`` so the
    tests pin the numeric result rather than one particular floating-point
    summation order inside the implementation.
    """

    def test_empty_list_returns_zeros(self) -> None:
        """No results yields exactly ``(0.0, 0.0)``; nothing is computed."""
        assert compute_score([]) == (0.0, 0.0)

    def test_dict_results_happy_path(self) -> None:
        """Dict results are averaged per key."""
        results = [
            {"hard": 1, "soft": 0.8},
            {"hard": 0, "soft": 0.5},
            {"hard": 1, "soft": 0.9},
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(2 / 3)
        assert soft == pytest.approx((0.8 + 0.5 + 0.9) / 3)

    def test_object_results(self) -> None:
        """Results exposing ``hard``/``soft`` as attributes are averaged too."""
        results = [
            _ResultObject(1.0, 0.75),
            _ResultObject(0.0, 0.25),
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.5)

    def test_mixed_dict_and_object_results(self) -> None:
        """Dict and attribute-style results may be mixed in one list."""
        results = [
            {"hard": 1, "soft": 1.0},
            _ResultObject(0, 0.0),
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.5)

    def test_missing_keys_default_to_zero(self) -> None:
        """A dict lacking ``hard`` or ``soft`` contributes 0 for that key."""
        results = [
            {"hard": 1},
            {},
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.0)

    def test_single_result(self) -> None:
        """With one result the means equal that result's own values."""
        results = [{"hard": 1, "soft": 0.95}]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(1.0)
        assert soft == pytest.approx(0.95)

    def test_continuous_hard_values(self) -> None:
        """Hard may be continuous 0.0-1.0 when using smoothed reward."""
        results = [
            {"hard": 0.75, "soft": 0.6},
            {"hard": 0.25, "soft": 0.4},
        ]
        hard, soft = compute_score(results)
        # 0.6 + 0.4 is not guaranteed to round to exactly 1.0 under every
        # summation strategy, hence approx rather than ==.
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.5)
class TestSkillHash:
    """skill_hash — a short, deterministic hash of skill content.

    The tests expect a 16-character lowercase hexadecimal string.
    """

    def test_deterministic(self) -> None:
        """Hashing the same content twice gives the same digest."""
        assert skill_hash("hello") == skill_hash("hello")

    def test_different_input_produces_different_hash(self) -> None:
        """Distinct content gives distinct digests."""
        assert skill_hash("hello") != skill_hash("world")

    def test_empty_string(self) -> None:
        """The empty string still hashes to a 16-character string."""
        h = skill_hash("")
        assert isinstance(h, str)
        assert len(h) == 16

    def test_output_length(self) -> None:
        """Digest length is 16 for ordinary content."""
        h = skill_hash("some skill content here")
        assert len(h) == 16

    def test_hex_characters(self) -> None:
        """Every digest character is a lowercase hexadecimal digit."""
        h = skill_hash("any content")
        assert all(c in "0123456789abcdef" for c in h)

    def test_unicode_content(self) -> None:
        """Non-ASCII content hashes deterministically and affects the digest."""
        # Written as an escape so the non-ASCII character cannot be lost to an
        # editor or transport encoding; previously both inputs were the plain
        # ASCII string "cafe", so no Unicode path was exercised.
        accented = "caf\u00e9"
        h1 = skill_hash(accented)
        h2 = skill_hash(accented)
        assert h1 == h2
        assert isinstance(h1, str)
        assert len(h1) == 16
        # The accented character must contribute to the digest rather than
        # being dropped or folded to its ASCII look-alike.
        assert h1 != skill_hash("cafe")

    def test_multiline_content(self) -> None:
        """Content containing newlines hashes to a 16-character string."""
        content = "line1\nline2\nline3"
        h = skill_hash(content)
        assert len(h) == 16
        assert isinstance(h, str)

249
tests/test_types.py Normal file
View File

@@ -0,0 +1,249 @@
"""Tests for skillopt.types — Edit and Patch dataclass serialization."""
from __future__ import annotations
import pytest
from skillopt.types import Edit, Patch
# ── Edit ────────────────────────────────────────────────────────────────────
class TestEditCreation:
    """Constructing ``Edit`` instances directly."""

    def test_minimal_edit(self) -> None:
        """Only ``op`` is required; every other field takes its default."""
        edit = Edit(op="append")
        assert edit.op == "append"
        # String fields are expected to default to the empty string ...
        assert edit.content == ""
        assert edit.target == ""
        assert edit.update_origin == ""
        assert edit.update_target == ""
        # ... while the optional metadata fields are expected to be None.
        assert edit.support_count is None
        assert edit.source_type is None
        assert edit.merge_level is None

    def test_full_edit(self) -> None:
        """Every keyword passed to the constructor is stored unchanged."""
        fields = {
            "op": "replace",
            "content": "new content",
            "target": "old content",
            "support_count": 5,
            "source_type": "failure",
            "merge_level": 2,
            "update_origin": "reflect",
            "update_target": "skill",
        }
        edit = Edit(**fields)
        for name, expected in fields.items():
            assert getattr(edit, name) == expected

    def test_insert_after_op(self) -> None:
        """An ``insert_after`` edit carries both content and an anchor target."""
        edit = Edit(op="insert_after", content="insertion", target="anchor")
        assert edit.op == "insert_after"
        assert edit.content == "insertion"
        assert edit.target == "anchor"

    def test_delete_op(self) -> None:
        """A ``delete`` edit needs only a target."""
        edit = Edit(op="delete", target="thing_to_remove")
        assert edit.op == "delete"
        assert edit.target == "thing_to_remove"
class TestEditRoundTrip:
    """Serialising an ``Edit`` with ``to_dict`` and rebuilding it with ``from_dict``."""

    def test_round_trip_minimal(self) -> None:
        """An edit with only ``op`` set survives the round trip."""
        original = Edit(op="append")
        assert Edit.from_dict(original.to_dict()) == original

    def test_round_trip_full(self) -> None:
        """An edit with every field set survives the round trip."""
        original = Edit(
            op="replace",
            content="new content",
            target="old content",
            support_count=3,
            source_type="success",
            merge_level=1,
            update_origin="meta_reflect",
            update_target="system_prompt",
        )
        assert Edit.from_dict(original.to_dict()) == original

    def test_round_trip_delete_without_content(self) -> None:
        """A delete edit (target but no content) survives the round trip."""
        original = Edit(op="delete", target="obsolete_line")
        assert Edit.from_dict(original.to_dict()) == original

    def test_optional_fields_omitted_when_default(self) -> None:
        """Fields left at their defaults are not written to the dict."""
        payload = Edit(op="append").to_dict()
        assert payload == {"op": "append", "content": ""}
        # Spell out each omitted key so a failure names the offending field.
        omitted = (
            "support_count",
            "source_type",
            "merge_level",
            "target",
            "update_origin",
            "update_target",
        )
        for key in omitted:
            assert key not in payload

    def test_from_dict_with_defaults(self) -> None:
        """Keys missing from the dict fall back to the field defaults."""
        edit = Edit.from_dict({"op": "replace", "content": "abc"})
        assert edit.op == "replace"
        assert edit.content == "abc"
        assert edit.target == ""
        assert edit.support_count is None
        assert edit.source_type is None

    def test_from_dict_with_extra_keys(self) -> None:
        """Unknown keys in the dict are ignored rather than set as attributes."""
        payload = {"op": "append", "content": "", "unknown_field": 42}
        edit = Edit.from_dict(payload)
        assert edit.op == "append"
        assert not hasattr(edit, "unknown_field")
class TestEditEdgeCases:
    """Boundary values for ``Edit`` serialisation."""

    def test_support_count_zero(self) -> None:
        """A ``support_count`` of 0 is a real value and must be serialised."""
        payload = Edit(op="append", support_count=0).to_dict()
        assert payload["support_count"] == 0
        rebuilt = Edit.from_dict(payload)
        assert rebuilt.support_count == 0

    def test_merge_level_zero(self) -> None:
        """A ``merge_level`` of 0 is a real value and must be serialised."""
        payload = Edit(op="replace", merge_level=0).to_dict()
        assert payload["merge_level"] == 0
        rebuilt = Edit.from_dict(payload)
        assert rebuilt.merge_level == 0

    def test_empty_target_stays_empty(self) -> None:
        """An explicitly empty ``target`` is omitted from the dict."""
        payload = Edit(op="append", target="").to_dict()
        assert "target" not in payload
# ── Patch ───────────────────────────────────────────────────────────────────
class TestPatchCreation:
    """Constructing ``Patch`` instances directly."""

    def test_empty_patch(self) -> None:
        """A patch built with no arguments has no edits, reasoning or ranking."""
        patch = Patch()
        assert patch.edits == []
        assert patch.reasoning == ""
        assert patch.ranking_details is None

    def test_patch_with_edits(self) -> None:
        """Edits and reasoning passed to the constructor are stored."""
        steps = [
            Edit(op="append", content="step 1"),
            Edit(op="append", content="step 2"),
        ]
        patch = Patch(edits=steps, reasoning="Added two steps")
        assert len(patch.edits) == 2
        assert patch.reasoning == "Added two steps"

    def test_patch_with_ranking_details(self) -> None:
        """``ranking_details`` accepts and keeps an arbitrary dict."""
        patch = Patch(ranking_details={"score": 0.95, "rank": 1})
        assert patch.ranking_details == {"score": 0.95, "rank": 1}
class TestPatchRoundTrip:
    """Serialising a ``Patch`` with ``to_dict`` and rebuilding it with ``from_dict``."""

    def test_round_trip_empty(self) -> None:
        """An empty patch comes back empty."""
        rebuilt = Patch.from_dict(Patch().to_dict())
        assert rebuilt.edits == []
        assert rebuilt.reasoning == ""
        assert rebuilt.ranking_details is None

    def test_round_trip_with_edits(self) -> None:
        """Each edit is rebuilt as an ``Edit`` equal to the one it came from."""
        source = Patch(
            edits=[
                Edit(op="insert_after", content="new step", target="existing step"),
                Edit(op="replace", content="updated", target="old"),
            ],
            reasoning="Batch update",
        )
        rebuilt = Patch.from_dict(source.to_dict())
        # Check the length first: zip() would silently stop at the shorter list.
        assert len(rebuilt.edits) == 2
        for before, after in zip(source.edits, rebuilt.edits):
            assert isinstance(after, Edit)
            assert before == after
        assert rebuilt.reasoning == "Batch update"
        assert rebuilt.ranking_details is None

    def test_round_trip_with_ranking_details(self) -> None:
        """``ranking_details`` survives the round trip unchanged."""
        details = {"strategy": "rouge", "scores": [0.9, 0.8, 0.7]}
        source = Patch(
            edits=[Edit(op="append", content="a")],
            reasoning="selected best",
            ranking_details=details,
        )
        rebuilt = Patch.from_dict(source.to_dict())
        assert rebuilt.ranking_details == details

    def test_to_dict_contains_reasoning_and_edits(self) -> None:
        """The dict has ``reasoning`` and ``edits`` keys; ``edits`` is a list."""
        source = Patch(edits=[Edit(op="append", content="test")], reasoning="reason")
        payload = source.to_dict()
        assert "reasoning" in payload
        assert "edits" in payload
        assert isinstance(payload["edits"], list)

    def test_from_dict_preserves_edit_order(self) -> None:
        """Edits come back in the order they were serialised."""
        source = Patch(
            edits=[
                Edit(op="append", content="first"),
                Edit(op="insert_after", content="second", target="first"),
                Edit(op="append", content="third"),
            ],
            reasoning="ordered",
        )
        rebuilt = Patch.from_dict(source.to_dict())
        assert rebuilt.edits[0].content == "first"
        assert rebuilt.edits[1].content == "second"
        assert rebuilt.edits[2].content == "third"
class TestPatchEdgeCases:
    """Boundary values for ``Patch`` serialisation."""

    def test_reasoning_empty_string(self) -> None:
        """An empty ``reasoning`` is still written to the dict."""
        payload = Patch(reasoning="").to_dict()
        assert payload["reasoning"] == ""

    def test_zero_edits(self) -> None:
        """An explicitly empty edit list serialises to an empty list."""
        payload = Patch(edits=[]).to_dict()
        assert payload["edits"] == []

    def test_nested_edit_from_dict_handles_dicts(self) -> None:
        """Plain dicts under ``edits`` are converted to ``Edit`` instances."""
        payload = {
            "reasoning": "test",
            "edits": [{"op": "append", "content": "hello"}],
        }
        patch = Patch.from_dict(payload)
        assert len(patch.edits) == 1
        only_edit = patch.edits[0]
        assert isinstance(only_edit, Edit)
        assert only_edit.op == "append"
        assert only_edit.content == "hello"