mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 14:02:58 +08:00
test: add unit test suite for core utility modules
Add initial test infrastructure covering:
- skillopt/utils/scoring.py (compute_score, skill_hash)
- skillopt/utils/json_utils.py (extract_json, extract_json_array)
- skillopt/types.py (Edit, Patch dataclass serialization)

All tested functions are pure/deterministic with no LLM dependencies.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
112
tests/test_json_utils.py
Normal file
112
tests/test_json_utils.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""Tests for skillopt.utils.json_utils."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from skillopt.utils.json_utils import extract_json, extract_json_array
|
||||
|
||||
|
||||
class TestExtractJson:
    """Expectations for ``extract_json`` when handed LLM-style response text."""

    def test_code_fence_json(self) -> None:
        """An object wrapped in a ```json fence is returned as a dict."""
        response = 'Some text\n```json\n{"key": "value", "num": 42}\n```\nmore text'
        expected = {"key": "value", "num": 42}
        assert extract_json(response) == expected

    def test_bare_json_object(self) -> None:
        """An unfenced object embedded in a sentence is still found."""
        response = 'The result is {"answer": "yes", "score": 0.95}.'
        expected = {"answer": "yes", "score": 0.95}
        assert extract_json(response) == expected

    def test_code_fence_takes_precedence(self) -> None:
        """If fence content parses successfully it should be preferred over bare."""
        fenced_part = '```json\n{"source": "fence"}\n```\n'
        bare_part = 'Then also {"source": "bare"}'
        assert extract_json(fenced_part + bare_part) == {"source": "fence"}

    def test_broken_fence_falls_back_to_bare(self) -> None:
        """When fence content is invalid JSON, fall back to bare {...} match."""
        # The fence body deliberately contains no braces, so the greedy bare
        # regex cannot swallow the valid object that follows it.
        broken_fence = '```json\nnot json at all\n```\n'
        trailing_object = 'Answer: {"fallback": "yes"}'
        parsed = extract_json(broken_fence + trailing_object)
        assert parsed == {"fallback": "yes"}

    def test_nested_json(self) -> None:
        """Nested containers inside the fenced object are preserved."""
        response = '```json\n{"outer": {"inner": [1, 2, 3]}}\n```'
        expected = {"outer": {"inner": [1, 2, 3]}}
        assert extract_json(response) == expected

    def test_no_json_returns_none(self) -> None:
        """Text without any JSON yields ``None``."""
        parsed = extract_json("Just plain text without JSON.")
        assert parsed is None

    def test_empty_string_returns_none(self) -> None:
        """The empty string yields ``None``."""
        parsed = extract_json("")
        assert parsed is None

    def test_malformed_json_returns_none(self) -> None:
        """An unterminated object yields ``None``."""
        parsed = extract_json("{broken")
        assert parsed is None

    def test_empty_json_object(self) -> None:
        """An object holding an empty nested object parses unchanged."""
        expected = {"empty": {}}
        assert extract_json('{"empty": {}}') == expected

    def test_json_with_escaped_chars(self) -> None:
        """A JSON ``\\n`` escape is decoded to a real newline in the value."""
        response = '{"message": "hello\\nworld"}'
        parsed = extract_json(response)
        assert parsed == {"message": "hello\nworld"}

    def test_only_fence_with_no_json_syntax(self) -> None:
        """Code fences without valid JSON content should not match."""
        response = "```\nplain code block\n```"
        parsed = extract_json(response)
        assert parsed is None
|
||||
|
||||
|
||||
class TestExtractJsonArray:
    """Expectations for ``extract_json_array`` on LLM-style response text."""

    def test_code_fence_array(self) -> None:
        """An array wrapped in a ```json fence is returned as a list."""
        response = '```json\n["a", "b", "c"]\n```'
        expected = ["a", "b", "c"]
        assert extract_json_array(response) == expected

    def test_bare_array(self) -> None:
        """An unfenced array embedded in a sentence is still found."""
        response = "The items are [1, 2, 3]."
        expected = [1, 2, 3]
        assert extract_json_array(response) == expected

    def test_code_fence_takes_precedence(self) -> None:
        """A parseable fenced array wins over a later bare array."""
        fenced_part = '```json\n["from_fence"]\n```\n'
        bare_part = 'also ["from_bare"]'
        assert extract_json_array(fenced_part + bare_part) == ["from_fence"]

    def test_broken_fence_falls_back_to_bare(self) -> None:
        """Invalid fence content falls back to the bare array that follows."""
        broken_fence = '```json\nnot json at all\n```\n'
        trailing_array = 'values: [42]'
        parsed = extract_json_array(broken_fence + trailing_array)
        assert parsed == [42]

    def test_nested_array(self) -> None:
        """Arrays nested inside the fenced array are preserved."""
        response = '```json\n[[1, 2], [3, 4]]\n```'
        expected = [[1, 2], [3, 4]]
        assert extract_json_array(response) == expected

    def test_no_array_returns_none(self) -> None:
        """Text without brackets yields ``None``."""
        parsed = extract_json_array("no brackets here")
        assert parsed is None

    def test_empty_string_returns_none(self) -> None:
        """The empty string yields ``None``."""
        parsed = extract_json_array("")
        assert parsed is None

    def test_malformed_array_returns_none(self) -> None:
        """An unterminated array yields ``None``."""
        parsed = extract_json_array("[1, 2, ")
        assert parsed is None

    def test_empty_json_array(self) -> None:
        """An empty array parses to an empty list rather than ``None``."""
        parsed = extract_json_array("[]")
        assert parsed == []

    def test_array_of_objects(self) -> None:
        """Objects inside a bare array are parsed as dict elements."""
        response = '[{"x": 1}, {"x": 2}]'
        expected = [{"x": 1}, {"x": 2}]
        assert extract_json_array(response) == expected

    def test_object_not_confused_with_array(self) -> None:
        """extract_json_array should not match a bare JSON object."""
        response = '{"this is an object": true}'
        parsed = extract_json_array(response)
        assert parsed is None
|
||||
106
tests/test_scoring.py
Normal file
106
tests/test_scoring.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""Tests for skillopt.utils.scoring."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from skillopt.utils.scoring import compute_score, skill_hash
|
||||
|
||||
|
||||
class _ResultObject:
    """Tiny stand-in result exposing ``hard``/``soft`` as attributes.

    Used by the tests to feed attribute-style (non-dict) results into the
    function under test.
    """

    def __init__(self, hard: float, soft: float) -> None:
        # Store both scores verbatim; no validation or conversion is applied.
        self.hard, self.soft = hard, soft
|
||||
|
||||
|
||||
class TestComputeScore:
    """compute_score — hard/soft accuracy from a list of episode results.

    Every averaged value is compared through ``pytest.approx`` so the tests
    do not depend on the exact floating-point summation order used by the
    implementation. Only the empty-input case is compared exactly.
    """

    def test_empty_list_returns_zeros(self) -> None:
        """An empty result list is expected to score exactly ``(0.0, 0.0)``."""
        assert compute_score([]) == (0.0, 0.0)

    def test_dict_results_happy_path(self) -> None:
        """Dict results are averaged per key across all episodes."""
        results = [
            {"hard": 1, "soft": 0.8},
            {"hard": 0, "soft": 0.5},
            {"hard": 1, "soft": 0.9},
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(2 / 3)
        assert soft == pytest.approx((0.8 + 0.5 + 0.9) / 3)

    def test_object_results(self) -> None:
        """Results exposing ``hard``/``soft`` as attributes are accepted."""
        results = [
            _ResultObject(1.0, 0.75),
            _ResultObject(0.0, 0.25),
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.5)

    def test_mixed_dict_and_object_results(self) -> None:
        """Dict and attribute-style results may be mixed in one list."""
        results = [
            {"hard": 1, "soft": 1.0},
            _ResultObject(0, 0.0),
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.5)

    def test_missing_keys_default_to_zero(self) -> None:
        """Absent ``hard``/``soft`` keys are expected to count as zero."""
        results = [
            {"hard": 1},
            {},
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.0)

    def test_single_result(self) -> None:
        """A single result's scores are returned as the averages."""
        results = [{"hard": 1, "soft": 0.95}]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(1.0)
        assert soft == pytest.approx(0.95)

    def test_continuous_hard_values(self) -> None:
        """Hard may be continuous 0.0-1.0 when using smoothed reward."""
        results = [
            {"hard": 0.75, "soft": 0.6},
            {"hard": 0.25, "soft": 0.4},
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.5)
|
||||
|
||||
|
||||
class TestSkillHash:
    """skill_hash — a short, deterministic hash of skill content."""

    def test_deterministic(self) -> None:
        """Hashing the same content twice gives the same digest."""
        assert skill_hash("hello") == skill_hash("hello")

    def test_different_input_produces_different_hash(self) -> None:
        """Distinct content is expected to give distinct digests."""
        assert skill_hash("hello") != skill_hash("world")

    def test_empty_string(self) -> None:
        """The empty string still hashes to a 16-character string."""
        h = skill_hash("")
        assert isinstance(h, str)
        assert len(h) == 16

    def test_output_length(self) -> None:
        """The digest is 16 characters long for ordinary content."""
        h = skill_hash("some skill content here")
        assert len(h) == 16

    def test_hex_characters(self) -> None:
        """The digest consists solely of lowercase hexadecimal digits."""
        h = skill_hash("any content")
        assert all(c in "0123456789abcdef" for c in h)

    def test_unicode_content(self) -> None:
        """Non-ASCII content hashes deterministically to a 16-char string."""
        # Escapes keep this file ASCII-only while still feeding genuinely
        # non-ASCII text (e-acute and a CJK pair) to the hash; a plain
        # "cafe" would merely repeat test_deterministic.
        content = "caf\u00e9 \u65e5\u672c"
        h1 = skill_hash(content)
        h2 = skill_hash(content)
        assert h1 == h2
        assert isinstance(h1, str)
        assert len(h1) == 16

    def test_multiline_content(self) -> None:
        """Content containing newlines hashes to a 16-character string."""
        content = "line1\nline2\nline3"
        h = skill_hash(content)
        assert len(h) == 16
        assert isinstance(h, str)
|
||||
249
tests/test_types.py
Normal file
249
tests/test_types.py
Normal file
@@ -0,0 +1,249 @@
|
||||
"""Tests for skillopt.types — Edit and Patch dataclass serialization."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from skillopt.types import Edit, Patch
|
||||
|
||||
|
||||
# ── Edit ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestEditCreation:
    """Constructing ``Edit`` instances directly."""

    def test_minimal_edit(self) -> None:
        """Only ``op`` is supplied; every other field takes its default."""
        edit = Edit(op="append")
        assert edit.op == "append"
        # String fields are expected to default to the empty string.
        assert edit.content == ""
        assert edit.target == ""
        # Optional metadata is expected to default to None.
        assert edit.support_count is None
        assert edit.source_type is None
        assert edit.merge_level is None
        assert edit.update_origin == ""
        assert edit.update_target == ""

    def test_full_edit(self) -> None:
        """Every field passed to the constructor is stored unchanged."""
        supplied = {
            "op": "replace",
            "content": "new content",
            "target": "old content",
            "support_count": 5,
            "source_type": "failure",
            "merge_level": 2,
            "update_origin": "reflect",
            "update_target": "skill",
        }
        edit = Edit(**supplied)
        for field_name, expected in supplied.items():
            assert getattr(edit, field_name) == expected

    def test_insert_after_op(self) -> None:
        """An ``insert_after`` edit keeps its content and anchor target."""
        edit = Edit(op="insert_after", content="insertion", target="anchor")
        observed = (edit.op, edit.content, edit.target)
        assert observed[0] == "insert_after"
        assert observed[1] == "insertion"
        assert observed[2] == "anchor"

    def test_delete_op(self) -> None:
        """A ``delete`` edit can be built from ``op`` and ``target`` alone."""
        edit = Edit(op="delete", target="thing_to_remove")
        assert edit.op == "delete"
        assert edit.target == "thing_to_remove"
|
||||
|
||||
|
||||
class TestEditRoundTrip:
    """Serialising an ``Edit`` with ``to_dict`` and rebuilding it with ``from_dict``."""

    def test_round_trip_minimal(self) -> None:
        """An edit with only ``op`` set survives a dict round-trip."""
        original = Edit(op="append")
        rebuilt = Edit.from_dict(original.to_dict())
        assert rebuilt == original

    def test_round_trip_full(self) -> None:
        """An edit with every field populated survives a dict round-trip."""
        original = Edit(
            op="replace",
            content="new content",
            target="old content",
            support_count=3,
            source_type="success",
            merge_level=1,
            update_origin="meta_reflect",
            update_target="system_prompt",
        )
        rebuilt = Edit.from_dict(original.to_dict())
        assert rebuilt == original

    def test_round_trip_delete_without_content(self) -> None:
        """A delete edit with no content survives a dict round-trip."""
        original = Edit(op="delete", target="obsolete_line")
        rebuilt = Edit.from_dict(original.to_dict())
        assert rebuilt == original

    def test_optional_fields_omitted_when_default(self) -> None:
        """Fields left at their defaults are not emitted by ``to_dict``."""
        payload = Edit(op="append").to_dict()
        assert payload == {"op": "append", "content": ""}
        # support_count, source_type, etc. should be absent
        for omitted in (
            "support_count",
            "source_type",
            "merge_level",
            "target",
            "update_origin",
            "update_target",
        ):
            assert omitted not in payload

    def test_from_dict_with_defaults(self) -> None:
        """Keys missing from the input dict fall back to field defaults."""
        edit = Edit.from_dict({"op": "replace", "content": "abc"})
        assert edit.op == "replace"
        assert edit.content == "abc"
        assert edit.target == ""
        assert edit.support_count is None
        assert edit.source_type is None

    def test_from_dict_with_extra_keys(self) -> None:
        """Extra keys in dict should be ignored."""
        payload = {"op": "append", "content": "", "unknown_field": 42}
        edit = Edit.from_dict(payload)
        assert edit.op == "append"
        assert not hasattr(edit, "unknown_field")
|
||||
|
||||
|
||||
class TestEditEdgeCases:
    """Boundary values for ``Edit`` serialisation."""

    def test_support_count_zero(self) -> None:
        """0 is a valid support_count and should be serialized."""
        payload = Edit(op="append", support_count=0).to_dict()
        assert payload["support_count"] == 0
        rebuilt = Edit.from_dict(payload)
        assert rebuilt.support_count == 0

    def test_merge_level_zero(self) -> None:
        """A ``merge_level`` of 0 is serialised and restored."""
        payload = Edit(op="replace", merge_level=0).to_dict()
        assert payload["merge_level"] == 0
        rebuilt = Edit.from_dict(payload)
        assert rebuilt.merge_level == 0

    def test_empty_target_stays_empty(self) -> None:
        """An explicitly empty ``target`` is left out of the dict."""
        payload = Edit(op="append", target="").to_dict()
        assert "target" not in payload
|
||||
|
||||
|
||||
# ── Patch ───────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestPatchCreation:
    """Constructing ``Patch`` instances directly."""

    def test_empty_patch(self) -> None:
        """A patch built without arguments has empty/None defaults."""
        patch = Patch()
        assert patch.edits == []
        assert patch.reasoning == ""
        assert patch.ranking_details is None

    def test_patch_with_edits(self) -> None:
        """Edits and reasoning passed to the constructor are stored."""
        first_step = Edit(op="append", content="step 1")
        second_step = Edit(op="append", content="step 2")
        patch = Patch(edits=[first_step, second_step], reasoning="Added two steps")
        assert len(patch.edits) == 2
        assert patch.reasoning == "Added two steps"

    def test_patch_with_ranking_details(self) -> None:
        """A ``ranking_details`` mapping is stored as given."""
        patch = Patch(ranking_details={"score": 0.95, "rank": 1})
        expected = {"score": 0.95, "rank": 1}
        assert patch.ranking_details == expected
|
||||
|
||||
|
||||
class TestPatchRoundTrip:
    """Serialising a ``Patch`` with ``to_dict`` and rebuilding it with ``from_dict``."""

    def test_round_trip_empty(self) -> None:
        """A default patch round-trips to the same default field values."""
        rebuilt = Patch.from_dict(Patch().to_dict())
        assert rebuilt.edits == []
        assert rebuilt.reasoning == ""
        assert rebuilt.ranking_details is None

    def test_round_trip_with_edits(self) -> None:
        """Each edit comes back as an equal ``Edit`` instance."""
        source = Patch(
            edits=[
                Edit(op="insert_after", content="new step", target="existing step"),
                Edit(op="replace", content="updated", target="old"),
            ],
            reasoning="Batch update",
        )
        rebuilt = Patch.from_dict(source.to_dict())
        assert len(rebuilt.edits) == 2
        for before, after in zip(source.edits, rebuilt.edits):
            assert isinstance(after, Edit)
            assert before == after
        assert rebuilt.reasoning == "Batch update"
        assert rebuilt.ranking_details is None

    def test_round_trip_with_ranking_details(self) -> None:
        """``ranking_details`` survives the dict round-trip unchanged."""
        details = {"strategy": "rouge", "scores": [0.9, 0.8, 0.7]}
        source = Patch(
            edits=[Edit(op="append", content="a")],
            reasoning="selected best",
            ranking_details=details,
        )
        rebuilt = Patch.from_dict(source.to_dict())
        assert rebuilt.ranking_details == details

    def test_to_dict_contains_reasoning_and_edits(self) -> None:
        """The serialised dict has ``reasoning`` and a list under ``edits``."""
        source = Patch(edits=[Edit(op="append", content="test")], reasoning="reason")
        payload = source.to_dict()
        assert "reasoning" in payload
        assert "edits" in payload
        assert isinstance(payload["edits"], list)

    def test_from_dict_preserves_edit_order(self) -> None:
        """Edits come back in the order they were supplied."""
        source = Patch(
            edits=[
                Edit(op="append", content="first"),
                Edit(op="insert_after", content="second", target="first"),
                Edit(op="append", content="third"),
            ],
            reasoning="ordered",
        )
        rebuilt = Patch.from_dict(source.to_dict())
        for position, expected_content in enumerate(("first", "second", "third")):
            assert rebuilt.edits[position].content == expected_content
|
||||
|
||||
|
||||
class TestPatchEdgeCases:
    """Boundary values for ``Patch`` serialisation."""

    def test_reasoning_empty_string(self) -> None:
        """An empty ``reasoning`` is still present in the dict."""
        payload = Patch(reasoning="").to_dict()
        assert payload["reasoning"] == ""

    def test_zero_edits(self) -> None:
        """Patch with explicitly empty edit list."""
        payload = Patch(edits=[]).to_dict()
        assert payload["edits"] == []

    def test_nested_edit_from_dict_handles_dicts(self) -> None:
        """from_dict should accept dicts in the 'edits' list."""
        payload = {
            "reasoning": "test",
            "edits": [{"op": "append", "content": "hello"}],
        }
        patch = Patch.from_dict(payload)
        assert len(patch.edits) == 1
        only_edit = patch.edits[0]
        assert isinstance(only_edit, Edit)
        assert only_edit.op == "append"
        assert only_edit.content == "hello"
|
||||
Reference in New Issue
Block a user