Files
microsoft-SkillOpt/tests/test_scoring.py
Claude Code Agent dd8cd993b5 test: add unit test suite for core utility modules
Add initial test infrastructure covering:
- skillopt/utils/scoring.py (compute_score, skill_hash)
- skillopt/utils/json_utils.py (extract_json, extract_json_array)
- skillopt/types.py (Edit, Patch dataclass serialization)

All tested functions are pure/deterministic with no LLM dependencies.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-01 02:04:22 +08:00

107 lines
3.1 KiB
Python

"""Tests for skillopt.utils.scoring."""
from __future__ import annotations
import pytest
from skillopt.utils.scoring import compute_score, skill_hash
class _ResultObject:
"""Minimal object with hard/soft attrs (duck-typing path)."""
def __init__(self, hard: float, soft: float) -> None:
self.hard = hard
self.soft = soft
class TestComputeScore:
"""compute_score — hard/soft accuracy from a list of episode results."""
def test_empty_list_returns_zeros(self) -> None:
assert compute_score([]) == (0.0, 0.0)
def test_dict_results_happy_path(self) -> None:
results = [
{"hard": 1, "soft": 0.8},
{"hard": 0, "soft": 0.5},
{"hard": 1, "soft": 0.9},
]
hard, soft = compute_score(results)
assert hard == pytest.approx(2 / 3)
assert soft == pytest.approx((0.8 + 0.5 + 0.9) / 3)
def test_object_results(self) -> None:
results = [
_ResultObject(1.0, 0.75),
_ResultObject(0.0, 0.25),
]
hard, soft = compute_score(results)
assert hard == 0.5
assert soft == 0.5
def test_mixed_dict_and_object_results(self) -> None:
results = [
{"hard": 1, "soft": 1.0},
_ResultObject(0, 0.0),
]
hard, soft = compute_score(results)
assert hard == 0.5
assert soft == 0.5
def test_missing_keys_default_to_zero(self) -> None:
results = [
{"hard": 1},
{},
]
hard, soft = compute_score(results)
assert hard == 0.5
assert soft == 0.0
def test_single_result(self) -> None:
results = [{"hard": 1, "soft": 0.95}]
assert compute_score(results) == (1.0, 0.95)
def test_continuous_hard_values(self) -> None:
"""Hard may be continuous 0.0-1.0 when using smoothed reward."""
results = [
{"hard": 0.75, "soft": 0.6},
{"hard": 0.25, "soft": 0.4},
]
hard, soft = compute_score(results)
assert hard == 0.5
assert soft == 0.5
class TestSkillHash:
"""skill_hash — a short, deterministic hash of skill content."""
def test_deterministic(self) -> None:
assert skill_hash("hello") == skill_hash("hello")
def test_different_input_produces_different_hash(self) -> None:
assert skill_hash("hello") != skill_hash("world")
def test_empty_string(self) -> None:
h = skill_hash("")
assert isinstance(h, str)
assert len(h) == 16
def test_output_length(self) -> None:
h = skill_hash("some skill content here")
assert len(h) == 16
def test_hex_characters(self) -> None:
h = skill_hash("any content")
assert all(c in "0123456789abcdef" for c in h)
def test_unicode_content(self) -> None:
h1 = skill_hash("cafe")
h2 = skill_hash("cafe")
assert h1 == h2
def test_multiline_content(self) -> None:
content = "line1\nline2\nline3"
h = skill_hash(content)
assert len(h) == 16
assert isinstance(h, str)