Files
Yifan Yang b02ffc2c99 refactor(sleep): decouple engine to top-level skillopt_sleep/ (zero research dep)
Open-source-tool / research-code separation:
  - git mv skillopt/sleep/ -> skillopt_sleep/ (top-level, sibling to the research
    skillopt/ package). History preserved as renames.
  - All imports skillopt.sleep.* -> skillopt_sleep.*.
  - Vendor the validation gate into skillopt_sleep/gate.py (a self-contained copy
    of skillopt.evaluation.gate). The engine now has ZERO dependency on the
    research package — verified: grep finds no `from skillopt.` in skillopt_sleep/,
    and consolidate's gate resolves to skillopt_sleep.gate.
  - Plugin scripts/commands/skill call `-m skillopt_sleep`.

29 tests pass; `python -m skillopt_sleep` runs standalone.

Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>
2026-06-08 14:31:52 +00:00

51 lines
2.1 KiB
Python

"""SkillOpt-Sleep — vendored validation gate.
This is a self-contained copy of the SkillOpt validation gate so the sleep
engine has ZERO dependency on the research package (skillopt/*). The research
repo's ``skillopt.evaluation.gate`` is the reference implementation and the two
are kept behaviourally identical; vendoring keeps this open-source tool
decoupled from the paper's experiment code.
"""
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class GateResult:
    """Immutable record of a single gate decision.

    Holds the decision label together with the current-skill and best-skill
    state that applies once the decision has been taken.
    """

    # Decision label: "accept_new_best" | "accept" | "reject".
    action: str

    # Skill that is current after the decision, and the score it holds.
    current_skill: str
    current_score: float

    # Best skill on record, its score, and the step at which it was recorded.
    best_skill: str
    best_score: float
    best_step: int
def select_gate_score(hard: float, soft: float, metric: str = "hard",
                      mixed_weight: float = 0.5) -> float:
    """Collapse a (hard, soft) score pair into one comparable number.

    Args:
        hard: Hard score; used by the "hard" and "mixed" metrics.
        soft: Soft score; used by the "soft" and "mixed" metrics.
        metric: One of "hard", "soft" or "mixed".
        mixed_weight: Share given to ``soft`` under the "mixed" metric;
            clamped into [0.0, 1.0] before use.

    Returns:
        The selected score as a float.

    Raises:
        ValueError: If ``metric`` is not one of the three known names.
    """
    # Each branch converts only the inputs it actually needs, so an unused
    # score is never passed through float().
    if metric == "hard":
        score = float(hard)
    elif metric == "soft":
        score = float(soft)
    elif metric == "mixed":
        # Clamp from above first, then from below (argument order matters
        # for how min/max treat unordered values).
        capped = min(1.0, float(mixed_weight))
        soft_share = max(0.0, capped)
        hard_part = (1.0 - soft_share) * float(hard)
        soft_part = soft_share * float(soft)
        score = hard_part + soft_part
    else:
        raise ValueError(f"unknown gate metric {metric!r}; expected hard/soft/mixed")
    return score
def evaluate_gate(candidate_skill: str, cand_hard: float, current_skill: str,
                  current_score: float, best_skill: str, best_score: float,
                  best_step: int, global_step: int, *, cand_soft: float = 0.0,
                  metric: str = "hard", mixed_weight: float = 0.5) -> GateResult:
    """Decide whether a candidate skill replaces the current and/or best one.

    The candidate's (hard, soft) scores are reduced to a single number with
    ``select_gate_score`` and compared strictly against ``current_score`` and
    then ``best_score``. The function has no side effects; it only builds and
    returns the resulting state.

    Args:
        candidate_skill: Skill proposed for acceptance.
        cand_hard: Candidate's hard score.
        current_skill: Skill currently in use.
        current_score: Score of ``current_skill``.
        best_skill: Best skill on record.
        best_score: Score of ``best_skill``.
        best_step: Step at which ``best_skill`` was recorded.
        global_step: Step of this decision; becomes ``best_step`` when the
            candidate is a new best.
        cand_soft: Candidate's soft score (keyword-only).
        metric: Metric name forwarded to ``select_gate_score`` (keyword-only).
        mixed_weight: Mixing weight forwarded to ``select_gate_score``
            (keyword-only).

    Returns:
        A ``GateResult`` whose action is "reject" (state unchanged), "accept"
        (candidate becomes current, best unchanged) or "accept_new_best"
        (candidate becomes both current and best).

    Raises:
        ValueError: Propagated from ``select_gate_score`` for an unknown
            ``metric``.
    """
    candidate_score = select_gate_score(cand_hard, cand_soft, metric, mixed_weight)

    # Written as a negated ">" (not "<=") so that a tie, or a comparison that
    # is simply not true, falls through to rejection.
    if not candidate_score > current_score:
        return GateResult(
            action="reject",
            current_skill=current_skill,
            current_score=current_score,
            best_skill=best_skill,
            best_score=best_score,
            best_step=best_step,
        )

    # Better than current but not strictly better than the recorded best:
    # promote to current only.
    if not candidate_score > best_score:
        return GateResult(
            action="accept",
            current_skill=candidate_skill,
            current_score=candidate_score,
            best_skill=best_skill,
            best_score=best_score,
            best_step=best_step,
        )

    # Strictly better than both: the candidate is the new current and best.
    return GateResult(
        action="accept_new_best",
        current_skill=candidate_skill,
        current_score=candidate_score,
        best_skill=candidate_skill,
        best_score=candidate_score,
        best_step=global_step,
    )