mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 14:02:58 +08:00
Open-source-tool / research-code separation:
- git mv skillopt/sleep/ -> skillopt_sleep/ (top-level, sibling to the research
skillopt/ package). History preserved as renames.
- All imports skillopt.sleep.* -> skillopt_sleep.*.
- Vendor the validation gate into skillopt_sleep/gate.py (a self-contained copy
of skillopt.evaluation.gate). The engine now has ZERO dependency on the
research package — verified: grep finds no `from skillopt.` in skillopt_sleep/,
and consolidate's gate resolves to skillopt_sleep.gate.
- Plugin scripts/commands/skill call `-m skillopt_sleep`.
29 tests pass; `python -m skillopt_sleep` runs standalone.
Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>
51 lines
2.1 KiB
Python
51 lines
2.1 KiB
Python
"""SkillOpt-Sleep — vendored validation gate.

This is a self-contained copy of the SkillOpt validation gate so the sleep
engine has ZERO dependency on the research package (skillopt/*). The research
repo's ``skillopt.evaluation.gate`` is the reference implementation and the two
are kept behaviourally identical; vendoring keeps this open-source tool
decoupled from the paper's experiment code.
"""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
@dataclass(frozen=True)
class GateResult:
    """Immutable record of a single gate decision.

    Carries the decision label together with the "current" and "best"
    skill/score state that holds after the decision has been applied.
    """

    # Decision label: "accept_new_best" | "accept" | "reject"
    action: str
    # Skill text in effect after the decision
    current_skill: str
    # Gate score paired with ``current_skill``
    current_score: float
    # Best skill text recorded so far
    best_skill: str
    # Gate score paired with ``best_skill``
    best_score: float
    # Step index at which ``best_skill`` was recorded
    best_step: int
|
|
|
|
|
|
def select_gate_score(hard: float, soft: float, metric: str = "hard",
                      mixed_weight: float = 0.5) -> float:
    """Collapse a ``(hard, soft)`` score pair into one comparable number.

    Args:
        hard: The hard score.
        soft: The soft score.
        metric: Which projection to use: ``"hard"``, ``"soft"`` or ``"mixed"``.
        mixed_weight: Share given to ``soft`` when ``metric`` is ``"mixed"``;
            ``hard`` receives the remainder. Clamped into ``[0.0, 1.0]``.

    Returns:
        The selected score as a ``float``.

    Raises:
        ValueError: If ``metric`` is not one of the three supported names.
    """
    if metric == "mixed":
        # Clamp the weight so the blend stays a convex combination.
        capped = min(1.0, float(mixed_weight))
        soft_share = max(0.0, capped)
        hard_part = (1.0 - soft_share) * float(hard)
        soft_part = soft_share * float(soft)
        return hard_part + soft_part

    if metric == "hard":
        chosen = hard
    elif metric == "soft":
        chosen = soft
    else:
        raise ValueError(f"unknown gate metric {metric!r}; expected hard/soft/mixed")
    return float(chosen)
|
|
|
|
|
|
def evaluate_gate(candidate_skill: str, cand_hard: float, current_skill: str,
                  current_score: float, best_skill: str, best_score: float,
                  best_step: int, global_step: int, *, cand_soft: float = 0.0,
                  metric: str = "hard", mixed_weight: float = 0.5) -> GateResult:
    """Decide whether a candidate skill replaces the current and/or best one.

    The candidate's ``(cand_hard, cand_soft)`` pair is reduced to one score
    via ``select_gate_score`` and compared with strict ``>``, so a tie with
    the current score is a rejection.

    Args:
        candidate_skill: Skill text being proposed.
        cand_hard: Candidate's hard score.
        current_skill: Skill text currently in effect.
        current_score: Score of ``current_skill``.
        best_skill: Best skill text recorded so far.
        best_score: Score of ``best_skill``.
        best_step: Step at which ``best_skill`` was recorded.
        global_step: Step recorded as ``best_step`` on a new best.
        cand_soft: Candidate's soft score (keyword-only).
        metric: Projection name passed to ``select_gate_score``.
        mixed_weight: Blend weight passed to ``select_gate_score``.

    Returns:
        A ``GateResult`` whose ``action`` is ``"reject"`` (state unchanged),
        ``"accept"`` (candidate becomes current, best unchanged) or
        ``"accept_new_best"`` (candidate becomes both current and best).

    Raises:
        ValueError: Propagated from ``select_gate_score`` for an unknown metric.
    """
    score = select_gate_score(cand_hard, cand_soft, metric, mixed_weight)

    # Guard clause: anything that does not strictly beat the current score
    # leaves both the current and the best state untouched.
    if not (score > current_score):
        return GateResult(
            action="reject",
            current_skill=current_skill,
            current_score=current_score,
            best_skill=best_skill,
            best_score=best_score,
            best_step=best_step,
        )

    if score > best_score:
        # Candidate also beats the best: promote it and stamp this step.
        return GateResult(
            action="accept_new_best",
            current_skill=candidate_skill,
            current_score=score,
            best_skill=candidate_skill,
            best_score=score,
            best_step=global_step,
        )

    # Better than current but not better than best: only current moves.
    return GateResult(
        action="accept",
        current_skill=candidate_skill,
        current_score=score,
        best_skill=best_skill,
        best_score=best_score,
        best_step=best_step,
    )
|