mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 14:02:58 +08:00
feat(sleep): sweep 'direct' plan uses strong-optimizer/weak-target dual config
The default sweep direct plan now uses a DualBackend (Sonnet optimizer proposes edits, Haiku target runs tasks) — the SkillOpt-faithful and more reliable setup, since a weak self-optimizing model (Haiku-as-optimizer) produced flaky JSON.

report.py renders the optimizer->target pairing in the direct table.

Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>
This commit is contained in:
@@ -33,7 +33,7 @@ def _fmt_model(backend: str, model: str) -> str:
|
||||
|
||||
|
||||
def render(rows: List[Dict[str, Any]]) -> str:
|
||||
direct = [r for r in rows if r.get("cfg", {}).get("kind") == "direct" and "error" not in r]
|
||||
direct = [r for r in rows if r.get("cfg", {}).get("kind") in ("direct", "dual") and "error" not in r]
|
||||
transfer = [r for r in rows if r.get("cfg", {}).get("kind") == "transfer" and "error" not in r]
|
||||
errors = [r for r in rows if "error" in r]
|
||||
|
||||
@@ -47,13 +47,19 @@ def render(rows: List[Dict[str, Any]]) -> str:
|
||||
out.append("")
|
||||
|
||||
# ── direct improvement table ──────────────────────────────────────────
|
||||
out.append("## Direct improvement (optimize and deploy on the same model)")
|
||||
out.append("## Direct improvement (optimize, then deploy)")
|
||||
out.append("")
|
||||
out.append("| Backend:Model | Seed | Held-out before | Held-out after | Nights | Tokens |")
|
||||
out.append("| Optimizer → Target | Seed | Held-out before | Held-out after | Nights | Tokens |")
|
||||
out.append("|---|---|---|---|---|---|")
|
||||
for r in direct:
|
||||
c = r["cfg"]
|
||||
out.append(f"| {_fmt_model(c['backend'], c.get('model',''))} | {c['seed']} | "
|
||||
if c.get("kind") == "dual":
|
||||
label = (f"{_fmt_model(c['optimizer_backend'], c.get('optimizer_model',''))}"
|
||||
f" → {_fmt_model(c['target_backend'], c.get('target_model',''))}")
|
||||
else:
|
||||
m = _fmt_model(c["backend"], c.get("model", ""))
|
||||
label = f"{m} → {m}"
|
||||
out.append(f"| {label} | {c['seed']} | "
|
||||
f"{r['baseline']:.2f} | **{r['after']:.2f}** | {c['nights']} | "
|
||||
f"{r.get('tokens','?')} |")
|
||||
if direct:
|
||||
|
||||
@@ -20,7 +20,7 @@ import sys
|
||||
import time
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from skillopt.sleep.backend import get_backend
|
||||
from skillopt.sleep.backend import build_backend, get_backend
|
||||
from skillopt.sleep.experiments.gbrain_bench import find_data_root, load_seed
|
||||
from skillopt.sleep.experiments.run_gbrain import run_seed as bench_seed
|
||||
from skillopt.sleep.experiments.run_transfer import run_seed as transfer_seed
|
||||
@@ -31,6 +31,12 @@ def _direct_cfg(backend, model, seed, nights=2):
|
||||
return {"kind": "direct", "backend": backend, "model": model, "seed": seed, "nights": nights}
|
||||
|
||||
|
||||
def _dual_cfg(opt_backend, opt_model, tgt_backend, tgt_model, seed, nights=2):
|
||||
# a 'direct' run on a DualBackend: strong optimizer proposes, weak target runs
|
||||
return {"kind": "dual", "optimizer_backend": opt_backend, "optimizer_model": opt_model,
|
||||
"target_backend": tgt_backend, "target_model": tgt_model, "seed": seed, "nights": nights}
|
||||
|
||||
|
||||
def _transfer_cfg(sb, sm, tb, tm, seed, nights=2):
|
||||
return {"kind": "transfer", "source_backend": sb, "source_model": sm,
|
||||
"target_backend": tb, "target_model": tm, "seed": seed, "nights": nights}
|
||||
@@ -42,11 +48,12 @@ PLANS: Dict[str, List[Dict[str, Any]]] = {
|
||||
_direct_cfg("claude", "haiku", "brief-writer", 1),
|
||||
_direct_cfg("codex", "", "brief-writer", 2),
|
||||
],
|
||||
# direct results across seeds + models, both backends
|
||||
# SkillOpt-faithful: STRONG optimizer (sonnet) proposes, WEAK target (haiku)
|
||||
# runs — the reliable config. Plus Codex self-optimized.
|
||||
"direct": [
|
||||
_direct_cfg("claude", "haiku", "brief-writer"),
|
||||
_direct_cfg("claude", "haiku", "advisor"),
|
||||
_direct_cfg("claude", "sonnet", "brief-writer"),
|
||||
_dual_cfg("claude", "sonnet", "claude", "haiku", "brief-writer"),
|
||||
_dual_cfg("claude", "sonnet", "claude", "haiku", "advisor"),
|
||||
_dual_cfg("claude", "sonnet", "claude", "haiku", "thorough-analyst"),
|
||||
_direct_cfg("codex", "", "brief-writer"),
|
||||
_direct_cfg("codex", "", "advisor"),
|
||||
],
|
||||
@@ -90,8 +97,15 @@ def run_one(cfg: Dict[str, Any], data_root: str, codex_path: str,
|
||||
seed = cfg["seed"]
|
||||
skill, tasks = load_seed(data_root, seed)
|
||||
t0 = time.time()
|
||||
if cfg["kind"] == "direct":
|
||||
be = get_backend(cfg["backend"], model=cfg.get("model", ""), codex_path=codex_path)
|
||||
if cfg["kind"] in ("direct", "dual"):
|
||||
if cfg["kind"] == "dual":
|
||||
be = build_backend(
|
||||
optimizer_backend=cfg["optimizer_backend"], optimizer_model=cfg.get("optimizer_model", ""),
|
||||
target_backend=cfg["target_backend"], target_model=cfg.get("target_model", ""),
|
||||
codex_path=codex_path,
|
||||
)
|
||||
else:
|
||||
be = get_backend(cfg["backend"], model=cfg.get("model", ""), codex_path=codex_path)
|
||||
r = bench_seed(be, seed, skill, tasks, nights=cfg["nights"],
|
||||
limit_replay=limit_replay, limit_holdout=limit_holdout)
|
||||
out = {"baseline": r["held_out_before"], "after": r["held_out_after"],
|
||||
|
||||
Reference in New Issue
Block a user