feat(sleep): sweep 'direct' plan uses strong-optimizer/weak-target dual config

The default sweep direct plan now uses a DualBackend (Sonnet optimizer proposes
edits, Haiku target runs tasks) — the SkillOpt-faithful and more reliable setup,
since a weak self-optimizing model (Haiku-as-optimizer) produced flaky JSON.
report.py renders the optimizer->target pairing in the direct table.

Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>
This commit is contained in:
Yifan Yang
2026-06-08 14:31:51 +00:00
parent d75863eb6f
commit 023950a291
2 changed files with 31 additions and 11 deletions

View File

@@ -33,7 +33,7 @@ def _fmt_model(backend: str, model: str) -> str:
def render(rows: List[Dict[str, Any]]) -> str:
direct = [r for r in rows if r.get("cfg", {}).get("kind") == "direct" and "error" not in r]
direct = [r for r in rows if r.get("cfg", {}).get("kind") in ("direct", "dual") and "error" not in r]
transfer = [r for r in rows if r.get("cfg", {}).get("kind") == "transfer" and "error" not in r]
errors = [r for r in rows if "error" in r]
@@ -47,13 +47,19 @@ def render(rows: List[Dict[str, Any]]) -> str:
out.append("")
# ── direct improvement table ──────────────────────────────────────────
out.append("## Direct improvement (optimize and deploy on the same model)")
out.append("## Direct improvement (optimize, then deploy)")
out.append("")
out.append("| Backend:Model | Seed | Held-out before | Held-out after | Nights | Tokens |")
out.append("| Optimizer → Target | Seed | Held-out before | Held-out after | Nights | Tokens |")
out.append("|---|---|---|---|---|---|")
for r in direct:
c = r["cfg"]
out.append(f"| {_fmt_model(c['backend'], c.get('model',''))} | {c['seed']} | "
if c.get("kind") == "dual":
label = (f"{_fmt_model(c['optimizer_backend'], c.get('optimizer_model',''))}"
f"{_fmt_model(c['target_backend'], c.get('target_model',''))}")
else:
m = _fmt_model(c["backend"], c.get("model", ""))
label = f"{m}{m}"
out.append(f"| {label} | {c['seed']} | "
f"{r['baseline']:.2f} | **{r['after']:.2f}** | {c['nights']} | "
f"{r.get('tokens','?')} |")
if direct:

View File

@@ -20,7 +20,7 @@ import sys
import time
from typing import Any, Dict, List
from skillopt.sleep.backend import get_backend
from skillopt.sleep.backend import build_backend, get_backend
from skillopt.sleep.experiments.gbrain_bench import find_data_root, load_seed
from skillopt.sleep.experiments.run_gbrain import run_seed as bench_seed
from skillopt.sleep.experiments.run_transfer import run_seed as transfer_seed
@@ -31,6 +31,12 @@ def _direct_cfg(backend, model, seed, nights=2):
return {"kind": "direct", "backend": backend, "model": model, "seed": seed, "nights": nights}
def _dual_cfg(opt_backend, opt_model, tgt_backend, tgt_model, seed, nights=2):
# a 'direct' run on a DualBackend: strong optimizer proposes, weak target runs
return {"kind": "dual", "optimizer_backend": opt_backend, "optimizer_model": opt_model,
"target_backend": tgt_backend, "target_model": tgt_model, "seed": seed, "nights": nights}
def _transfer_cfg(sb, sm, tb, tm, seed, nights=2):
return {"kind": "transfer", "source_backend": sb, "source_model": sm,
"target_backend": tb, "target_model": tm, "seed": seed, "nights": nights}
@@ -42,11 +48,12 @@ PLANS: Dict[str, List[Dict[str, Any]]] = {
_direct_cfg("claude", "haiku", "brief-writer", 1),
_direct_cfg("codex", "", "brief-writer", 2),
],
# direct results across seeds + models, both backends
# SkillOpt-faithful: STRONG optimizer (sonnet) proposes, WEAK target (haiku)
# runs — the reliable config. Plus Codex self-optimized.
"direct": [
_direct_cfg("claude", "haiku", "brief-writer"),
_direct_cfg("claude", "haiku", "advisor"),
_direct_cfg("claude", "sonnet", "brief-writer"),
_dual_cfg("claude", "sonnet", "claude", "haiku", "brief-writer"),
_dual_cfg("claude", "sonnet", "claude", "haiku", "advisor"),
_dual_cfg("claude", "sonnet", "claude", "haiku", "thorough-analyst"),
_direct_cfg("codex", "", "brief-writer"),
_direct_cfg("codex", "", "advisor"),
],
@@ -90,8 +97,15 @@ def run_one(cfg: Dict[str, Any], data_root: str, codex_path: str,
seed = cfg["seed"]
skill, tasks = load_seed(data_root, seed)
t0 = time.time()
if cfg["kind"] == "direct":
be = get_backend(cfg["backend"], model=cfg.get("model", ""), codex_path=codex_path)
if cfg["kind"] in ("direct", "dual"):
if cfg["kind"] == "dual":
be = build_backend(
optimizer_backend=cfg["optimizer_backend"], optimizer_model=cfg.get("optimizer_model", ""),
target_backend=cfg["target_backend"], target_model=cfg.get("target_model", ""),
codex_path=codex_path,
)
else:
be = get_backend(cfg["backend"], model=cfg.get("model", ""), codex_path=codex_path)
r = bench_seed(be, seed, skill, tasks, nights=cfg["nights"],
limit_replay=limit_replay, limit_holdout=limit_holdout)
out = {"baseline": r["held_out_before"], "after": r["held_out_after"],