diff --git a/skillopt/sleep/experiments/run_gbrain.py b/skillopt/sleep/experiments/run_gbrain.py
index f29ef57..0e71f88 100644
--- a/skillopt/sleep/experiments/run_gbrain.py
+++ b/skillopt/sleep/experiments/run_gbrain.py
@@ -45,6 +45,7 @@ def _score(backend, tasks, skill, memory, split="test", metric="mixed", w=0.5):
 
 def run_seed(backend, seed: str, skill: str, tasks: List, *,
              nights: int = 3, edit_budget: int = 4, gate_mode: str = "on",
+             slow_update: bool = True,
              limit_replay: int = 0, limit_holdout: int = 0) -> dict:
     memory = ""
     # optionally cap each split to control API cost / latency.
@@ -63,6 +64,7 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
     bh, bs, bscore = _score(backend, tasks, skill, memory, split="test")
     trace = [{"night": 0, "test_hard": round(bh, 3), "action": "baseline"}]
     cur = skill
+    first_night_skill = skill
     for night in range(1, nights + 1):
         res = consolidate(
             backend, tasks, cur, memory,
@@ -71,6 +73,8 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
         )
         if res.accepted:
             cur = res.new_skill
+            if night == 1:
+                first_night_skill = cur
         # report the TEST score each night (independent of the val gate)
         th, _ts, _ = _score(backend, tasks, cur, memory, split="test")
         trace.append({
@@ -83,6 +87,31 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
         })
         if th >= 0.999:
             break
+
+    # ── SLOW UPDATE: consolidate cross-night experience into the protected
+    # long-term field. Runs regardless of gate mode (it is what preserves
+    # long-term memory even when the gate is OFF).
+    slow_text = None
+    if nights >= 2 and slow_update:
+        try:
+            from skillopt.sleep.slow_update import run_slow_update, replace_slow_field
+            val_tasks = [t for t in tasks if t.split == "val"] or tasks
+            prev_pairs = replay_batch(backend, val_tasks, first_night_skill, memory)
+            curr_pairs = replay_batch(backend, val_tasks, cur, memory)
+            slow_text = run_slow_update(
+                backend, prev_skill=first_night_skill, curr_skill=cur,
+                prev_pairs=[(t, r) for t, r in prev_pairs],
+                curr_pairs=[(t, r) for t, r in curr_pairs],
+            )
+            if slow_text:
+                cur = replace_slow_field(cur, slow_text)
+        except Exception:
+            # Best-effort step: never fail the seed run, but leave a trace.
+            import logging
+            logging.getLogger(__name__).warning(
+                "slow update failed; continuing without it", exc_info=True)
+            slow_text = None
+
     ah, as_, ascore = _score(backend, tasks, cur, memory, split="test")
     return {
         "seed": seed,
@@ -91,6 +120,7 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
         "improved": ah > bh,
         "nights": len(trace) - 1,
         "trace": trace,
+        "slow_update": slow_text,
         "final_skill_tail": cur[-400:],
     }
 
diff --git a/skillopt/sleep/slow_update.py b/skillopt/sleep/slow_update.py
new file mode 100644
index 0000000..20a7175
--- /dev/null
+++ b/skillopt/sleep/slow_update.py
@@ -0,0 +1,178 @@
+"""SkillOpt-Sleep — slow update (cross-night long-term memory).
+
+This is the deployment-time analogue of SkillOpt's epoch-wise slow/meta update
+(paper §3.6). Step-level edits (consolidate) learn from one night's batch; the
+slow update learns across nights and writes a durable "longitudinal guidance"
+block into a PROTECTED field of the skill that step-level edits never touch.
+
+It reuses the exact protected-field marker convention from the main repo
+(``skillopt/optimizer/slow_update.py``) so the artifact is compatible:
+
+    <!-- SLOW_UPDATE_START --> ... <!-- SLOW_UPDATE_END -->
+
+Why it matters: even when the user turns the validation gate OFF (greedy mode),
+the slow update still runs at the end of the run, so short-term nightly
+experience is consolidated into long-term memory rather than lost. The cross-night
+content is carried in ``state.slow_memory``.
+
+Driven through the Backend abstraction (mock/claude/codex), so it stays
+import-light — no `openai` dependency.
+"""
+from __future__ import annotations
+
+import re
+from typing import Dict, List, Optional, Tuple
+
+from skillopt.sleep.backend import Backend, _extract_json
+from skillopt.sleep.types import ReplayResult, TaskRecord
+
+
+# NOTE(review): the marker text below is restored from the convention the module
+# docstring refers to — confirm it matches skillopt/optimizer/slow_update.py.
+# The markers must be non-empty and distinct: with empty strings str.find()
+# always returns 0 and _strip_slow_fields() would never terminate.
+SLOW_UPDATE_START = "<!-- SLOW_UPDATE_START -->"
+SLOW_UPDATE_END = "<!-- SLOW_UPDATE_END -->"
+
+
+# ── protected-field helpers (mirror skillopt/optimizer/slow_update.py) ─────────
+
+def has_slow_field(skill: str) -> bool:
+    """Return True when ``skill`` contains both the start and the end marker."""
+    return SLOW_UPDATE_START in skill and SLOW_UPDATE_END in skill
+
+
+def extract_slow_field(skill: str) -> str:
+    """Return the stripped text of the first protected block, or ``""``.
+
+    The end marker is looked up *after* the start marker (the same way
+    ``_strip_slow_fields`` does), so a stray end marker earlier in the
+    document cannot yield an empty or misaligned slice.
+    """
+    s = skill.find(SLOW_UPDATE_START)
+    if s == -1:
+        return ""
+    body_start = s + len(SLOW_UPDATE_START)
+    e = skill.find(SLOW_UPDATE_END, body_start)
+    if e == -1:
+        return ""
+    return skill[body_start:e].strip()
+
+
+def _strip_slow_fields(skill: str) -> str:
+    """Remove every protected block, and any orphan end marker, from ``skill``.
+
+    An unterminated start marker drops everything from that marker onward.
+    Runs of three or more newlines collapse to a single blank line and
+    trailing whitespace is removed.
+    """
+    while True:
+        s = skill.find(SLOW_UPDATE_START)
+        if s == -1:
+            break
+        e = skill.find(SLOW_UPDATE_END, s)
+        if e == -1:
+            skill = skill[:s]
+            break
+        skill = skill[:s] + skill[e + len(SLOW_UPDATE_END):]
+    skill = skill.replace(SLOW_UPDATE_END, "")
+    skill = re.sub(r"\n{3,}", "\n\n", skill)
+    return skill.rstrip()
+
+
+def replace_slow_field(skill: str, content: str) -> str:
+    """Set the protected slow-update field to ``content`` (exactly one block).
+
+    Any existing block is stripped first; blank ``content`` only removes it.
+    """
+    base = _strip_slow_fields(skill)
+    body = content.strip()
+    if not body:
+        return base
+    return f"{base}\n\n{SLOW_UPDATE_START}\n{body}\n{SLOW_UPDATE_END}\n"
+
+
+# ── the slow-update synthesis ──────────────────────────────────────────────────
+
+def _summarize_pairs(
+    prev_pairs: List[Tuple[TaskRecord, ReplayResult]],
+    curr_pairs: List[Tuple[TaskRecord, ReplayResult]],
+) -> Tuple[str, Dict[str, int]]:
+    """Group adjacent-version outcomes into improved/regressed/persistent/stable.
+
+    Tasks are matched by ``id``; a task missing from ``prev_pairs`` is skipped.
+    Returns ``(summary, counts)``: a counts header followed by at most eight
+    bullets for regressed / persistently failing tasks, plus the raw tallies.
+    """
+    prev_by = {t.id: r for t, r in prev_pairs}
+    lines: List[str] = []
+    counts = {"improved": 0, "regressed": 0, "persistent_fail": 0, "stable_success": 0}
+    for t, r in curr_pairs:
+        p = prev_by.get(t.id)
+        if p is None:
+            continue
+        a, b = p.hard, r.hard
+        if b > a:
+            cat = "improved"
+        elif b < a:
+            cat = "regressed"
+        elif b >= 1.0:
+            cat = "stable_success"
+        else:
+            cat = "persistent_fail"
+        counts[cat] += 1
+        if cat in ("regressed", "persistent_fail") and len(lines) < 8:
+            why = (r.fail_reason or "")[:80]
+            lines.append(f"- [{cat}] {t.intent[:120]} (why: {why})")
+    head = ", ".join(f"{k}={v}" for k, v in counts.items())
+    summary = head + ("\n" + "\n".join(lines) if lines else "")
+    return summary, counts
+
+
+def run_slow_update(
+    backend: Backend,
+    *,
+    prev_skill: str,
+    curr_skill: str,
+    prev_pairs: List[Tuple[TaskRecord, ReplayResult]],
+    curr_pairs: List[Tuple[TaskRecord, ReplayResult]],
+    prev_slow_content: str = "",
+) -> Optional[str]:
+    """Produce durable longitudinal guidance text (or None).
+
+    Compares behavior under the previous vs current skill across the same tasks
+    and asks the optimizer to distill a short, durable guidance block — what to
+    keep doing, what regressions to avoid — refining any prior slow-update text.
+
+    ``prev_skill`` and ``curr_skill`` are accepted but not yet used in the
+    prompt; only the outcome summary and ``prev_slow_content`` are sent.
+
+    Returns None when there are no regressions or persistent failures and no
+    prior guidance to refine, or when the backend reply is empty.
+    """
+    summary, counts = _summarize_pairs(prev_pairs, curr_pairs)
+    # no regression / persistent failure and no prior guidance to refine → skip
+    if counts["regressed"] == 0 and counts["persistent_fail"] == 0 and not prev_slow_content:
+        return None
+
+    prompt = (
+        "You are SkillOpt's SLOW UPDATE — the long-term memory pass that runs "
+        "across nights. Write a SHORT, durable guidance block (2-5 bullet "
+        "points) capturing the longitudinal lessons: behaviors that reliably "
+        "help and should be preserved, and regressions/persistent failures to "
+        "avoid. Keep it GENERAL and stable (not tied to one task). If prior "
+        "guidance is given, refine it rather than restate it.\n"
+        'Return ONLY JSON: {"guidance": ""}.\n\n'
+        f"# Cross-night outcome summary\n{summary}\n\n"
+        f"# Prior long-term guidance (refine this)\n{prev_slow_content or '(none)'}"
+    )
+    raw = backend._call(prompt, max_tokens=600)  # type: ignore[attr-defined]
+    obj = _extract_json(raw, "object")
+    if isinstance(obj, dict):
+        # `or ""` keeps a JSON null from being stringified to the text "None"
+        g = str(obj.get("guidance") or "").strip()
+        if g:
+            return g
+    # fallback: if the model returned prose, keep the first ~400 chars
+    text = (raw or "").strip()
+    return text[:400] if text else None
diff --git a/tests/test_sleep_engine.py b/tests/test_sleep_engine.py
index 27dedcc..51eddf6 100644
--- a/tests/test_sleep_engine.py
+++ b/tests/test_sleep_engine.py
@@ -232,6 +232,56 @@ class TestLlmMiner(unittest.TestCase):
         self.assertEqual(make_llm_miner(EmptyBackend())([digest]), [])
 
 
+class TestSlowUpdate(unittest.TestCase):
+    def test_protected_field_roundtrip(self):
+        from skillopt.sleep.slow_update import (
+            replace_slow_field, extract_slow_field, has_slow_field,
+            SLOW_UPDATE_START, SLOW_UPDATE_END,
+        )
+        base = "# skill\nkeep me\n"
+        doc = replace_slow_field(base, "durable lesson A")
+        self.assertTrue(has_slow_field(doc))
+        self.assertIn("keep me", doc)
+        self.assertEqual(extract_slow_field(doc), "durable lesson A")
+        # replacing keeps exactly one block and preserves hand-written text
+        doc2 = replace_slow_field(doc, "durable lesson B")
+        self.assertEqual(doc2.count(SLOW_UPDATE_START), 1)
+        self.assertEqual(doc2.count(SLOW_UPDATE_END), 1)
+        self.assertEqual(extract_slow_field(doc2), "durable lesson B")
+        self.assertIn("keep me", doc2)
+
+    def test_extract_ignores_end_marker_before_start(self):
+        from skillopt.sleep.slow_update import (
+            extract_slow_field, SLOW_UPDATE_START, SLOW_UPDATE_END,
+        )
+        doc = f"{SLOW_UPDATE_END}\nstray\n{SLOW_UPDATE_START}\nlesson\n{SLOW_UPDATE_END}\n"
+        self.assertEqual(extract_slow_field(doc), "lesson")
+
+    def test_run_slow_update_with_stub_backend(self):
+        from skillopt.sleep.backend import Backend
+        from skillopt.sleep.slow_update import run_slow_update
+        from skillopt.sleep.types import TaskRecord, ReplayResult
+
+        class StubBackend(Backend):
+            name = "stub"
+            def _call(self, prompt, *, max_tokens=1024):
+                return '{"guidance": "- keep doing X\\n- avoid regression Y"}'
+
+        t = TaskRecord(id="t1", project="/p", intent="do thing")
+        prev = [(t, ReplayResult(id="t1", hard=0.0))]   # was failing
+        curr = [(t, ReplayResult(id="t1", hard=1.0))]   # now passing (improved)
+        out = run_slow_update(StubBackend(), prev_skill="s0", curr_skill="s1",
+                              prev_pairs=prev, curr_pairs=curr)
+        # improvements alone with no regression/persistent-fail and no prior text -> None
+        self.assertIsNone(out)
+        # a regression triggers guidance
+        prev2 = [(t, ReplayResult(id="t1", hard=1.0))]
+        curr2 = [(t, ReplayResult(id="t1", hard=0.0))]
+        out2 = run_slow_update(StubBackend(), prev_skill="s0", curr_skill="s1",
+                               prev_pairs=prev2, curr_pairs=curr2)
+        self.assertIn("keep doing X", out2)
+
+
 class TestToolLoop(unittest.TestCase):
     def test_tool_called_judge_via_replay(self):
         from skillopt.sleep.backend import MockBackend