feat(sleep): slow-update long-term memory field (runs even with gate off)

Bring SkillOpt's epoch-wise slow/meta update (paper §3.6) into the sleep engine as skillopt/sleep/slow_update.py — import-light, driven through the Backend abstraction (mock/claude/codex): - Reuses the main repo's protected-field markers `<!-- SLOW_UPDATE_START --> ... <!-- SLOW_UPDATE_END -->` so the artifact is compatible; step-level edits never touch this field. - run_slow_update compares behavior under the first-night vs final skill across the val tasks, groups the outcomes into improved/regressed/persistent/stable, and asks the optimizer to distill durable longitudinal guidance (refining prior text). - Wired into run_gbrain.run_seed AFTER the nights loop, gated by slow_update=True (and nights >= 2) and run REGARDLESS of gate_mode — this is what preserves long-term memory even when the user turns the hard gate OFF (the user's slot_date=slow-update intent). 2 new tests (protected-field round-trip, stub-backend synthesis). 23 tests pass. Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>
2026-07-03 14:02:58 +08:00 · 2026-06-08 14:31:51 +00:00
parent 6f1351edb9
commit c179a24c45
3 changed files with 211 additions and 0 deletions
--- a/skillopt/sleep/experiments/run_gbrain.py
+++ b/skillopt/sleep/experiments/run_gbrain.py
@@ -45,6 +45,7 @@ def _score(backend, tasks, skill, memory, split="test", metric="mixed", w=0.5):

 def run_seed(backend, seed: str, skill: str, tasks: List, *,
             nights: int = 3, edit_budget: int = 4, gate_mode: str = "on",
+             slow_update: bool = True,
             limit_replay: int = 0, limit_holdout: int = 0) -> dict:
    memory = ""
    # optionally cap each split to control API cost / latency.
@@ -63,6 +64,7 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
    bh, bs, bscore = _score(backend, tasks, skill, memory, split="test")
    trace = [{"night": 0, "test_hard": round(bh, 3), "action": "baseline"}]
    cur = skill
+    first_night_skill = skill
    for night in range(1, nights + 1):
        res = consolidate(
            backend, tasks, cur, memory,
@@ -71,6 +73,8 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
        )
        if res.accepted:
            cur = res.new_skill
+        if night == 1:
+            first_night_skill = cur
        # report the TEST score each night (independent of the val gate)
        th, _ts, _ = _score(backend, tasks, cur, memory, split="test")
        trace.append({
@@ -83,6 +87,27 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
        })
        if th >= 0.999:
            break
+
+    # ── SLOW UPDATE: consolidate cross-night experience into the protected
+    # long-term field. Runs regardless of gate mode (it is what preserves
+    # long-term memory even when the gate is OFF).
+    slow_text = None
+    if nights >= 2 and slow_update:
+        try:
+            from skillopt.sleep.slow_update import run_slow_update, replace_slow_field
+            val_tasks = [t for t in tasks if t.split == "val"] or tasks
+            prev_pairs = replay_batch(backend, val_tasks, first_night_skill, memory)
+            curr_pairs = replay_batch(backend, val_tasks, cur, memory)
+            slow_text = run_slow_update(
+                backend, prev_skill=first_night_skill, curr_skill=cur,
+                prev_pairs=[(t, r) for t, r in prev_pairs],
+                curr_pairs=[(t, r) for t, r in curr_pairs],
+            )
+            if slow_text:
+                cur = replace_slow_field(cur, slow_text)
+        except Exception:
+            slow_text = None
+
    ah, as_, ascore = _score(backend, tasks, cur, memory, split="test")
    return {
        "seed": seed,
@@ -91,6 +116,7 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
        "improved": ah > bh,
        "nights": len(trace) - 1,
        "trace": trace,
+        "slow_update": slow_text,
        "final_skill_tail": cur[-400:],
    }

--- a/skillopt/sleep/slow_update.py
+++ b/skillopt/sleep/slow_update.py
@@ -0,0 +1,142 @@
+"""SkillOpt-Sleep — slow update (cross-night long-term memory).
+
+This is the deployment-time analogue of SkillOpt's epoch-wise slow/meta update
+(paper §3.6). Step-level edits (consolidate) learn from one night's batch; the
+slow update learns across nights and writes a durable "longitudinal guidance"
+block into a PROTECTED field of the skill that step-level edits never touch.
+
+It reuses the exact protected-field marker convention from the main repo
+(``skillopt/optimizer/slow_update.py``) so the artifact is compatible:
+
+    <!-- SLOW_UPDATE_START --> ... <!-- SLOW_UPDATE_END -->
+
+Why it matters: even when the user turns the validation gate OFF (greedy mode),
+the slow update still runs at the end of the run, so short-term nightly
+experience is consolidated into long-term memory rather than lost. The cross-night
+content is carried in the skill text itself, inside the protected block.
+
+Driven through the Backend abstraction (mock/claude/codex), so it stays
+import-light — no `openai` dependency.
+"""
+from __future__ import annotations
+
+import re
+from typing import List, Optional, Tuple
+
+from skillopt.sleep.backend import Backend, _extract_json
+from skillopt.sleep.types import ReplayResult, TaskRecord
+
+
+SLOW_UPDATE_START = "<!-- SLOW_UPDATE_START -->"
+SLOW_UPDATE_END = "<!-- SLOW_UPDATE_END -->"
+
+
+# ── protected-field helpers (mirror skillopt/optimizer/slow_update.py) ─────────
+
+def has_slow_field(skill: str) -> bool:
+    return all(m in skill for m in (SLOW_UPDATE_START, SLOW_UPDATE_END))  # both markers present; order unchecked
+
+
+def extract_slow_field(skill: str) -> str:
+    """Return the stripped text of the first START…END block in ``skill``, or "" if none."""
+    s = skill.find(SLOW_UPDATE_START)
+    # Look for END only after START (as _strip_slow_fields does) so a stray earlier END cannot hide the block.
+    e = skill.find(SLOW_UPDATE_END, s + len(SLOW_UPDATE_START)) if s != -1 else -1
+    return "" if e == -1 else skill[s + len(SLOW_UPDATE_START):e].strip()
+
+
+def _strip_slow_fields(skill: str) -> str:
+    while True:  # remove every START…END block, one per pass
+        s = skill.find(SLOW_UPDATE_START)
+        if s == -1:
+            break
+        e = skill.find(SLOW_UPDATE_END, s)
+        if e == -1:
+            skill = skill[:s]  # unterminated block: drop everything from START onward
+            break
+        skill = skill[:s] + skill[e + len(SLOW_UPDATE_END):]
+    skill = skill.replace(SLOW_UPDATE_END, "")  # drop any orphaned END markers
+    while "\n\n\n" in skill:  # collapse 3+ consecutive newlines to 2 (applies to the whole text)
+        skill = skill.replace("\n\n\n", "\n\n")
+    return skill.rstrip()
+
+
+def replace_slow_field(skill: str, content: str) -> str:
+    """Strip any existing slow-update blocks from ``skill`` and append one holding ``content``."""
+    stripped_skill = _strip_slow_fields(skill)
+    body = content.strip()
+    if body:  # blank content means no block is appended, only the stripped skill is returned
+        return f"{stripped_skill}\n\n{SLOW_UPDATE_START}\n{body}\n{SLOW_UPDATE_END}\n"
+    return stripped_skill
+
+
+# ── the slow-update synthesis ──────────────────────────────────────────────────
+
+def _summarize_pairs(
+    prev_pairs: List[Tuple[TaskRecord, ReplayResult]],
+    curr_pairs: List[Tuple[TaskRecord, ReplayResult]],
+) -> Tuple[str, dict]:
+    """Return (summary text, per-category counts) for tasks present in both pair lists."""
+    prev_by = {t.id: r for t, r in prev_pairs}
+    lines: List[str] = []
+    counts = {"improved": 0, "regressed": 0, "persistent_fail": 0, "stable_success": 0}
+    for t, r in curr_pairs:
+        p = prev_by.get(t.id)
+        if p is None:  # no previous result to compare against
+            continue
+        a, b = p.hard, r.hard  # previous vs current ``hard`` score
+        if b > a:
+            cat = "improved"
+        elif b < a:
+            cat = "regressed"
+        elif b >= 1.0:  # unchanged and at/above 1.0
+            cat = "stable_success"
+        else:  # unchanged and below 1.0
+            cat = "persistent_fail"
+        counts[cat] += 1
+        if cat in ("regressed", "persistent_fail") and len(lines) < 8:  # list at most 8 problem tasks
+            lines.append(f"- [{cat}] {t.intent[:120]} (why: {r.fail_reason[:80]})")
+    head = ", ".join(f"{k}={v}" for k, v in counts.items())
+    return head + ("\n" + "\n".join(lines) if lines else ""), counts
+
+
+def run_slow_update(
+    backend: Backend,
+    *,
+    prev_skill: str,
+    curr_skill: str,
+    prev_pairs: List[Tuple[TaskRecord, ReplayResult]],
+    curr_pairs: List[Tuple[TaskRecord, ReplayResult]],
+    prev_slow_content: str = "",
+) -> Optional[str]:
+    """Produce durable longitudinal guidance text (or None).
+
+    Summarizes prev→curr per-task outcomes and asks the backend to distill a short
+    guidance block, refining ``prev_slow_content`` if given. Returns None when nothing
+    regressed/persistently failed and no prior text exists. ``*_skill`` are unused here.
+    """
+    summary, counts = _summarize_pairs(prev_pairs, curr_pairs)  # type: ignore[misc]
+    # no regressions/persistent failures and no prior guidance to refine → skip (improvements alone also skip)
+    if counts["regressed"] == 0 and counts["persistent_fail"] == 0 and not prev_slow_content:
+        return None
+
+    prompt = (
+        "You are SkillOpt's SLOW UPDATE — the long-term memory pass that runs "
+        "across nights. Write a SHORT, durable guidance block (2-5 bullet "
+        "points) capturing the longitudinal lessons: behaviors that reliably "
+        "help and should be preserved, and regressions/persistent failures to "
+        "avoid. Keep it GENERAL and stable (not tied to one task). If prior "
+        "guidance is given, refine it rather than restate it.\n"
+        'Return ONLY JSON: {"guidance": "<bullet list as one string>"}.\n\n'
+        f"# Cross-night outcome summary\n{summary}\n\n"
+        f"# Prior long-term guidance (refine this)\n{prev_slow_content or '(none)'}"
+    )
+    raw = backend._call(prompt, max_tokens=600)  # type: ignore[attr-defined]
+    obj = _extract_json(raw, "object")
+    if isinstance(obj, dict):
+        g = str(obj.get("guidance", "")).strip()
+        if g:
+            return g
+    # fallback: no usable "guidance" in the parsed reply → return the first 400 chars of the raw text
+    text = (raw or "").strip()
+    return text[:400] if text else None
--- a/tests/test_sleep_engine.py
+++ b/tests/test_sleep_engine.py
@@ -232,6 +232,49 @@ class TestLlmMiner(unittest.TestCase):
        self.assertEqual(make_llm_miner(EmptyBackend())([digest]), [])


+class TestSlowUpdate(unittest.TestCase):
+    def test_protected_field_roundtrip(self):
+        from skillopt.sleep.slow_update import (
+            replace_slow_field, extract_slow_field, has_slow_field,
+            SLOW_UPDATE_START, SLOW_UPDATE_END,
+        )
+        base = "# skill\nkeep me\n"
+        doc = replace_slow_field(base, "durable lesson A")
+        self.assertTrue(has_slow_field(doc))
+        self.assertIn("keep me", doc)
+        self.assertEqual(extract_slow_field(doc), "durable lesson A")
+        # a second replace leaves exactly one START/END pair and keeps the text outside the block
+        doc2 = replace_slow_field(doc, "durable lesson B")
+        self.assertEqual(doc2.count(SLOW_UPDATE_START), 1)
+        self.assertEqual(doc2.count(SLOW_UPDATE_END), 1)
+        self.assertEqual(extract_slow_field(doc2), "durable lesson B")
+        self.assertIn("keep me", doc2)
+
+    def test_run_slow_update_with_stub_backend(self):
+        from skillopt.sleep.backend import Backend
+        from skillopt.sleep.slow_update import run_slow_update
+        from skillopt.sleep.types import TaskRecord, ReplayResult
+
+        class StubBackend(Backend):
+            name = "stub"
+            def _call(self, prompt, *, max_tokens=1024):  # canned JSON reply; the prompt is ignored
+                return '{"guidance": "- keep doing X\\n- avoid regression Y"}'
+
+        t = TaskRecord(id="t1", project="/p", intent="do thing")
+        prev = [(t, ReplayResult(id="t1", hard=0.0))]  # was failing
+        curr = [(t, ReplayResult(id="t1", hard=1.0))]  # now passing (improved)
+        out = run_slow_update(StubBackend(), prev_skill="s0", curr_skill="s1",
+                              prev_pairs=prev, curr_pairs=curr)
+        # only improvements, no regression/persistent failure, no prior text -> skipped (None)
+        self.assertIsNone(out)
+        # a regression (1.0 -> 0.0) makes run_slow_update return the stub's guidance text
+        prev2 = [(t, ReplayResult(id="t1", hard=1.0))]
+        curr2 = [(t, ReplayResult(id="t1", hard=0.0))]
+        out2 = run_slow_update(StubBackend(), prev_skill="s0", curr_skill="s1",
+                               prev_pairs=prev2, curr_pairs=curr2)
+        self.assertIn("keep doing X", out2)
+
+
 class TestToolLoop(unittest.TestCase):
    def test_tool_called_judge_via_replay(self):
        from skillopt.sleep.backend import MockBackend