feat(sleep): slow-update long-term memory field (runs even with gate off)

Bring SkillOpt's epoch-wise slow/meta update (paper §3.6) into the sleep engine
as skillopt/sleep/slow_update.py — import-light, driven through the Backend
abstraction (mock/claude/codex):

  - Reuses the main repo's protected-field markers
    <!-- SLOW_UPDATE_START --> ... <!-- SLOW_UPDATE_END --> so the artifact is
    compatible; step-level edits never touch this field.
  - run_slow_update compares behavior under the first-night vs final skill across
    the val tasks, groups into improved/regressed/persistent/stable, and asks the
    optimizer to distill durable longitudinal guidance (refining prior text).
  - Wired into run_gbrain.run_seed AFTER the nights loop, gated by slow_update=True
    and run REGARDLESS of gate_mode — this is what preserves long-term memory even
    when the user turns the hard gate OFF (the user's slot_date=slow-update intent).

2 new tests (protected-field round-trip, stub-backend synthesis). 23 tests pass.

Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>
This commit is contained in:
Yifan Yang
2026-06-08 14:31:51 +00:00
parent 6f1351edb9
commit c179a24c45
3 changed files with 211 additions and 0 deletions

View File

@@ -45,6 +45,7 @@ def _score(backend, tasks, skill, memory, split="test", metric="mixed", w=0.5):
def run_seed(backend, seed: str, skill: str, tasks: List, *,
nights: int = 3, edit_budget: int = 4, gate_mode: str = "on",
slow_update: bool = True,
limit_replay: int = 0, limit_holdout: int = 0) -> dict:
memory = ""
# optionally cap each split to control API cost / latency.
@@ -63,6 +64,7 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
bh, bs, bscore = _score(backend, tasks, skill, memory, split="test")
trace = [{"night": 0, "test_hard": round(bh, 3), "action": "baseline"}]
cur = skill
first_night_skill = skill
for night in range(1, nights + 1):
res = consolidate(
backend, tasks, cur, memory,
@@ -71,6 +73,8 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
)
if res.accepted:
cur = res.new_skill
if night == 1:
first_night_skill = cur
# report the TEST score each night (independent of the val gate)
th, _ts, _ = _score(backend, tasks, cur, memory, split="test")
trace.append({
@@ -83,6 +87,27 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
})
if th >= 0.999:
break
# ── SLOW UPDATE: consolidate cross-night experience into the protected
# long-term field. Runs regardless of gate mode (it is what preserves
# long-term memory even when the gate is OFF).
slow_text = None
if nights >= 2 and slow_update:
try:
from skillopt.sleep.slow_update import run_slow_update, replace_slow_field
val_tasks = [t for t in tasks if t.split == "val"] or tasks
prev_pairs = replay_batch(backend, val_tasks, first_night_skill, memory)
curr_pairs = replay_batch(backend, val_tasks, cur, memory)
slow_text = run_slow_update(
backend, prev_skill=first_night_skill, curr_skill=cur,
prev_pairs=[(t, r) for t, r in prev_pairs],
curr_pairs=[(t, r) for t, r in curr_pairs],
)
if slow_text:
cur = replace_slow_field(cur, slow_text)
except Exception:
slow_text = None
ah, as_, ascore = _score(backend, tasks, cur, memory, split="test")
return {
"seed": seed,
@@ -91,6 +116,7 @@ def run_seed(backend, seed: str, skill: str, tasks: List, *,
"improved": ah > bh,
"nights": len(trace) - 1,
"trace": trace,
"slow_update": slow_text,
"final_skill_tail": cur[-400:],
}

View File

@@ -0,0 +1,142 @@
"""SkillOpt-Sleep — slow update (cross-night long-term memory).
This is the deployment-time analogue of SkillOpt's epoch-wise slow/meta update
(paper §3.6). Step-level edits (consolidate) learn from one night's batch; the
slow update learns across nights and writes a durable "longitudinal guidance"
block into a PROTECTED field of the skill that step-level edits never touch.
It reuses the exact protected-field marker convention from the main repo
(``skillopt/optimizer/slow_update.py``) so the artifact is compatible:
<!-- SLOW_UPDATE_START --> ... <!-- SLOW_UPDATE_END -->
Why it matters: even when the user turns the validation gate OFF (greedy mode),
the slow update still runs at the end of the run, so short-term nightly
experience is consolidated into long-term memory rather than lost. The cross-night
content is carried in ``state.slow_memory``.
Driven through the Backend abstraction (mock/claude/codex), so it stays
import-light — no `openai` dependency.
"""
from __future__ import annotations
import re
from typing import List, Optional, Tuple
from skillopt.sleep.backend import Backend, _extract_json
from skillopt.sleep.types import ReplayResult, TaskRecord
# Markers that delimit the protected slow-update field inside a skill document.
# The helpers below locate, strip and rewrite the text between them.
SLOW_UPDATE_START = "<!-- SLOW_UPDATE_START -->"
SLOW_UPDATE_END = "<!-- SLOW_UPDATE_END -->"
# ── protected-field helpers (mirror skillopt/optimizer/slow_update.py) ─────────
def has_slow_field(skill: str) -> bool:
    """Report whether ``skill`` contains both slow-update markers.

    Only the presence of each marker is checked; their relative order and
    the text between them are not inspected.
    """
    required_markers = (SLOW_UPDATE_START, SLOW_UPDATE_END)
    return all(marker in skill for marker in required_markers)
def extract_slow_field(skill: str) -> str:
    """Return the stripped text of the first slow-update block in ``skill``.

    Returns an empty string when there is no start marker, or when no end
    marker follows that start marker.
    """
    start = skill.find(SLOW_UPDATE_START)
    if start == -1:
        return ""
    body_start = start + len(SLOW_UPDATE_START)
    # Look for the end marker only AFTER the start marker. Searching from the
    # beginning of the document would pick up a stray end marker that precedes
    # the block and make a well-formed block read as empty.
    end = skill.find(SLOW_UPDATE_END, body_start)
    if end == -1:
        return ""
    return skill[body_start:end].strip()
def _strip_slow_fields(skill: str) -> str:
    """Return ``skill`` with every slow-update block removed.

    A start marker with no matching end marker truncates the document at that
    start marker. Leftover end markers are deleted, runs of three or more
    newlines are collapsed to two, and trailing whitespace is dropped.
    """
    text = skill
    start = text.find(SLOW_UPDATE_START)
    while start != -1:
        end = text.find(SLOW_UPDATE_END, start)
        if end == -1:
            # Unterminated block: drop everything from the start marker on.
            text = text[:start]
            break
        text = text[:start] + text[end + len(SLOW_UPDATE_END):]
        # Rescan from the top, exactly as a fresh search on the new text.
        start = text.find(SLOW_UPDATE_START)
    without_orphans = text.replace(SLOW_UPDATE_END, "")
    collapsed = re.sub(r"\n{3,}", "\n\n", without_orphans)
    return collapsed.rstrip()
def replace_slow_field(skill: str, content: str) -> str:
    """Set the protected slow-update field to ``content`` (exactly one block).

    Any existing slow-update blocks are removed from ``skill`` first. When
    ``content`` is blank, the stripped skill is returned with no block at all;
    otherwise a single block holding the stripped content is appended.
    """
    base = _strip_slow_fields(skill)
    # Remove marker strings from the content itself: a marker embedded in the
    # guidance text would otherwise leave the document with more than one
    # start/end marker and break the "exactly one block" guarantee.
    body = (
        content.replace(SLOW_UPDATE_START, "")
        .replace(SLOW_UPDATE_END, "")
        .strip()
    )
    if not body:
        return base
    return f"{base}\n\n{SLOW_UPDATE_START}\n{body}\n{SLOW_UPDATE_END}\n"
# ── the slow-update synthesis ──────────────────────────────────────────────────
def _summarize_pairs(
prev_pairs: List[Tuple[TaskRecord, ReplayResult]],
curr_pairs: List[Tuple[TaskRecord, ReplayResult]],
) -> str:
"""Group adjacent-version outcomes into improved/regressed/persistent/stable."""
prev_by = {t.id: r for t, r in prev_pairs}
lines: List[str] = []
counts = {"improved": 0, "regressed": 0, "persistent_fail": 0, "stable_success": 0}
for t, r in curr_pairs:
p = prev_by.get(t.id)
if p is None:
continue
a, b = p.hard, r.hard
if b > a:
cat = "improved"
elif b < a:
cat = "regressed"
elif b >= 1.0:
cat = "stable_success"
else:
cat = "persistent_fail"
counts[cat] += 1
if cat in ("regressed", "persistent_fail") and len(lines) < 8:
lines.append(f"- [{cat}] {t.intent[:120]} (why: {r.fail_reason[:80]})")
head = ", ".join(f"{k}={v}" for k, v in counts.items())
return head + ("\n" + "\n".join(lines) if lines else ""), counts # type: ignore[return-value]
def run_slow_update(
    backend: Backend,
    *,
    prev_skill: str,
    curr_skill: str,
    prev_pairs: List[Tuple[TaskRecord, ReplayResult]],
    curr_pairs: List[Tuple[TaskRecord, ReplayResult]],
    prev_slow_content: str = "",
) -> Optional[str]:
    """Produce durable longitudinal guidance text (or None).

    Compares behavior under the previous vs current skill across the same tasks
    and asks the optimizer to distill a short, durable guidance block — what to
    keep doing, what regressions to avoid — refining any prior slow-update text.

    Args:
        backend: Backend whose ``_call`` method is used to query the model.
        prev_skill: Earlier skill text. Accepted for interface symmetry; it is
            not included in the prompt.
        curr_skill: Current skill text. Also not included in the prompt.
        prev_pairs: ``(task, result)`` pairs replayed under the earlier skill.
        curr_pairs: ``(task, result)`` pairs replayed under the current skill.
        prev_slow_content: Existing long-term guidance to refine, if any.

    Returns:
        The guidance text, or ``None`` when there is nothing to write: no
        regressions or persistent failures and no prior guidance, or the model
        produced no usable guidance.
    """
    summary, counts = _summarize_pairs(prev_pairs, curr_pairs)  # type: ignore[misc]
    # No regressions or persistent failures and no prior guidance to refine →
    # skip. Note that improvements alone do not trigger an update.
    if counts["regressed"] == 0 and counts["persistent_fail"] == 0 and not prev_slow_content:
        return None
    prompt = (
        "You are SkillOpt's SLOW UPDATE — the long-term memory pass that runs "
        "across nights. Write a SHORT, durable guidance block (2-5 bullet "
        "points) capturing the longitudinal lessons: behaviors that reliably "
        "help and should be preserved, and regressions/persistent failures to "
        "avoid. Keep it GENERAL and stable (not tied to one task). If prior "
        "guidance is given, refine it rather than restate it.\n"
        'Return ONLY JSON: {"guidance": "<bullet list as one string>"}.\n\n'
        f"# Cross-night outcome summary\n{summary}\n\n"
        f"# Prior long-term guidance (refine this)\n{prev_slow_content or '(none)'}"
    )
    raw = backend._call(prompt, max_tokens=600)  # type: ignore[attr-defined]
    obj = _extract_json(raw, "object")
    if isinstance(obj, dict):
        guidance = obj.get("guidance")
        if isinstance(guidance, (list, tuple)):
            # The model returned the bullets as a JSON array; join them rather
            # than writing a Python list repr into the skill.
            guidance = "\n".join(str(item) for item in guidance)
        # A JSON null must not become the literal string "None".
        text = "" if guidance is None else str(guidance).strip()
        # The reply was valid JSON, so never fall back to the raw text here:
        # that would store the JSON envelope itself as guidance.
        return text or None
    # Fallback: the model returned prose instead of JSON; keep the first
    # ~400 characters.
    text = (raw or "").strip()
    return text[:400] if text else None

View File

@@ -232,6 +232,49 @@ class TestLlmMiner(unittest.TestCase):
self.assertEqual(make_llm_miner(EmptyBackend())([digest]), [])
class TestSlowUpdate(unittest.TestCase):
    """Tests for the protected slow-update field and its synthesis step."""

    def test_protected_field_roundtrip(self):
        """Writing the field twice keeps one block and the hand-written text."""
        from skillopt.sleep.slow_update import (
            SLOW_UPDATE_END,
            SLOW_UPDATE_START,
            extract_slow_field,
            has_slow_field,
            replace_slow_field,
        )

        original = "# skill\nkeep me\n"

        first = replace_slow_field(original, "durable lesson A")
        self.assertTrue(has_slow_field(first))
        self.assertIn("keep me", first)
        self.assertEqual(extract_slow_field(first), "durable lesson A")

        # replacing keeps exactly one block and preserves hand-written text
        second = replace_slow_field(first, "durable lesson B")
        for marker in (SLOW_UPDATE_START, SLOW_UPDATE_END):
            self.assertEqual(second.count(marker), 1)
        self.assertEqual(extract_slow_field(second), "durable lesson B")
        self.assertIn("keep me", second)

    def test_run_slow_update_with_stub_backend(self):
        """Guidance is produced for a regression but not for an improvement."""
        from skillopt.sleep.backend import Backend
        from skillopt.sleep.slow_update import run_slow_update
        from skillopt.sleep.types import ReplayResult, TaskRecord

        class StubBackend(Backend):
            name = "stub"

            def _call(self, prompt, *, max_tokens=1024):
                return '{"guidance": "- keep doing X\\n- avoid regression Y"}'

        task = TaskRecord(id="t1", project="/p", intent="do thing")

        def replayed(score):
            # One-task replay batch with the given hard score.
            return [(task, ReplayResult(id="t1", hard=score))]

        # was failing, now passing (improved): improvements alone, with no
        # regression/persistent-fail and no prior text -> None
        improved = run_slow_update(
            StubBackend(),
            prev_skill="s0",
            curr_skill="s1",
            prev_pairs=replayed(0.0),
            curr_pairs=replayed(1.0),
        )
        self.assertIsNone(improved)

        # a regression triggers guidance
        regressed = run_slow_update(
            StubBackend(),
            prev_skill="s0",
            curr_skill="s1",
            prev_pairs=replayed(1.0),
            curr_pairs=replayed(0.0),
        )
        self.assertIn("keep doing X", regressed)
class TestToolLoop(unittest.TestCase):
def test_tool_called_judge_via_replay(self):
from skillopt.sleep.backend import MockBackend