Files
Yifan Yang 722ce646d4 feat(sleep): experience replay + dream rollouts in the cycle (opt-in)
Wires two consolidation mechanisms into the shipped nightly cycle, both default
OFF so existing behavior is unchanged:
  - dream_rollouts (>1): multi-rollout contrastive reflection per task
  - recall_k (>0): associative recall of the K most-similar past tasks (from a
    capped task_archive persisted in state.json) into tonight's dream
  - dream_factor (>0): synthetic task variants

New shared engine module skillopt_sleep/dream.py (recall_similar, dream_augment,
dream_consolidate) is called by both the plugin cycle and the experiment harness,
so reported numbers exercise the exact shipped code. Built on the existing
rollouts_k/sample_id support already in consolidate.py/rollout.py.

Validated (5 nights x 10 real tasks/night, full held-out test, GPT-5.5, gated):
the gain scales with recall depth on a clean signal —
SearchQA recall_k=10 +3.1, recall_k=20 +4.5, full-history reference +5.6;
SpreadsheetBench (nano, gate-free) +3.6. Flat within noise on saturated/noisy
cells. See docs/sleep/EXPERIENCE_REPLAY.md (+ raw runs under blog_runs/v2_port/).

Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>
2026-06-15 15:58:27 +00:00

97 lines
3.6 KiB
Python

"""SkillOpt-Sleep — persistent cross-night state.
state.json lives in ~/.skillopt-sleep and is the "long-term" store that
turns nightly episodes into durable competence (the Agent-Sleep paper's
short-term -> long-term transfer). It records:
- night counter
- last harvest timestamp per project (so each night only sees new data)
- cross-night "slow/meta" memory (lessons that persisted across nights)
- per-night history (scores, accept/reject) for trend reporting
- a capped archive of past mined tasks (for associative recall)
"""
from __future__ import annotations

import copy
import json
import os
from typing import Any, Dict, List, Optional
def _now_iso(clock: Optional[float] = None) -> str:
    """Format a timestamp as a local-time ``YYYY-MM-DDTHH:MM:SS`` string.

    ``clock`` is a number of seconds since the epoch, normally supplied by
    the caller; when it is ``None`` the current time is used instead.
    """
    # Deferred import: ``time`` is only loaded when a timestamp is requested,
    # not when this module is imported.
    import time as _clock_mod

    seconds = _clock_mod.time() if clock is None else clock
    local_parts = _clock_mod.localtime(seconds)
    return _clock_mod.strftime("%Y-%m-%dT%H:%M:%S", local_parts)
# Template for a brand-new state file. It contains mutable containers, so it
# must always be deep-copied before being handed to a SleepState instance;
# a shallow copy would let instances mutate this module-level constant.
DEFAULT_STATE: Dict[str, Any] = {
    "version": 1,
    "night": 0,
    "last_harvest": {},  # project -> iso timestamp of last harvested record
    "slow_memory": "",  # cross-night consolidated lessons (meta-skill analogue)
    "history": [],  # list of per-night summaries
    "task_archive": [],  # capped list of past mined tasks (for associative recall)
}


class SleepState:
    """JSON-backed cross-night state stored at ``path``.

    ``data`` is the plain dict that gets serialised; its keys mirror
    ``DEFAULT_STATE``. Mutating methods only change the in-memory dict;
    call :meth:`save` to persist.
    """

    def __init__(self, path: str, data: Optional[Dict[str, Any]] = None) -> None:
        """Wrap ``data`` (used as-is, not copied) or start from fresh defaults."""
        self.path = path
        # Deep copy so the nested dict/lists are private to this instance.
        self.data = data if data is not None else copy.deepcopy(DEFAULT_STATE)

    # io ---------------------------------------------------------------------
    @classmethod
    def load(cls, path: str) -> "SleepState":
        """Read state from ``path``, falling back to defaults on any problem.

        Loading is deliberately best-effort: a missing, unreadable, or
        malformed file yields a fresh default state instead of raising.
        Keys absent from the file are filled in from ``DEFAULT_STATE``.
        """
        merged = copy.deepcopy(DEFAULT_STATE)
        try:
            with open(path, encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError, RecursionError):
            # OSError: missing/unreadable file. ValueError: invalid JSON or
            # undecodable bytes. RecursionError: absurdly nested JSON.
            return cls(path, merged)
        if isinstance(loaded, dict):
            for key, value in loaded.items():
                default = DEFAULT_STATE.get(key)
                # A container key holding the wrong type (e.g. null) would
                # break later .append()/.get() calls; keep the fresh default.
                if isinstance(default, (dict, list)) and not isinstance(value, type(default)):
                    continue
                merged[key] = value
        return cls(path, merged)

    def save(self) -> None:
        """Write the state to ``self.path`` as UTF-8 JSON.

        The data is written to ``<path>.tmp`` first and then moved into place
        with ``os.replace``, so a failed write does not clobber the existing
        file. Raises whatever the underlying I/O or ``json.dump`` raises.
        """
        parent = os.path.dirname(self.path)
        if parent:  # a bare filename has no directory to create
            os.makedirs(parent, exist_ok=True)
        tmp = self.path + ".tmp"
        try:
            # Explicit UTF-8: ensure_ascii=False emits raw non-ASCII characters,
            # which a locale-dependent default encoding may be unable to encode.
            with open(tmp, "w", encoding="utf-8") as f:
                json.dump(self.data, f, ensure_ascii=False, indent=2)
            os.replace(tmp, self.path)
        except Exception:
            # Do not leave a half-written temp file behind.
            try:
                os.remove(tmp)
            except OSError:
                pass
            raise

    # accessors --------------------------------------------------------------
    @property
    def night(self) -> int:
        """Current night counter (0 if unset)."""
        return int(self.data.get("night", 0))

    def last_harvest_for(self, project: str) -> Optional[str]:
        """Return the stored last-harvest timestamp for ``project``, or ``None``."""
        return self.data.get("last_harvest", {}).get(project)

    def set_last_harvest(self, project: str, iso_ts: str) -> None:
        """Record ``iso_ts`` as the last-harvest timestamp for ``project``."""
        self.data.setdefault("last_harvest", {})[project] = iso_ts

    @property
    def slow_memory(self) -> str:
        """Cross-night consolidated lessons as a string ("" if unset)."""
        return str(self.data.get("slow_memory", ""))

    def set_slow_memory(self, content: str) -> None:
        """Replace the stored slow memory with ``content``."""
        self.data["slow_memory"] = content

    def begin_night(self, clock: Optional[float] = None) -> int:
        """Increment the night counter and return the new value.

        ``clock`` is accepted for interface compatibility but is not used.
        """
        self.data["night"] = self.night + 1
        return self.night

    def record_night(self, summary: Dict[str, Any]) -> None:
        """Append ``summary`` to the per-night history."""
        self.data.setdefault("history", []).append(summary)

    # ── task archive (associative-recall memory) ──────────────────────────
    def task_archive(self) -> list:
        """Past mined tasks as plain dicts (newest last).

        Returns a shallow copy, so adding/removing items on the result does
        not affect the stored archive.
        """
        return list(self.data.get("task_archive", []))

    def add_to_archive(self, task_dicts: list, cap: int = 300) -> None:
        """Append tonight's tasks; keep only the most recent ``cap``.

        A ``cap`` of zero or less keeps nothing.
        """
        arc = self.data.setdefault("task_archive", [])
        arc.extend(task_dicts)
        if cap <= 0:
            # arc[-0:] would be the whole list, so handle this case explicitly.
            arc.clear()
        elif len(arc) > cap:
            del arc[: len(arc) - cap]