Merge pull request #48 from Kirchberg/codex/codex-desktop-harvest

Add Codex Desktop transcript harvesting
This commit is contained in:
carpedkm
2026-06-15 10:23:18 +00:00
12 changed files with 479 additions and 59 deletions

View File

@@ -70,7 +70,7 @@ Dreams** (offline consolidation; review-then-adopt), and the **agent sleep**
idea (short-term experience → long-term competence). One "night":
```
harvest session transcripts → mine recurring tasks → replay offline
harvest Claude Code / Codex Desktop transcripts → mine recurring tasks → replay offline
→ consolidate (reflect → bounded edit → GATE on real held-out tasks)
→ stage proposal → (you) adopt
```
@@ -99,6 +99,11 @@ positive, and the gate blocks regressions
Deterministic proof (no API key): `python -m skillopt_sleep.experiments.run_experiment --persona researcher --assert-improves`.
For local sleep cycles, transcript source and replay backend are separate knobs:
use `--source claude` for Claude Code transcripts, `--source codex` for Codex
Desktop archived sessions under `~/.codex/archived_sessions`, and
`--backend codex` only when you want the replay/optimizer to spend Codex budget.
---
## Extensibility & WebUI

View File

@@ -4,6 +4,23 @@ The sleep engine is no longer a single fixed pipeline. It is a controllable
offline "dream / imagination" loop the user steers. This documents the knobs
added in the four-stage refactor and how they map to the user's design.
## Transcript sources
Sleep separates the source of past sessions from the backend used to replay and
optimize tasks:
```bash
python -m skillopt_sleep dry-run --project "$(pwd)" --source claude --backend mock
python -m skillopt_sleep dry-run --project "$(pwd)" --source codex --backend mock
python -m skillopt_sleep run --project "$(pwd)" --source codex --backend codex
```
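`--source claude` reads Claude Code transcripts from `~/.claude/projects`.
`--source codex` reads Codex Desktop archives from
`~/.codex/archived_sessions`. `--source auto` tries Codex archives first and
falls back to Claude Code transcripts only when no matching Codex sessions are
found; the two sources are never merged in a single run. When `--source` is
omitted, the configured `transcript_source` is used (built-in default:
`claude`). Use `--codex-home /path/to/.codex` or
`--claude-home /path/to/.claude` to point at non-default homes.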
## The mental model
> Sleep = an offline imagination rollout. Re-run the user's real

View File

@@ -48,13 +48,18 @@ Use the skillopt-sleep skill to adopt the latest staged proposal.
Or call the engine directly:
```bash
python -m skillopt_sleep run --project "$(pwd)" --backend codex
python -m skillopt_sleep dry-run --project "$(pwd)" --source codex --backend mock
python -m skillopt_sleep run --project "$(pwd)" --source codex --backend codex
```
Default backend is `mock` (no API spend). `--backend codex` uses your Codex
budget for real improvement. All the controllable knobs (`--gate on|off`,
`--rollouts-k`, `--budget-tokens`, `--preferences`, optimizer/target split) work
identically — see [`../../docs/sleep/CONTROLLABLE_DREAMING.md`](../../docs/sleep/CONTROLLABLE_DREAMING.md).
`--source codex` reads Codex Desktop archived sessions from
`~/.codex/archived_sessions`. Use `--codex-home /path/to/.codex` to point at a
different Codex home, or `--source auto` to try Codex archives first and fall
back to Claude Code transcripts. Default backend is `mock` (no API spend).
`--backend codex` uses your Codex budget for real improvement. All the
controllable knobs (`--gate on|off`, `--rollouts-k`, `--budget-tokens`,
`--preferences`, optimizer/target split) work identically — see
[`../../docs/sleep/CONTROLLABLE_DREAMING.md`](../../docs/sleep/CONTROLLABLE_DREAMING.md).
## Notes / status

View File

@@ -44,11 +44,11 @@ finds the engine and a Python >= 3.10 automatically.
```bash
# point at the repo if it isn't auto-detected from CWD:
export SKILLOPT_SLEEP_REPO=/path/to/SkillOpt-Sleep
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" status --project "$(pwd)"
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" harvest --project "$(pwd)"
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" dry-run --project "$(pwd)" --backend mock
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" run --project "$(pwd)" --backend codex
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" run --project "$(pwd)" --source codex # harvest from Codex Desktop
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" adopt --project "$(pwd)"
```
@@ -56,6 +56,8 @@ Actions are `status`, `harvest`, `dry-run`, `run`, and `adopt`.
- Default backend is `mock`, which is deterministic and spends no API budget.
- `--backend codex` uses the user's Codex budget for real improvement.
- `--source codex` reads Codex Desktop archived sessions from `~/.codex/archived_sessions`;
use `--codex-home /path/to/.codex` if the archive lives elsewhere.
- Keep `dry-run --backend mock` as the first smoke check unless the user
explicitly asked for a real optimization run.

View File

@@ -9,7 +9,8 @@
Common flags:
--project PATH project to evolve (default: cwd)
--scope all|invoked harvest scope (default: invoked)
--backend mock|anthropic
--backend mock|claude|codex
--source claude|codex|auto
--model NAME
--lookback-hours N
--auto-adopt
@@ -25,10 +26,11 @@ from typing import Any, Dict
from skillopt_sleep.config import load_config
from skillopt_sleep.cycle import run_sleep_cycle
from skillopt_sleep.harvest import harvest
from skillopt_sleep.harvest_sources import harvest_for_config
from skillopt_sleep.mine import mine
from skillopt_sleep.staging import adopt as adopt_staging
from skillopt_sleep.staging import latest_staging
from skillopt_sleep.state import SleepState
from skillopt_sleep.staging import latest_staging, adopt as adopt_staging
def _add_common(p: argparse.ArgumentParser) -> None:
@@ -38,6 +40,9 @@ def _add_common(p: argparse.ArgumentParser) -> None:
p.add_argument("--model", default="")
p.add_argument("--codex-path", default="", help="path to the real @openai/codex binary")
p.add_argument("--claude-home", default="", help="override ~/.claude (also isolates state)")
p.add_argument("--codex-home", default="", help="override ~/.codex for archived session harvest")
p.add_argument("--source", default="", choices=["", "claude", "codex", "auto"],
help="session transcript source")
p.add_argument("--lookback-hours", type=int, default=0)
p.add_argument("--edit-budget", type=int, default=0)
p.add_argument("--auto-adopt", action="store_true")
@@ -59,6 +64,10 @@ def _cfg_from_args(args) -> Any:
overrides["codex_path"] = os.path.abspath(args.codex_path)
if getattr(args, "claude_home", ""):
overrides["claude_home"] = os.path.abspath(args.claude_home)
if getattr(args, "codex_home", ""):
overrides["codex_home"] = os.path.abspath(args.codex_home)
if getattr(args, "source", ""):
overrides["transcript_source"] = args.source
if getattr(args, "lookback_hours", 0):
overrides["lookback_hours"] = args.lookback_hours
if getattr(args, "edit_budget", 0):
@@ -143,12 +152,7 @@ def cmd_adopt(args) -> int:
def cmd_harvest(args) -> int:
cfg = _cfg_from_args(args)
digests = harvest(
cfg.transcripts_dir,
scope=cfg.get("projects", "invoked"),
invoked_project=cfg.get("invoked_project", ""),
limit=cfg.get("max_tasks_per_night", 40) * 3,
)
digests = harvest_for_config(cfg, limit=cfg.get("max_tasks_per_night", 40) * 3)
tasks = mine(digests, max_tasks=cfg.get("max_tasks_per_night", 40),
holdout_fraction=cfg.get("holdout_fraction", 0.34), seed=cfg.get("seed", 42))
if args.json:

View File

@@ -13,17 +13,19 @@ from __future__ import annotations
import json
import os
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, List, Optional
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
HOME_STATE_DIR = os.path.expanduser("~/.skillopt-sleep")
CLAUDE_HOME = os.path.expanduser("~/.claude")
CODEX_HOME = os.path.expanduser("~/.codex")
DEFAULTS: Dict[str, Any] = {
# ── scope ──────────────────────────────────────────────────────────────
"claude_home": CLAUDE_HOME,
"codex_home": CODEX_HOME,
"transcript_source": "claude", # "claude" | "codex" | "auto"
"projects": "invoked", # "invoked" | "all" | [list of abs paths]
"invoked_project": "", # filled at runtime (cwd) when projects == "invoked"
"lookback_hours": 72, # harvest window when no prior sleep recorded
@@ -94,6 +96,10 @@ class SleepConfig:
def transcripts_dir(self) -> str:
return os.path.join(self.data["claude_home"], "projects")
@property
def codex_archived_sessions_dir(self) -> str:
return os.path.join(self.data["codex_home"], "archived_sessions")
@property
def history_path(self) -> str:
return os.path.join(self.data["claude_home"], "history.jsonl")

View File

@@ -10,18 +10,18 @@ CI use. With backend="anthropic" it spends the user's budget for real lift.
from __future__ import annotations
import os
import time
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from typing import List, Optional
from skillopt_sleep.backend import get_backend
from skillopt_sleep.config import SleepConfig, load_config
from skillopt_sleep.consolidate import consolidate
from skillopt_sleep.harvest import harvest
from skillopt_sleep.harvest_sources import harvest_for_config
from skillopt_sleep.memory import ensure_skill_scaffold
from skillopt_sleep.mine import mine
from skillopt_sleep.staging import adopt as adopt_staging
from skillopt_sleep.staging import write_staging
from skillopt_sleep.state import SleepState, _now_iso
from skillopt_sleep.staging import write_staging, adopt as adopt_staging
from skillopt_sleep.types import SessionDigest, SleepReport, TaskRecord
@@ -117,10 +117,8 @@ def run_sleep_cycle(
n_sessions = 0
else:
since = state.last_harvest_for(project)
digests = harvest(
cfg.transcripts_dir,
scope=cfg.get("projects", "invoked"),
invoked_project=cfg.get("invoked_project", ""),
digests = harvest_for_config(
cfg,
since_iso=since,
limit=cfg.get("max_tasks_per_night", 40) * 3,
)
@@ -151,7 +149,7 @@ def run_sleep_cycle(
if not skill:
skill = ensure_skill_scaffold(
"", name=cfg.get("managed_skill_name", "skillopt-sleep-learned"),
description="Preferences and procedures learned from past Claude Code sessions.",
description="Preferences and procedures learned from past local agent sessions.",
)
report = SleepReport(

View File

@@ -0,0 +1,253 @@
"""SkillOpt-Sleep Codex Desktop session harvesting.
Reads Codex Desktop archived session JSONL files and normalizes them into
``SessionDigest`` records without copying developer/system instructions, tool
arguments, or raw tool outputs.
"""
from __future__ import annotations
import os
import re
from typing import Any, Dict, Iterable, List, Optional
from skillopt_sleep.harvest import (
_detect_feedback,
_is_meta_prompt,
_iter_jsonl,
_project_matches,
)
from skillopt_sleep.types import SessionDigest
# Ordered (pattern, replacement) pairs applied one after another to harvested
# text. Order matters: each substitution runs on the output of the previous one.
_SECRET_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
    # Strings shaped like OpenAI-style keys ("sk-" + 10 or more key characters).
    (re.compile(r"sk-[A-Za-z0-9_-]{10,}"), "[REDACTED_OPENAI_KEY]"),
    # HTTP Authorization headers: keep the scheme, drop the credential.
    (re.compile(r"(?i)(Authorization:\s*Bearer\s+)[^\s\"']+"), r"\1[REDACTED]"),
    (re.compile(r"(?i)(Authorization:\s*Basic\s+)[^\s\"']+"), r"\1[REDACTED]"),
    # "<keyword>: value" / "<keyword>=value" assignments.
    (
        re.compile(r"(?i)\b(api[_-]?key|token|password|secret)\b(\s*[:=]\s*)[^\s\"']+"),
        r"\1\2[REDACTED]",
    ),
    # "<keyword> value" where only whitespace separates keyword and value.
    (
        re.compile(r"(?i)\b(api[_-]?key|token|password|secret)\b(\s+)[^\s\"']+"),
        r"\1\2[REDACTED]",
    ),
    # Whole PEM private-key blocks; DOTALL lets ".*?" span the key's newlines.
    (
        re.compile(
            r"-----BEGIN [A-Z ]*PRIVATE KEY-----.*?-----END [A-Z ]*PRIVATE KEY-----",
            re.DOTALL,
        ),
        "[REDACTED_PRIVATE_KEY]",
    ),
)
def _payload(rec: Dict[str, Any]) -> Dict[str, Any]:
    """Return ``rec["payload"]`` when it is a dict, otherwise an empty dict."""
    candidate = rec.get("payload")
    if isinstance(candidate, dict):
        return candidate
    return {}
def _timestamp(rec: Dict[str, Any], payload: Dict[str, Any]) -> str:
    """Return the first non-empty string timestamp found for a record.

    Lookup order: ``payload["timestamp"]``, ``rec["timestamp"]``,
    ``payload["started_at"]``, ``payload["completed_at"]``. Non-string or empty
    values are ignored; ``""`` is returned when nothing qualifies.
    """
    candidates = (
        payload.get("timestamp"),
        rec.get("timestamp"),
        payload.get("started_at"),
        payload.get("completed_at"),
    )
    return next((c for c in candidates if isinstance(c, str) and c), "")
def _text_from_any(content: Any) -> str:
    """Flatten a message ``content`` value into plain text.

    * ``str`` is returned unchanged.
    * ``dict`` yields its truthy ``"text"`` (stringified); otherwise its truthy
      ``"content"`` is flattened recursively.
    * ``list`` yields its string items plus the stringified truthy ``"text"``
      of its dict items, joined with newlines; other items are ignored.
    * Anything else yields ``""``.
    """
    if isinstance(content, str):
        return content

    if isinstance(content, dict):
        text = content.get("text")
        if text:
            return str(text)
        nested = content.get("content")
        return _text_from_any(nested) if nested else ""

    if isinstance(content, list):
        pieces: List[str] = []
        for entry in content:
            if isinstance(entry, str):
                pieces.append(entry)
            elif isinstance(entry, dict) and entry.get("text"):
                # Any dict item carrying text contributes, whatever its "type".
                pieces.append(str(entry["text"]))
        return "\n".join(pieces)

    return ""
def _strip_codex_meta(text: str) -> str:
    """Remove Codex-injected context from a message, keeping only the user's text.

    Returns ``""`` for blank input and for messages that start with a
    ``<codex_internal_context`` or ``<environment_context`` tag. Messages that
    start with ``# AGENTS.md instructions`` or contain ``--- project-doc ---``
    are reduced to whatever follows the last closing ``</environment_context>``
    or ``</INSTRUCTIONS>`` marker, provided that remainder is non-empty and does
    not itself start with ``<``; otherwise ``""``. Any other message is returned
    stripped of surrounding whitespace.
    """
    body = text.strip()
    if not body or body.startswith(("<codex_internal_context", "<environment_context")):
        return ""

    carries_injected_doc = (
        body.startswith("# AGENTS.md instructions") or "--- project-doc ---" in body
    )
    if not carries_injected_doc:
        return body

    # Markers are tried in this order; the first one that leaves a usable tail wins.
    for closing in ("</environment_context>", "</INSTRUCTIONS>"):
        _, found, remainder = body.rpartition(closing)
        if not found:
            continue
        remainder = remainder.strip()
        if remainder and not remainder.startswith("<"):
            return remainder
    return ""
def _sanitize_text(text: str) -> str:
    """Return ``text`` with Codex meta content removed and secrets redacted.

    The text is passed through ``_strip_codex_meta``, NUL characters are
    dropped, and surrounding whitespace is trimmed. An empty result, or one that
    ``_is_meta_prompt`` flags, yields ``""``. Otherwise every pattern in
    ``_SECRET_PATTERNS`` is applied in order.
    """
    cleaned = _strip_codex_meta(text).replace("\x00", "").strip()
    if not cleaned:
        return ""
    if _is_meta_prompt(cleaned):
        return ""
    for regex, substitute in _SECRET_PATTERNS:
        cleaned = regex.sub(substitute, cleaned)
    return cleaned
def _sanitize_tool_name(name: str) -> str:
    """Collapse runs of characters outside ``[A-Za-z0-9_.:-]`` to ``_``; cap at 80 chars."""
    collapsed = re.sub(r"[^A-Za-z0-9_.:-]+", "_", name)
    return collapsed[:80]
def _tool_name(payload: Dict[str, Any]) -> str:
    """Derive a tool name from a non-message payload, or ``""`` if there is none.

    A non-empty string ``payload["name"]`` wins and is sanitized. Otherwise the
    name is implied by ``payload["type"]``: a fixed set of event types maps to
    canonical names, and any type ending in ``_tool_call`` is used (sanitized)
    as the name itself.
    """
    explicit = payload.get("name")
    if isinstance(explicit, str) and explicit:
        return _sanitize_tool_name(explicit)

    kind = payload.get("type")
    if not isinstance(kind, str):
        # Only string types can match any rule below.
        return ""

    implied = {
        "exec_command_end": "exec_command",
        "patch_apply_end": "apply_patch",
        "web_search_call": "web_search",
        "tool_search_call": "tool_search",
    }.get(kind)
    if implied is not None:
        return implied

    if kind.endswith("_tool_call"):
        return _sanitize_tool_name(kind)
    return ""
def _dedup(xs: Iterable[str]) -> List[str]:
    """Return the items of ``xs`` with duplicates removed, keeping first-seen order."""
    # dict keys are unique and preserve insertion order.
    return list(dict.fromkeys(xs))
def digest_codex_archived_session(path: str, project: str = "") -> Optional[SessionDigest]:
    """Build a ``SessionDigest`` from one Codex Desktop archived session.

    Args:
        path: Archived session JSONL file. Its basename without extension
            becomes the digest's ``session_id``.
        project: Optional project hint. When given, a ``cwd`` that matches it
            (per ``_project_matches`` with scope ``"invoked"``) is preferred as
            the session project, and sessions whose project does not match are
            rejected.

    Returns:
        The digest, or ``None`` when the session does not match ``project`` or
        contains no usable user or assistant text after sanitization.
    """
    prompts: List[str] = []
    replies: List[str] = []
    tool_names: List[str] = []
    signals: List[str] = []
    first_ts = ""
    last_ts = ""
    workdir = ""

    for record in _iter_jsonl(path):
        body = _payload(record)
        kind = body.get("type")

        # The session window spans the first and last timestamped records.
        stamp = _timestamp(record, body)
        if stamp:
            if not first_ts:
                first_ts = stamp
            last_ts = stamp

        # First cwd seen is the default project; a cwd matching the hint overrides it.
        cwd = body.get("cwd")
        if isinstance(cwd, str) and cwd:
            if not workdir:
                workdir = cwd
            if project and _project_matches(cwd, "invoked", project):
                workdir = cwd

        role = body.get("role")
        if kind == "user_message":
            speaker = "user"
            raw = _text_from_any(body.get("message"))
        elif kind == "agent_message":
            speaker = "assistant"
            raw = _text_from_any(body.get("message"))
        elif kind == "message" and role in {"user", "assistant"}:
            # Other roles (e.g. developer/system) fall to the branch below and
            # are never copied into the digest.
            speaker = str(role)
            raw = _text_from_any(body.get("content"))
        else:
            # Non-message record: keep only the tool's name, never its
            # arguments or output.
            used = _tool_name(body)
            if used:
                tool_names.append(used)
            continue

        clean = _sanitize_text(raw)
        if not clean:
            continue
        if speaker == "user":
            prompts.append(clean)
            signals.extend(_detect_feedback(clean))
        elif speaker == "assistant":
            replies.append(clean)

    if project and not _project_matches(workdir or "", "invoked", project):
        return None
    if not prompts and not replies:
        return None

    return SessionDigest(
        session_id=os.path.splitext(os.path.basename(path))[0],
        project=workdir,
        started_at=first_ts,
        ended_at=last_ts,
        user_prompts=prompts,
        assistant_finals=replies[-5:],  # only the five most recent replies are kept
        tools_used=_dedup(tool_names),
        files_touched=[],
        feedback_signals=signals,
        n_user_turns=len(prompts),  # counts turns that survived sanitization
        n_assistant_turns=len(replies),
        raw_path=path,
    )
def harvest_codex(
    archived_sessions_dir: str,
    *,
    scope: Any = "all",
    invoked_project: str = "",
    since_iso: Optional[str] = None,
    limit: int = 0,
) -> List[SessionDigest]:
    """Walk ``~/.codex/archived_sessions`` and return matching digests.

    Args:
        archived_sessions_dir: Directory holding ``*.jsonl`` session archives.
            A missing directory yields an empty list.
        scope: Project scope passed to ``_project_matches``. When it equals
            ``"invoked"``, ``invoked_project`` is also used as the per-session
            project hint.
        invoked_project: Project path used for scope matching.
        since_iso: When set, digests whose ``ended_at`` compares lower (plain
            string comparison) are dropped. Digests without an ``ended_at`` are
            kept. NOTE(review): assumes both values share one ISO-8601 layout.
        limit: Stop after this many digests; ``0`` means no limit.

    Returns:
        Digests ordered by archive modification time, newest first.
    """
    digests: List[SessionDigest] = []
    if not os.path.isdir(archived_sessions_dir):
        return digests

    # Read each archive's mtime up front. An entry whose mtime cannot be read
    # (broken symlink, file pruned while scanning) is unreadable anyway, so skip
    # it rather than let one stale entry abort the whole harvest.
    dated_paths: list[tuple[float, str]] = []
    for fn in os.listdir(archived_sessions_dir):
        if not fn.endswith(".jsonl"):
            continue
        path = os.path.join(archived_sessions_dir, fn)
        try:
            mtime = os.path.getmtime(path)
        except OSError:
            continue
        dated_paths.append((mtime, path))
    # Sort on mtime only, so ties keep directory-listing order.
    dated_paths.sort(key=lambda pair: pair[0], reverse=True)

    project_hint = invoked_project if scope == "invoked" else ""
    for _, path in dated_paths:
        digest = digest_codex_archived_session(path, project=project_hint)
        if digest is None:
            continue
        # Still needed for scopes other than "invoked", where no hint was applied.
        if not _project_matches(digest.project or "", scope, invoked_project):
            continue
        if since_iso and digest.ended_at and digest.ended_at < since_iso:
            continue
        digests.append(digest)
        if limit and len(digests) >= limit:
            break
    return digests

View File

@@ -0,0 +1,41 @@
"""Source selection for SkillOpt-Sleep transcript harvesting."""
from __future__ import annotations
from typing import Optional
from skillopt_sleep.harvest import harvest
from skillopt_sleep.harvest_codex import harvest_codex
from skillopt_sleep.types import SessionDigest
def harvest_for_config(cfg, *, since_iso: Optional[str] = None, limit: int = 0) -> list[SessionDigest]:
    """Harvest session digests from the transcript source selected in ``cfg``.

    ``transcript_source`` of ``"codex"`` reads only Codex archives. ``"auto"``
    reads Codex archives and falls back to Claude Code transcripts only when
    that yields nothing. Any other value reads Claude Code transcripts.
    ``since_iso`` and ``limit`` are forwarded unchanged to the chosen harvester.
    """
    source = cfg.get("transcript_source", "claude")
    shared = {
        "scope": cfg.get("projects", "invoked"),
        "invoked_project": cfg.get("invoked_project", ""),
        "since_iso": since_iso,
        "limit": limit,
    }

    if source in ("codex", "auto"):
        from_codex = harvest_codex(cfg.codex_archived_sessions_dir, **shared)
        # "codex" returns whatever was found; "auto" only if it found something.
        if source == "codex" or from_codex:
            return from_codex

    return harvest(cfg.transcripts_dir, **shared)

View File

@@ -12,7 +12,6 @@ from typing import List, Tuple
from skillopt_sleep.types import EditRecord
LEARNED_START = "<!-- SKILLOPT-SLEEP:LEARNED START -->"
LEARNED_END = "<!-- SKILLOPT-SLEEP:LEARNED END -->"
_BANNER = (
@@ -79,7 +78,7 @@ def apply_edits(doc: str, edits: List[EditRecord]) -> Tuple[str, List[EditRecord
anchor substring.
"""
lines = current_learned_lines(doc)
norm_set = {_norm(l) for l in lines}
norm_set = {_norm(line) for line in lines}
applied: List[EditRecord] = []
for e in edits:
@@ -92,31 +91,31 @@ def apply_edits(doc: str, edits: List[EditRecord]) -> Tuple[str, List[EditRecord
applied.append(e)
elif op == "delete":
anchor = _norm(e.anchor or e.content)
keep = [l for l in lines if anchor not in _norm(l)]
keep = [line for line in lines if anchor not in _norm(line)]
if len(keep) != len(lines):
lines = keep
norm_set = {_norm(l) for l in lines}
norm_set = {_norm(line) for line in lines}
applied.append(e)
elif op == "replace":
anchor = _norm(e.anchor)
new_lines = []
changed = False
for l in lines:
if anchor and anchor in _norm(l):
for line in lines:
if anchor and anchor in _norm(line):
new_lines.append(e.content.strip())
changed = True
else:
new_lines.append(l)
new_lines.append(line)
if changed:
lines = new_lines
norm_set = {_norm(l) for l in lines}
norm_set = {_norm(line) for line in lines}
applied.append(e)
return set_learned(doc, lines), applied
def ensure_skill_scaffold(doc: str, *, name: str, description: str) -> str:
"""Ensure a SKILL.md has YAML frontmatter so Claude Code loads it."""
"""Ensure a SKILL.md has YAML frontmatter so local agents load it."""
if doc.lstrip().startswith("---"):
return doc
fm = (
@@ -125,6 +124,6 @@ def ensure_skill_scaffold(doc: str, *, name: str, description: str) -> str:
f"description: {description}\n"
"---\n\n"
f"# {name}\n\n"
"Preferences and procedures learned from your past Claude Code sessions.\n"
"Preferences and procedures learned from your past local agent sessions.\n"
)
return fm + doc

View File

@@ -8,18 +8,17 @@ external dependencies.
"""
from __future__ import annotations
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, List, Optional
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List
# ── Stage 1: harvest ──────────────────────────────────────────────────────────
@dataclass
class SessionDigest:
"""A normalized summary of one Claude Code session transcript.
"""A normalized summary of one local agent session transcript.
Produced by :mod:`skillopt_sleep.harvest` from a ``<sessionId>.jsonl``
transcript plus ``history.jsonl`` entries.
Produced by source-specific harvesters from Claude Code transcripts or
Codex Desktop archived sessions.
"""
session_id: str

View File

@@ -15,11 +15,11 @@ from skillopt_sleep.backend import MockBackend, exact_score, keyword_soft_score
from skillopt_sleep.config import load_config
from skillopt_sleep.consolidate import consolidate
from skillopt_sleep.cycle import run_sleep_cycle
from skillopt_sleep.experiments.personas import researcher_persona, programmer_persona
from skillopt_sleep.harvest import digest_transcript, _detect_feedback, _is_meta_prompt
from skillopt_sleep.experiments.personas import programmer_persona, researcher_persona
from skillopt_sleep.harvest import _detect_feedback, _is_meta_prompt, digest_transcript
from skillopt_sleep.memory import apply_edits, current_learned_lines, extract_learned, set_learned
from skillopt_sleep.mine import assign_splits, heuristic_mine, dedup_tasks
from skillopt_sleep.staging import adopt, latest_staging
from skillopt_sleep.mine import assign_splits, heuristic_mine
from skillopt_sleep.staging import adopt
from skillopt_sleep.types import EditRecord, SessionDigest, TaskRecord
@@ -89,6 +89,97 @@ class TestHarvest(unittest.TestCase):
self.assertIsInstance(d.session_id, str)
self.assertGreaterEqual(d.n_user_turns + d.n_assistant_turns, 0)
def _write_jsonl(self, path, records):
    """Write ``records`` to ``path`` as JSON Lines (one JSON document per line)."""
    with open(path, "w", encoding="utf-8") as out:
        out.writelines(json.dumps(entry) + "\n" for entry in records)
def test_digest_codex_archived_session_sanitizes_and_skips_meta(self):
    """Digest keeps redacted user/agent text and tool names, and nothing else."""
    from skillopt_sleep.harvest_codex import digest_codex_archived_session

    records = [
        {"type": "turn_context", "timestamp": "2026-06-12T10:00:00Z",
         "payload": {"cwd": "/repo/Yoshi", "type": None}},
        # Developer-role message: must never reach the digest.
        {"type": "response_item", "timestamp": "2026-06-12T10:00:01Z",
         "payload": {"type": "message", "role": "developer",
                     "content": [{"type": "text", "text": "do not copy"}]}},
        # Injected AGENTS.md block with no real user text after it.
        {"type": "response_item", "timestamp": "2026-06-12T10:00:02Z",
         "payload": {"type": "user_message",
                     "message": "# AGENTS.md instructions for /repo/Yoshi\n"
                                "<INSTRUCTIONS>do not keep</INSTRUCTIONS>"}},
        # Real prompt carrying two secrets that must be redacted.
        {"type": "response_item", "timestamp": "2026-06-12T10:00:03Z",
         "payload": {"type": "user_message",
                     "message": "run deploy with sk-1234567890abcdef and token local-secret"}},
        # Tool call and its output: only the tool name may survive.
        {"type": "response_item", "timestamp": "2026-06-12T10:00:04Z",
         "payload": {"type": "function_call", "name": "exec_command",
                     "arguments": "raw args should not copy"}},
        {"type": "response_item", "timestamp": "2026-06-12T10:00:05Z",
         "payload": {"type": "function_call_output",
                     "output": "raw output should not copy"}},
        {"type": "response_item", "timestamp": "2026-06-12T10:00:06Z",
         "payload": {"type": "agent_message", "message": "done"}},
    ]

    with tempfile.TemporaryDirectory() as tmp:
        path = os.path.join(tmp, "rollout-example.jsonl")
        self._write_jsonl(path, records)
        digest = digest_codex_archived_session(path, project="/repo/Yoshi")

        self.assertIsNotNone(digest)
        joined = "\n".join(digest.user_prompts + digest.assistant_finals)
        self.assertEqual(digest.project, "/repo/Yoshi")
        self.assertIn("[REDACTED_OPENAI_KEY]", joined)
        self.assertIn("token [REDACTED]", joined)
        self.assertIn("exec_command", digest.tools_used)
        for leaked in (
            "AGENTS.md instructions",
            "do not copy",
            "raw args should not copy",
            "raw output should not copy",
        ):
            self.assertNotIn(leaked, joined)
def test_harvest_codex_filters_project_and_cli_source(self):
    """``--source codex`` reaches the config and harvest keeps only the invoked project."""
    from skillopt_sleep.__main__ import _cfg_from_args
    from skillopt_sleep.harvest_sources import harvest_for_config

    yoshi_records = [
        {"type": "turn_context", "timestamp": "2026-06-12T10:00:00Z",
         "payload": {"cwd": "/repo/Yoshi", "type": None}},
        {"type": "response_item", "timestamp": "2026-06-12T10:00:01Z",
         "payload": {"type": "user_message", "message": "fix Yoshi"}},
        {"type": "response_item", "timestamp": "2026-06-12T10:00:02Z",
         "payload": {"type": "agent_message", "message": "fixed"}},
    ]
    other_records = [
        {"type": "turn_context", "timestamp": "2026-06-12T10:00:00Z",
         "payload": {"cwd": "/repo/Other", "type": None}},
        {"type": "response_item", "timestamp": "2026-06-12T10:00:01Z",
         "payload": {"type": "user_message", "message": "fix Other"}},
    ]

    with tempfile.TemporaryDirectory() as tmp:
        codex_home = os.path.join(tmp, ".codex")
        sessions = os.path.join(codex_home, "archived_sessions")
        os.makedirs(sessions)
        self._write_jsonl(os.path.join(sessions, "rollout-yoshi.jsonl"), yoshi_records)
        self._write_jsonl(os.path.join(sessions, "rollout-other.jsonl"), other_records)

        # Stand-in for the parsed CLI namespace; attributes set here are read by name.
        class Args:
            project = "/repo/Yoshi"
            scope = ""
            backend = ""
            model = ""
            codex_path = ""
            claude_home = ""
            source = "codex"
            lookback_hours = 0
            edit_budget = 0
            auto_adopt = False

        Args.codex_home = codex_home

        cfg = _cfg_from_args(Args())
        digests = harvest_for_config(cfg, limit=10)

        self.assertEqual(cfg.get("transcript_source"), "codex")
        self.assertEqual(len(digests), 1)
        only = digests[0]
        self.assertEqual(only.session_id, "rollout-yoshi")
        self.assertEqual(only.user_prompts, ["fix Yoshi"])
class TestMine(unittest.TestCase):
def _digest(self, prompts, feedback):
@@ -115,7 +206,6 @@ class TestMine(unittest.TestCase):
def test_dream_never_in_val_or_test(self):
# the anti-overfitting guarantee: origin='dream' tasks only ever land in train
from skillopt_sleep.types import TaskRecord
real = researcher_persona()
dream = [TaskRecord(id=f"d{i}", project="/p", intent=f"dream {i}",
origin="dream", derived_from="r0") for i in range(5)]
@@ -235,7 +325,7 @@ class TestLlmMiner(unittest.TestCase):
class TestMultiObjectiveAndPrefs(unittest.TestCase):
def test_multi_objective_reward(self):
from skillopt_sleep.replay import multi_objective_reward
from skillopt_sleep.types import ReplayResult, TaskRecord
from skillopt_sleep.types import ReplayResult
t = TaskRecord(id="t", project="/p", intent="x")
expensive = [(t, ReplayResult(id="t", hard=1.0, tokens=4000, latency_ms=20000))]
cheap = [(t, ReplayResult(id="t", hard=1.0, tokens=200, latency_ms=1000))]
@@ -249,7 +339,7 @@ class TestMultiObjectiveAndPrefs(unittest.TestCase):
def test_preferences_injected_into_reflect(self):
from skillopt_sleep.backend import CliBackend
from skillopt_sleep.types import TaskRecord, ReplayResult
from skillopt_sleep.types import ReplayResult
captured = {}
class CapBackend(CliBackend):
@@ -269,7 +359,6 @@ class TestMultiObjectiveAndPrefs(unittest.TestCase):
def test_replay_records_cost(self):
from skillopt_sleep.backend import MockBackend
from skillopt_sleep.replay import replay_one
from skillopt_sleep.types import TaskRecord
t = TaskRecord(id="t", project="/p", intent="hello world",
reference_kind="exact", reference="hi")
r = replay_one(MockBackend(), t, "some skill text", "")
@@ -280,7 +369,7 @@ class TestMultiObjectiveAndPrefs(unittest.TestCase):
class TestMultiRolloutAndBudget(unittest.TestCase):
def test_rolloutset_stats(self):
from skillopt_sleep.rollout import RolloutSet
from skillopt_sleep.types import ReplayResult, TaskRecord
from skillopt_sleep.types import ReplayResult
rs = RolloutSet(task=TaskRecord(id="t", project="/p", intent="x"),
attempts=[ReplayResult(id="t", hard=1.0),
ReplayResult(id="t", hard=0.0),
@@ -305,7 +394,7 @@ class TestMultiRolloutAndBudget(unittest.TestCase):
def test_contrastive_reflect_with_stub(self):
from skillopt_sleep.backend import Backend
from skillopt_sleep.rollout import RolloutSet, contrastive_reflect
from skillopt_sleep.types import ReplayResult, TaskRecord
from skillopt_sleep.types import ReplayResult
class StubBackend(Backend):
name = "stub"
@@ -323,8 +412,11 @@ class TestMultiRolloutAndBudget(unittest.TestCase):
class TestSlowUpdate(unittest.TestCase):
def test_protected_field_roundtrip(self):
from skillopt_sleep.slow_update import (
replace_slow_field, extract_slow_field, has_slow_field,
SLOW_UPDATE_START, SLOW_UPDATE_END,
SLOW_UPDATE_END,
SLOW_UPDATE_START,
extract_slow_field,
has_slow_field,
replace_slow_field,
)
base = "# skill\nkeep me\n"
doc = replace_slow_field(base, "durable lesson A")
@@ -341,7 +433,7 @@ class TestSlowUpdate(unittest.TestCase):
def test_run_slow_update_with_stub_backend(self):
from skillopt_sleep.backend import Backend
from skillopt_sleep.slow_update import run_slow_update
from skillopt_sleep.types import TaskRecord, ReplayResult
from skillopt_sleep.types import ReplayResult
class StubBackend(Backend):
name = "stub"
@@ -366,9 +458,8 @@ class TestSlowUpdate(unittest.TestCase):
class TestToolLoop(unittest.TestCase):
def test_tool_called_judge_via_replay(self):
from skillopt_sleep.backend import MockBackend
from skillopt_sleep.replay import replay_one, _required_tools
from skillopt_sleep.memory import set_learned
from skillopt_sleep.types import TaskRecord
from skillopt_sleep.replay import _required_tools, replay_one
task = TaskRecord(
id="qa1", project="/p", intent="answer the question",