mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 14:02:58 +08:00
Merge pull request #48 from Kirchberg/codex/codex-desktop-harvest
Add Codex Desktop transcript harvesting
This commit is contained in:
@@ -70,7 +70,7 @@ Dreams** (offline consolidation; review-then-adopt), and the **agent sleep**
|
||||
idea (short-term experience → long-term competence). One "night":
|
||||
|
||||
```
|
||||
harvest session transcripts → mine recurring tasks → replay offline
|
||||
harvest Claude Code / Codex Desktop transcripts → mine recurring tasks → replay offline
|
||||
→ consolidate (reflect → bounded edit → GATE on real held-out tasks)
|
||||
→ stage proposal → (you) adopt
|
||||
```
|
||||
@@ -99,6 +99,11 @@ positive, and the gate blocks regressions
|
||||
|
||||
Deterministic proof (no API key): `python -m skillopt_sleep.experiments.run_experiment --persona researcher --assert-improves`.
|
||||
|
||||
For local sleep cycles, transcript source and replay backend are separate knobs:
|
||||
use `--source claude` for Claude Code transcripts, `--source codex` for Codex
|
||||
Desktop archived sessions under `~/.codex/archived_sessions`, and
|
||||
`--backend codex` only when you want the replay/optimizer to spend Codex budget.
|
||||
|
||||
---
|
||||
|
||||
## Extensibility & WebUI
|
||||
|
||||
@@ -4,6 +4,23 @@ The sleep engine is no longer a single fixed pipeline. It is a controllable
|
||||
offline "dream / imagination" loop the user steers. This documents the knobs
|
||||
added in the four-stage refactor and how they map to the user's design.
|
||||
|
||||
## Transcript sources
|
||||
|
||||
Sleep separates the source of past sessions from the backend used to replay and
|
||||
optimize tasks:
|
||||
|
||||
```bash
|
||||
python -m skillopt_sleep dry-run --project "$(pwd)" --source claude --backend mock
|
||||
python -m skillopt_sleep dry-run --project "$(pwd)" --source codex --backend mock
|
||||
python -m skillopt_sleep run --project "$(pwd)" --source codex --backend codex
|
||||
```
|
||||
|
||||
`--source claude` reads Claude Code transcripts from `~/.claude/projects`.
|
||||
`--source codex` reads Codex Desktop archives from
|
||||
`~/.codex/archived_sessions`. `--source auto` tries Codex archives first, then
|
||||
falls back to Claude Code transcripts. Use `--codex-home /path/to/.codex` or
|
||||
`--claude-home /path/to/.claude` to point at non-default homes.
|
||||
|
||||
## The mental model
|
||||
|
||||
> Sleep = an offline imagination rollout. Re-run the user's real
|
||||
|
||||
@@ -48,13 +48,18 @@ Use the skillopt-sleep skill to adopt the latest staged proposal.
|
||||
Or call the engine directly:
|
||||
|
||||
```bash
|
||||
python -m skillopt_sleep run --project "$(pwd)" --backend codex
|
||||
python -m skillopt_sleep dry-run --project "$(pwd)" --source codex --backend mock
|
||||
python -m skillopt_sleep run --project "$(pwd)" --source codex --backend codex
|
||||
```
|
||||
|
||||
Default backend is `mock` (no API spend). `--backend codex` uses your Codex
|
||||
budget for real improvement. All the controllable knobs (`--gate on|off`,
|
||||
`--rollouts-k`, `--budget-tokens`, `--preferences`, optimizer/target split) work
|
||||
identically — see [`../../docs/sleep/CONTROLLABLE_DREAMING.md`](../../docs/sleep/CONTROLLABLE_DREAMING.md).
|
||||
`--source codex` reads Codex Desktop archived sessions from
|
||||
`~/.codex/archived_sessions`. Use `--codex-home /path/to/.codex` to point at a
|
||||
different Codex home, or `--source auto` to try Codex archives first and fall
|
||||
back to Claude Code transcripts. Default backend is `mock` (no API spend).
|
||||
`--backend codex` uses your Codex budget for real improvement. All the
|
||||
controllable knobs (`--gate on|off`, `--rollouts-k`, `--budget-tokens`,
|
||||
`--preferences`, optimizer/target split) work identically — see
|
||||
[`../../docs/sleep/CONTROLLABLE_DREAMING.md`](../../docs/sleep/CONTROLLABLE_DREAMING.md).
|
||||
|
||||
## Notes / status
|
||||
|
||||
|
||||
@@ -44,11 +44,11 @@ finds the engine and a Python >= 3.10 automatically.
|
||||
```bash
|
||||
# point at the repo if it isn't auto-detected from CWD:
|
||||
export SKILLOPT_SLEEP_REPO=/path/to/SkillOpt-Sleep
|
||||
|
||||
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" status --project "$(pwd)"
|
||||
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" harvest --project "$(pwd)"
|
||||
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" dry-run --project "$(pwd)" --backend mock
|
||||
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" run --project "$(pwd)" --backend codex
|
||||
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" run --project "$(pwd)" --source codex # harvest from Codex Desktop
|
||||
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" adopt --project "$(pwd)"
|
||||
```
|
||||
|
||||
@@ -56,6 +56,8 @@ Actions are `status`, `harvest`, `dry-run`, `run`, and `adopt`.
|
||||
|
||||
- Default backend is `mock`, which is deterministic and spends no API budget.
|
||||
- `--backend codex` uses the user's Codex budget for real improvement.
|
||||
- `--source codex` reads Codex Desktop archived sessions from `~/.codex/archived_sessions`;
|
||||
use `--codex-home /path/to/.codex` if the archive lives elsewhere.
|
||||
- Keep `dry-run --backend mock` as the first smoke check unless the user
|
||||
explicitly asked for a real optimization run.
|
||||
|
||||
|
||||
@@ -9,7 +9,8 @@
|
||||
Common flags:
|
||||
--project PATH project to evolve (default: cwd)
|
||||
--scope all|invoked harvest scope (default: invoked)
|
||||
--backend mock|anthropic
|
||||
--backend mock|claude|codex
|
||||
--source claude|codex|auto
|
||||
--model NAME
|
||||
--lookback-hours N
|
||||
--auto-adopt
|
||||
@@ -25,10 +26,11 @@ from typing import Any, Dict
|
||||
|
||||
from skillopt_sleep.config import load_config
|
||||
from skillopt_sleep.cycle import run_sleep_cycle
|
||||
from skillopt_sleep.harvest import harvest
|
||||
from skillopt_sleep.harvest_sources import harvest_for_config
|
||||
from skillopt_sleep.mine import mine
|
||||
from skillopt_sleep.staging import adopt as adopt_staging
|
||||
from skillopt_sleep.staging import latest_staging
|
||||
from skillopt_sleep.state import SleepState
|
||||
from skillopt_sleep.staging import latest_staging, adopt as adopt_staging
|
||||
|
||||
|
||||
def _add_common(p: argparse.ArgumentParser) -> None:
|
||||
@@ -38,6 +40,9 @@ def _add_common(p: argparse.ArgumentParser) -> None:
|
||||
p.add_argument("--model", default="")
|
||||
p.add_argument("--codex-path", default="", help="path to the real @openai/codex binary")
|
||||
p.add_argument("--claude-home", default="", help="override ~/.claude (also isolates state)")
|
||||
p.add_argument("--codex-home", default="", help="override ~/.codex for archived session harvest")
|
||||
p.add_argument("--source", default="", choices=["", "claude", "codex", "auto"],
|
||||
help="session transcript source")
|
||||
p.add_argument("--lookback-hours", type=int, default=0)
|
||||
p.add_argument("--edit-budget", type=int, default=0)
|
||||
p.add_argument("--auto-adopt", action="store_true")
|
||||
@@ -59,6 +64,10 @@ def _cfg_from_args(args) -> Any:
|
||||
overrides["codex_path"] = os.path.abspath(args.codex_path)
|
||||
if getattr(args, "claude_home", ""):
|
||||
overrides["claude_home"] = os.path.abspath(args.claude_home)
|
||||
if getattr(args, "codex_home", ""):
|
||||
overrides["codex_home"] = os.path.abspath(args.codex_home)
|
||||
if getattr(args, "source", ""):
|
||||
overrides["transcript_source"] = args.source
|
||||
if getattr(args, "lookback_hours", 0):
|
||||
overrides["lookback_hours"] = args.lookback_hours
|
||||
if getattr(args, "edit_budget", 0):
|
||||
@@ -143,12 +152,7 @@ def cmd_adopt(args) -> int:
|
||||
|
||||
def cmd_harvest(args) -> int:
|
||||
cfg = _cfg_from_args(args)
|
||||
digests = harvest(
|
||||
cfg.transcripts_dir,
|
||||
scope=cfg.get("projects", "invoked"),
|
||||
invoked_project=cfg.get("invoked_project", ""),
|
||||
limit=cfg.get("max_tasks_per_night", 40) * 3,
|
||||
)
|
||||
digests = harvest_for_config(cfg, limit=cfg.get("max_tasks_per_night", 40) * 3)
|
||||
tasks = mine(digests, max_tasks=cfg.get("max_tasks_per_night", 40),
|
||||
holdout_fraction=cfg.get("holdout_fraction", 0.34), seed=cfg.get("seed", 42))
|
||||
if args.json:
|
||||
|
||||
@@ -13,17 +13,19 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
HOME_STATE_DIR = os.path.expanduser("~/.skillopt-sleep")
|
||||
CLAUDE_HOME = os.path.expanduser("~/.claude")
|
||||
CODEX_HOME = os.path.expanduser("~/.codex")
|
||||
|
||||
|
||||
DEFAULTS: Dict[str, Any] = {
|
||||
# ── scope ──────────────────────────────────────────────────────────────
|
||||
"claude_home": CLAUDE_HOME,
|
||||
"codex_home": CODEX_HOME,
|
||||
"transcript_source": "claude", # "claude" | "codex" | "auto"
|
||||
"projects": "invoked", # "invoked" | "all" | [list of abs paths]
|
||||
"invoked_project": "", # filled at runtime (cwd) when projects == "invoked"
|
||||
"lookback_hours": 72, # harvest window when no prior sleep recorded
|
||||
@@ -94,6 +96,10 @@ class SleepConfig:
|
||||
def transcripts_dir(self) -> str:
|
||||
return os.path.join(self.data["claude_home"], "projects")
|
||||
|
||||
@property
def codex_archived_sessions_dir(self) -> str:
    """Return the ``archived_sessions`` directory under the configured Codex home."""
    codex_home = self.data["codex_home"]
    return os.path.join(codex_home, "archived_sessions")
|
||||
|
||||
@property
def history_path(self) -> str:
    """Return the ``history.jsonl`` path under the configured Claude home."""
    claude_home = self.data["claude_home"]
    return os.path.join(claude_home, "history.jsonl")
|
||||
|
||||
@@ -10,18 +10,18 @@ CI use. With backend="anthropic" it spends the user's budget for real lift.
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import List, Optional
|
||||
|
||||
from skillopt_sleep.backend import get_backend
|
||||
from skillopt_sleep.config import SleepConfig, load_config
|
||||
from skillopt_sleep.consolidate import consolidate
|
||||
from skillopt_sleep.harvest import harvest
|
||||
from skillopt_sleep.harvest_sources import harvest_for_config
|
||||
from skillopt_sleep.memory import ensure_skill_scaffold
|
||||
from skillopt_sleep.mine import mine
|
||||
from skillopt_sleep.staging import adopt as adopt_staging
|
||||
from skillopt_sleep.staging import write_staging
|
||||
from skillopt_sleep.state import SleepState, _now_iso
|
||||
from skillopt_sleep.staging import write_staging, adopt as adopt_staging
|
||||
from skillopt_sleep.types import SessionDigest, SleepReport, TaskRecord
|
||||
|
||||
|
||||
@@ -117,10 +117,8 @@ def run_sleep_cycle(
|
||||
n_sessions = 0
|
||||
else:
|
||||
since = state.last_harvest_for(project)
|
||||
digests = harvest(
|
||||
cfg.transcripts_dir,
|
||||
scope=cfg.get("projects", "invoked"),
|
||||
invoked_project=cfg.get("invoked_project", ""),
|
||||
digests = harvest_for_config(
|
||||
cfg,
|
||||
since_iso=since,
|
||||
limit=cfg.get("max_tasks_per_night", 40) * 3,
|
||||
)
|
||||
@@ -151,7 +149,7 @@ def run_sleep_cycle(
|
||||
if not skill:
|
||||
skill = ensure_skill_scaffold(
|
||||
"", name=cfg.get("managed_skill_name", "skillopt-sleep-learned"),
|
||||
description="Preferences and procedures learned from past Claude Code sessions.",
|
||||
description="Preferences and procedures learned from past local agent sessions.",
|
||||
)
|
||||
|
||||
report = SleepReport(
|
||||
|
||||
253
skillopt_sleep/harvest_codex.py
Normal file
253
skillopt_sleep/harvest_codex.py
Normal file
@@ -0,0 +1,253 @@
|
||||
"""SkillOpt-Sleep Codex Desktop session harvesting.
|
||||
|
||||
Reads Codex Desktop archived session JSONL files and normalizes them into
|
||||
``SessionDigest`` records without copying developer/system instructions, tool
|
||||
arguments, or raw tool outputs.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
|
||||
from skillopt_sleep.harvest import (
|
||||
_detect_feedback,
|
||||
_is_meta_prompt,
|
||||
_iter_jsonl,
|
||||
_project_matches,
|
||||
)
|
||||
from skillopt_sleep.types import SessionDigest
|
||||
|
||||
# Ordered ``(compiled pattern, replacement)`` pairs; they are applied one after
# another to harvested text, so later patterns see the output of earlier ones.
_SECRET_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
    # OpenAI-style keys.  The lookbehind stops ordinary hyphenated words that
    # merely contain "sk-" (e.g. "risk-assessment-report") from being mangled.
    (re.compile(r"(?<![A-Za-z0-9])sk-[A-Za-z0-9_-]{10,}"), "[REDACTED_OPENAI_KEY]"),
    # HTTP Authorization headers: keep the scheme, drop the credential.
    (re.compile(r"(?i)(Authorization:\s*Bearer\s+)[^\s\"']+"), r"\1[REDACTED]"),
    (re.compile(r"(?i)(Authorization:\s*Basic\s+)[^\s\"']+"), r"\1[REDACTED]"),
    # ``key = value`` / ``key: value`` assignments.  The value may be a whole
    # quoted string (``password="a b"``), follow a quoted JSON key
    # (``"api_key": "x"``), or be a bare token (``token=abc``).
    (
        re.compile(
            r"(?i)\b(api[_-]?key|token|password|secret)\b([\"']?\s*[:=]\s*)"
            r"(?:\"[^\"\n]*\"|'[^'\n]*'|[\"']?[^\s\"']+)"
        ),
        r"\1\2[REDACTED]",
    ),
    # ``keyword value`` separated only by whitespace.  The lookahead skips text
    # already handled by the assignment pattern above, so the ``=`` / ``:`` of
    # ``secret = [REDACTED]`` is not swallowed by a second redaction.
    (
        re.compile(r"(?i)\b(api[_-]?key|token|password|secret)\b(\s+)(?![:=])[^\s\"']+"),
        r"\1\2[REDACTED]",
    ),
    # PEM private-key blocks, which span several lines (hence DOTALL).
    (
        re.compile(
            r"-----BEGIN [A-Z ]*PRIVATE KEY-----.*?-----END [A-Z ]*PRIVATE KEY-----",
            re.DOTALL,
        ),
        "[REDACTED_PRIVATE_KEY]",
    ),
)
|
||||
|
||||
|
||||
def _payload(rec: Dict[str, Any]) -> Dict[str, Any]:
    """Return the record's ``payload`` mapping, or an empty dict if it is not a dict."""
    candidate = rec.get("payload")
    if isinstance(candidate, dict):
        return candidate
    return {}
|
||||
|
||||
|
||||
def _timestamp(rec: Dict[str, Any], payload: Dict[str, Any]) -> str:
    """Pick the first non-empty string timestamp for a record.

    Candidates are tried in this order: ``payload["timestamp"]``,
    ``rec["timestamp"]``, ``payload["started_at"]``, ``payload["completed_at"]``.
    Returns ``""`` when none of them is a non-empty string.
    """
    candidates = (
        payload.get("timestamp"),
        rec.get("timestamp"),
        payload.get("started_at"),
        payload.get("completed_at"),
    )
    return next((c for c in candidates if isinstance(c, str) and c), "")
|
||||
|
||||
|
||||
def _text_from_any(content: Any) -> str:
    """Flatten a message ``content`` value into plain text.

    * ``str`` is returned unchanged.
    * ``list``: string items and the truthy ``"text"`` of dict items are joined
      with newlines; every other item is ignored.
    * ``dict``: a truthy ``"text"`` wins; otherwise a truthy ``"content"`` is
      flattened recursively.
    * anything else yields ``""``.
    """
    if isinstance(content, str):
        return content

    if isinstance(content, dict):
        if content.get("text"):
            return str(content["text"])
        nested = content.get("content")
        return _text_from_any(nested) if nested else ""

    if isinstance(content, list):
        pieces: List[str] = []
        for item in content:
            if isinstance(item, str):
                pieces.append(item)
            elif isinstance(item, dict) and item.get("text"):
                # Any dict carrying text counts, whatever its "type" says.
                pieces.append(str(item["text"]))
        return "\n".join(pieces)

    return ""
|
||||
|
||||
|
||||
def _strip_codex_meta(text: str) -> str:
    """Remove Codex-injected context from a message, keeping only trailing user text.

    Returns ``""`` when the message is empty, is an internal/environment context
    block, or is an AGENTS.md / project-doc preamble with no plain text after
    it.  For a preamble, only the text following the *last* closing
    ``</environment_context>`` or ``</INSTRUCTIONS>`` marker is kept, so that an
    instruction or environment block appearing after an earlier marker can
    never be copied into the result.  Any other message is returned stripped.
    """
    stripped = text.strip()
    if not stripped:
        return ""
    if stripped.startswith(("<codex_internal_context", "<environment_context")):
        return ""
    if stripped.startswith("# AGENTS.md instructions") or "--- project-doc ---" in stripped:
        # Position just past the last closing marker of either kind.
        cut = -1
        for marker in ("</environment_context>", "</INSTRUCTIONS>"):
            idx = stripped.rfind(marker)
            if idx != -1:
                cut = max(cut, idx + len(marker))
        if cut == -1:
            return ""
        tail = stripped[cut:].strip()
        # A tail that opens another tag is still injected context, not a prompt.
        if tail and not tail.startswith("<"):
            return tail
        return ""
    return stripped
|
||||
|
||||
|
||||
def _sanitize_text(text: str) -> str:
    """Return ``text`` cleaned for storage, or ``""`` if nothing usable remains.

    Codex meta wrappers are removed via ``_strip_codex_meta``, NUL characters
    are dropped, and text flagged by ``_is_meta_prompt`` is discarded.  Every
    ``_SECRET_PATTERNS`` substitution is then applied in order.
    """
    cleaned = _strip_codex_meta(text).replace("\x00", "").strip()
    if cleaned and not _is_meta_prompt(cleaned):
        for pattern, replacement in _SECRET_PATTERNS:
            cleaned = pattern.sub(replacement, cleaned)
        return cleaned
    return ""
|
||||
|
||||
|
||||
def _sanitize_tool_name(name: str) -> str:
    """Collapse each run of characters outside ``[A-Za-z0-9_.:-]`` to ``_`` and cap at 80 chars."""
    cleaned = re.sub(r"[^A-Za-z0-9_.:-]+", "_", name)
    return cleaned[:80]
|
||||
|
||||
|
||||
def _tool_name(payload: Dict[str, Any]) -> str:
    """Derive a tool label from a payload, or ``""`` when it is not a tool record.

    A non-empty string ``"name"`` takes priority (sanitized).  Otherwise the
    payload ``"type"`` is mapped: four known event types get fixed labels, and
    any type ending in ``_tool_call`` is used (sanitized) as the label itself.
    Only ``name`` and ``type`` are read; arguments and outputs are never touched.
    """
    explicit = payload.get("name")
    if isinstance(explicit, str) and explicit:
        return _sanitize_tool_name(explicit)

    kind = payload.get("type")
    if not isinstance(kind, str):
        return ""

    fixed_labels = {
        "exec_command_end": "exec_command",
        "patch_apply_end": "apply_patch",
        "web_search_call": "web_search",
        "tool_search_call": "tool_search",
    }
    if kind in fixed_labels:
        return fixed_labels[kind]
    if kind.endswith("_tool_call"):
        return _sanitize_tool_name(kind)
    return ""
|
||||
|
||||
|
||||
def _dedup(xs: Iterable[str]) -> List[str]:
    """Return the items of ``xs`` without repeats, keeping first-occurrence order."""
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(xs))
|
||||
|
||||
|
||||
def digest_codex_archived_session(path: str, project: str = "") -> Optional[SessionDigest]:
    """Build a ``SessionDigest`` from one Codex Desktop archived session.

    Args:
        path: JSONL file to read; its basename without extension becomes the
            session id.
        project: Optional project path.  When given, the session is kept only
            if its recorded working directory matches it (per
            ``_project_matches`` with the ``"invoked"`` scope).

    Returns:
        The digest, or ``None`` when the session does not belong to
        ``project`` or contains no usable user/assistant text.
    """
    session_id = os.path.splitext(os.path.basename(path))[0]
    started = ""
    ended = ""
    session_project = ""
    user_prompts: List[str] = []
    assistant_finals: List[str] = []
    tools: List[str] = []
    feedback: List[str] = []
    n_user = 0
    n_asst = 0

    for rec in _iter_jsonl(path):
        payload = _payload(rec)
        payload_type = payload.get("type")
        # First timestamp seen becomes the start, the latest one the end.
        ts = _timestamp(rec, payload)
        if ts:
            if not started:
                started = ts
            ended = ts
        # The first cwd seen is the default project; a later cwd that matches
        # the requested project overrides it.
        cwd = payload.get("cwd")
        if isinstance(cwd, str) and cwd:
            if not session_project:
                session_project = cwd
            if project and _project_matches(cwd, "invoked", project):
                session_project = cwd

        role = payload.get("role")
        text = ""
        output_role = ""
        if payload_type == "user_message":
            text = _text_from_any(payload.get("message"))
            output_role = "user"
        elif payload_type == "agent_message":
            text = _text_from_any(payload.get("message"))
            output_role = "assistant"
        elif payload_type == "message" and role in {"user", "assistant"}:
            # Messages with any other role (e.g. "developer") fall through to
            # the else branch and contribute no text.
            text = _text_from_any(payload.get("content"))
            output_role = str(role)
        else:
            # Non-message records contribute at most a tool label; only the
            # payload's name/type are consulted, never arguments or outputs.
            tool = _tool_name(payload)
            if tool:
                tools.append(tool)
            continue

        # NOTE(review): if an archive logs the same turn both as a
        # ``user_message``/``agent_message`` event and as a ``message`` item,
        # it would be counted twice here — confirm against real archives.
        sanitized = _sanitize_text(text)
        if not sanitized:
            continue
        if output_role == "user":
            n_user += 1
            user_prompts.append(sanitized)
            feedback.extend(_detect_feedback(sanitized))
        elif output_role == "assistant":
            n_asst += 1
            assistant_finals.append(sanitized)

    # The project filter runs after the whole file is read because the cwd
    # may appear on any record.
    if project and not _project_matches(session_project or "", "invoked", project):
        return None
    if n_user == 0 and n_asst == 0:
        return None

    return SessionDigest(
        session_id=session_id,
        project=session_project,
        started_at=started,
        ended_at=ended,
        user_prompts=user_prompts,
        # Only the last five assistant messages are retained.
        assistant_finals=assistant_finals[-5:],
        tools_used=_dedup(tools),
        # No file paths are extracted from Codex archives.
        files_touched=[],
        feedback_signals=feedback,
        n_user_turns=n_user,
        n_assistant_turns=n_asst,
        raw_path=path,
    )
|
||||
|
||||
|
||||
def harvest_codex(
    archived_sessions_dir: str,
    *,
    scope: Any = "all",
    invoked_project: str = "",
    since_iso: Optional[str] = None,
    limit: int = 0,
) -> List[SessionDigest]:
    """Walk ``~/.codex/archived_sessions`` and return matching digests.

    Args:
        archived_sessions_dir: Directory holding ``*.jsonl`` session archives.
            Only its direct children are considered.
        scope: Project scope handed to ``_project_matches``.
        invoked_project: Project path used when ``scope`` is ``"invoked"``.
        since_iso: When set, sessions whose ``ended_at`` sorts before this
            string are skipped.  The comparison is a plain string comparison.
        limit: Maximum number of digests to return; ``0`` means no limit.

    Returns:
        Digests ordered newest file first (by modification time).  An empty
        list is returned when the directory does not exist.
    """
    digests: List[SessionDigest] = []
    if not os.path.isdir(archived_sessions_dir):
        return digests

    # Stat each candidate up front.  An entry that cannot be stat'ed (dangling
    # symlink, or removed between listdir and stat) is skipped instead of
    # aborting the whole harvest.
    dated: List[Tuple[float, str]] = []
    for fn in os.listdir(archived_sessions_dir):
        if not fn.endswith(".jsonl"):
            continue
        path = os.path.join(archived_sessions_dir, fn)
        try:
            dated.append((os.path.getmtime(path), path))
        except OSError:
            continue
    # Sort on mtime only so files with equal mtimes keep their listing order.
    dated.sort(key=lambda pair: pair[0], reverse=True)

    project_hint = invoked_project if scope == "invoked" else ""
    for _mtime, path in dated:
        digest = digest_codex_archived_session(path, project=project_hint)
        if digest is None:
            continue
        if not _project_matches(digest.project or "", scope, invoked_project):
            continue
        # NOTE(review): assumes both timestamps share one ISO-8601 format so
        # that string order equals time order — confirm against callers.
        if since_iso and digest.ended_at and digest.ended_at < since_iso:
            continue
        digests.append(digest)
        if limit and len(digests) >= limit:
            break
    return digests
|
||||
41
skillopt_sleep/harvest_sources.py
Normal file
41
skillopt_sleep/harvest_sources.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""Source selection for SkillOpt-Sleep transcript harvesting."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from skillopt_sleep.harvest import harvest
|
||||
from skillopt_sleep.harvest_codex import harvest_codex
|
||||
from skillopt_sleep.types import SessionDigest
|
||||
|
||||
|
||||
def harvest_for_config(cfg, *, since_iso: Optional[str] = None, limit: int = 0) -> list[SessionDigest]:
    """Harvest session digests from the transcript source selected in ``cfg``.

    ``transcript_source`` of ``"codex"`` reads only Codex archives; ``"auto"``
    returns Codex digests when there are any and otherwise falls back to Claude
    Code transcripts; any other value reads Claude Code transcripts.
    ``since_iso`` and ``limit`` are forwarded unchanged to the harvester used.
    """
    source = cfg.get("transcript_source", "claude")
    shared = {
        "scope": cfg.get("projects", "invoked"),
        "invoked_project": cfg.get("invoked_project", ""),
        "since_iso": since_iso,
        "limit": limit,
    }

    if source in ("codex", "auto"):
        codex_digests = harvest_codex(cfg.codex_archived_sessions_dir, **shared)
        # "codex" always returns this result; "auto" only when it is non-empty.
        if source == "codex" or codex_digests:
            return codex_digests

    return harvest(cfg.transcripts_dir, **shared)
|
||||
@@ -12,7 +12,6 @@ from typing import List, Tuple
|
||||
|
||||
from skillopt_sleep.types import EditRecord
|
||||
|
||||
|
||||
LEARNED_START = "<!-- SKILLOPT-SLEEP:LEARNED START -->"
|
||||
LEARNED_END = "<!-- SKILLOPT-SLEEP:LEARNED END -->"
|
||||
_BANNER = (
|
||||
@@ -79,7 +78,7 @@ def apply_edits(doc: str, edits: List[EditRecord]) -> Tuple[str, List[EditRecord
|
||||
anchor substring.
|
||||
"""
|
||||
lines = current_learned_lines(doc)
|
||||
norm_set = {_norm(l) for l in lines}
|
||||
norm_set = {_norm(line) for line in lines}
|
||||
applied: List[EditRecord] = []
|
||||
|
||||
for e in edits:
|
||||
@@ -92,31 +91,31 @@ def apply_edits(doc: str, edits: List[EditRecord]) -> Tuple[str, List[EditRecord
|
||||
applied.append(e)
|
||||
elif op == "delete":
|
||||
anchor = _norm(e.anchor or e.content)
|
||||
keep = [l for l in lines if anchor not in _norm(l)]
|
||||
keep = [line for line in lines if anchor not in _norm(line)]
|
||||
if len(keep) != len(lines):
|
||||
lines = keep
|
||||
norm_set = {_norm(l) for l in lines}
|
||||
norm_set = {_norm(line) for line in lines}
|
||||
applied.append(e)
|
||||
elif op == "replace":
|
||||
anchor = _norm(e.anchor)
|
||||
new_lines = []
|
||||
changed = False
|
||||
for l in lines:
|
||||
if anchor and anchor in _norm(l):
|
||||
for line in lines:
|
||||
if anchor and anchor in _norm(line):
|
||||
new_lines.append(e.content.strip())
|
||||
changed = True
|
||||
else:
|
||||
new_lines.append(l)
|
||||
new_lines.append(line)
|
||||
if changed:
|
||||
lines = new_lines
|
||||
norm_set = {_norm(l) for l in lines}
|
||||
norm_set = {_norm(line) for line in lines}
|
||||
applied.append(e)
|
||||
|
||||
return set_learned(doc, lines), applied
|
||||
|
||||
|
||||
def ensure_skill_scaffold(doc: str, *, name: str, description: str) -> str:
|
||||
"""Ensure a SKILL.md has YAML frontmatter so Claude Code loads it."""
|
||||
"""Ensure a SKILL.md has YAML frontmatter so local agents load it."""
|
||||
if doc.lstrip().startswith("---"):
|
||||
return doc
|
||||
fm = (
|
||||
@@ -125,6 +124,6 @@ def ensure_skill_scaffold(doc: str, *, name: str, description: str) -> str:
|
||||
f"description: {description}\n"
|
||||
"---\n\n"
|
||||
f"# {name}\n\n"
|
||||
"Preferences and procedures learned from your past Claude Code sessions.\n"
|
||||
"Preferences and procedures learned from your past local agent sessions.\n"
|
||||
)
|
||||
return fm + doc
|
||||
|
||||
@@ -8,18 +8,17 @@ external dependencies.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# ── Stage 1: harvest ──────────────────────────────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class SessionDigest:
|
||||
"""A normalized summary of one Claude Code session transcript.
|
||||
"""A normalized summary of one local agent session transcript.
|
||||
|
||||
Produced by :mod:`skillopt_sleep.harvest` from a ``<sessionId>.jsonl``
|
||||
transcript plus ``history.jsonl`` entries.
|
||||
Produced by source-specific harvesters from Claude Code transcripts or
|
||||
Codex Desktop archived sessions.
|
||||
"""
|
||||
|
||||
session_id: str
|
||||
|
||||
@@ -15,11 +15,11 @@ from skillopt_sleep.backend import MockBackend, exact_score, keyword_soft_score
|
||||
from skillopt_sleep.config import load_config
|
||||
from skillopt_sleep.consolidate import consolidate
|
||||
from skillopt_sleep.cycle import run_sleep_cycle
|
||||
from skillopt_sleep.experiments.personas import researcher_persona, programmer_persona
|
||||
from skillopt_sleep.harvest import digest_transcript, _detect_feedback, _is_meta_prompt
|
||||
from skillopt_sleep.experiments.personas import programmer_persona, researcher_persona
|
||||
from skillopt_sleep.harvest import _detect_feedback, _is_meta_prompt, digest_transcript
|
||||
from skillopt_sleep.memory import apply_edits, current_learned_lines, extract_learned, set_learned
|
||||
from skillopt_sleep.mine import assign_splits, heuristic_mine, dedup_tasks
|
||||
from skillopt_sleep.staging import adopt, latest_staging
|
||||
from skillopt_sleep.mine import assign_splits, heuristic_mine
|
||||
from skillopt_sleep.staging import adopt
|
||||
from skillopt_sleep.types import EditRecord, SessionDigest, TaskRecord
|
||||
|
||||
|
||||
@@ -89,6 +89,97 @@ class TestHarvest(unittest.TestCase):
|
||||
self.assertIsInstance(d.session_id, str)
|
||||
self.assertGreaterEqual(d.n_user_turns + d.n_assistant_turns, 0)
|
||||
|
||||
def _write_jsonl(self, path, records):
    """Write ``records`` to ``path`` as UTF-8 JSON Lines, one object per line."""
    with open(path, "w", encoding="utf-8") as handle:
        handle.writelines(json.dumps(entry) + "\n" for entry in records)
|
||||
|
||||
def test_digest_codex_archived_session_sanitizes_and_skips_meta(self):
    """A Codex archive digest redacts secrets and omits injected/meta content.

    The fixture mixes a developer message, an AGENTS.md preamble, a prompt
    containing secrets, a tool call with arguments, a tool output and an
    assistant reply; only sanitized user/assistant text and the tool name
    are expected to surface in the digest.
    """
    from skillopt_sleep.harvest_codex import digest_codex_archived_session

    with tempfile.TemporaryDirectory() as tmp:
        path = os.path.join(tmp, "rollout-example.jsonl")
        self._write_jsonl(path, [
            # Supplies the session's working directory.
            {"type": "turn_context", "timestamp": "2026-06-12T10:00:00Z",
             "payload": {"cwd": "/repo/Yoshi", "type": None}},
            # Developer-role message: must not be copied.
            {"type": "response_item", "timestamp": "2026-06-12T10:00:01Z",
             "payload": {"type": "message", "role": "developer",
                         "content": [{"type": "text", "text": "do not copy"}]}},
            # AGENTS.md preamble with nothing after it: must be dropped.
            {"type": "response_item", "timestamp": "2026-06-12T10:00:02Z",
             "payload": {"type": "user_message",
                         "message": "# AGENTS.md instructions for /repo/Yoshi\n"
                                    "<INSTRUCTIONS>do not keep</INSTRUCTIONS>"}},
            # Real prompt carrying two secrets that must be redacted.
            {"type": "response_item", "timestamp": "2026-06-12T10:00:03Z",
             "payload": {"type": "user_message",
                         "message": "run deploy with sk-1234567890abcdef and token local-secret"}},
            # Tool call: only the name may be recorded, not the arguments.
            {"type": "response_item", "timestamp": "2026-06-12T10:00:04Z",
             "payload": {"type": "function_call", "name": "exec_command",
                         "arguments": "raw args should not copy"}},
            # Tool output: must not be copied.
            {"type": "response_item", "timestamp": "2026-06-12T10:00:05Z",
             "payload": {"type": "function_call_output",
                         "output": "raw output should not copy"}},
            {"type": "response_item", "timestamp": "2026-06-12T10:00:06Z",
             "payload": {"type": "agent_message", "message": "done"}},
        ])

        digest = digest_codex_archived_session(path, project="/repo/Yoshi")

        self.assertIsNotNone(digest)
        # All harvested text in one string, for the containment checks below.
        joined = "\n".join(digest.user_prompts + digest.assistant_finals)
        self.assertEqual(digest.project, "/repo/Yoshi")
        self.assertIn("[REDACTED_OPENAI_KEY]", joined)
        self.assertIn("token [REDACTED]", joined)
        self.assertIn("exec_command", digest.tools_used)
        self.assertNotIn("AGENTS.md instructions", joined)
        self.assertNotIn("do not copy", joined)
        self.assertNotIn("raw args should not copy", joined)
        self.assertNotIn("raw output should not copy", joined)
|
||||
|
||||
def test_harvest_codex_filters_project_and_cli_source(self):
    """``--source codex`` plus ``--codex-home`` harvest only the invoked project's sessions.

    Two archives with different working directories are written; the config
    built from CLI-style args targets ``/repo/Yoshi``, so only that session
    is expected back.
    """
    from skillopt_sleep.__main__ import _cfg_from_args
    from skillopt_sleep.harvest_sources import harvest_for_config

    with tempfile.TemporaryDirectory() as tmp:
        codex_home = os.path.join(tmp, ".codex")
        sessions = os.path.join(codex_home, "archived_sessions")
        os.makedirs(sessions)
        # Session recorded in the project under test.
        self._write_jsonl(os.path.join(sessions, "rollout-yoshi.jsonl"), [
            {"type": "turn_context", "timestamp": "2026-06-12T10:00:00Z",
             "payload": {"cwd": "/repo/Yoshi", "type": None}},
            {"type": "response_item", "timestamp": "2026-06-12T10:00:01Z",
             "payload": {"type": "user_message", "message": "fix Yoshi"}},
            {"type": "response_item", "timestamp": "2026-06-12T10:00:02Z",
             "payload": {"type": "agent_message", "message": "fixed"}},
        ])
        # Session from a different project; it must be filtered out.
        self._write_jsonl(os.path.join(sessions, "rollout-other.jsonl"), [
            {"type": "turn_context", "timestamp": "2026-06-12T10:00:00Z",
             "payload": {"cwd": "/repo/Other", "type": None}},
            {"type": "response_item", "timestamp": "2026-06-12T10:00:01Z",
             "payload": {"type": "user_message", "message": "fix Other"}},
        ])

        # Stand-in for the argparse namespace consumed by ``_cfg_from_args``;
        # empty/zero values mean "flag not given".
        Args = type("Args", (), {
            "project": "/repo/Yoshi",
            "scope": "",
            "backend": "",
            "model": "",
            "codex_path": "",
            "claude_home": "",
            "codex_home": codex_home,
            "source": "codex",
            "lookback_hours": 0,
            "edit_budget": 0,
            "auto_adopt": False,
        })

        cfg = _cfg_from_args(Args())
        digests = harvest_for_config(cfg, limit=10)

        self.assertEqual(cfg.get("transcript_source"), "codex")
        self.assertEqual(len(digests), 1)
        self.assertEqual(digests[0].session_id, "rollout-yoshi")
        self.assertEqual(digests[0].user_prompts, ["fix Yoshi"])
|
||||
|
||||
|
||||
class TestMine(unittest.TestCase):
|
||||
def _digest(self, prompts, feedback):
|
||||
@@ -115,7 +206,6 @@ class TestMine(unittest.TestCase):
|
||||
|
||||
def test_dream_never_in_val_or_test(self):
|
||||
# the anti-overfitting guarantee: origin='dream' tasks only ever land in train
|
||||
from skillopt_sleep.types import TaskRecord
|
||||
real = researcher_persona()
|
||||
dream = [TaskRecord(id=f"d{i}", project="/p", intent=f"dream {i}",
|
||||
origin="dream", derived_from="r0") for i in range(5)]
|
||||
@@ -235,7 +325,7 @@ class TestLlmMiner(unittest.TestCase):
|
||||
class TestMultiObjectiveAndPrefs(unittest.TestCase):
|
||||
def test_multi_objective_reward(self):
|
||||
from skillopt_sleep.replay import multi_objective_reward
|
||||
from skillopt_sleep.types import ReplayResult, TaskRecord
|
||||
from skillopt_sleep.types import ReplayResult
|
||||
t = TaskRecord(id="t", project="/p", intent="x")
|
||||
expensive = [(t, ReplayResult(id="t", hard=1.0, tokens=4000, latency_ms=20000))]
|
||||
cheap = [(t, ReplayResult(id="t", hard=1.0, tokens=200, latency_ms=1000))]
|
||||
@@ -249,7 +339,7 @@ class TestMultiObjectiveAndPrefs(unittest.TestCase):
|
||||
|
||||
def test_preferences_injected_into_reflect(self):
|
||||
from skillopt_sleep.backend import CliBackend
|
||||
from skillopt_sleep.types import TaskRecord, ReplayResult
|
||||
from skillopt_sleep.types import ReplayResult
|
||||
captured = {}
|
||||
|
||||
class CapBackend(CliBackend):
|
||||
@@ -269,7 +359,6 @@ class TestMultiObjectiveAndPrefs(unittest.TestCase):
|
||||
def test_replay_records_cost(self):
|
||||
from skillopt_sleep.backend import MockBackend
|
||||
from skillopt_sleep.replay import replay_one
|
||||
from skillopt_sleep.types import TaskRecord
|
||||
t = TaskRecord(id="t", project="/p", intent="hello world",
|
||||
reference_kind="exact", reference="hi")
|
||||
r = replay_one(MockBackend(), t, "some skill text", "")
|
||||
@@ -280,7 +369,7 @@ class TestMultiObjectiveAndPrefs(unittest.TestCase):
|
||||
class TestMultiRolloutAndBudget(unittest.TestCase):
|
||||
def test_rolloutset_stats(self):
|
||||
from skillopt_sleep.rollout import RolloutSet
|
||||
from skillopt_sleep.types import ReplayResult, TaskRecord
|
||||
from skillopt_sleep.types import ReplayResult
|
||||
rs = RolloutSet(task=TaskRecord(id="t", project="/p", intent="x"),
|
||||
attempts=[ReplayResult(id="t", hard=1.0),
|
||||
ReplayResult(id="t", hard=0.0),
|
||||
@@ -305,7 +394,7 @@ class TestMultiRolloutAndBudget(unittest.TestCase):
|
||||
def test_contrastive_reflect_with_stub(self):
|
||||
from skillopt_sleep.backend import Backend
|
||||
from skillopt_sleep.rollout import RolloutSet, contrastive_reflect
|
||||
from skillopt_sleep.types import ReplayResult, TaskRecord
|
||||
from skillopt_sleep.types import ReplayResult
|
||||
|
||||
class StubBackend(Backend):
|
||||
name = "stub"
|
||||
@@ -323,8 +412,11 @@ class TestMultiRolloutAndBudget(unittest.TestCase):
|
||||
class TestSlowUpdate(unittest.TestCase):
|
||||
def test_protected_field_roundtrip(self):
|
||||
from skillopt_sleep.slow_update import (
|
||||
replace_slow_field, extract_slow_field, has_slow_field,
|
||||
SLOW_UPDATE_START, SLOW_UPDATE_END,
|
||||
SLOW_UPDATE_END,
|
||||
SLOW_UPDATE_START,
|
||||
extract_slow_field,
|
||||
has_slow_field,
|
||||
replace_slow_field,
|
||||
)
|
||||
base = "# skill\nkeep me\n"
|
||||
doc = replace_slow_field(base, "durable lesson A")
|
||||
@@ -341,7 +433,7 @@ class TestSlowUpdate(unittest.TestCase):
|
||||
def test_run_slow_update_with_stub_backend(self):
|
||||
from skillopt_sleep.backend import Backend
|
||||
from skillopt_sleep.slow_update import run_slow_update
|
||||
from skillopt_sleep.types import TaskRecord, ReplayResult
|
||||
from skillopt_sleep.types import ReplayResult
|
||||
|
||||
class StubBackend(Backend):
|
||||
name = "stub"
|
||||
@@ -366,9 +458,8 @@ class TestSlowUpdate(unittest.TestCase):
|
||||
class TestToolLoop(unittest.TestCase):
|
||||
def test_tool_called_judge_via_replay(self):
|
||||
from skillopt_sleep.backend import MockBackend
|
||||
from skillopt_sleep.replay import replay_one, _required_tools
|
||||
from skillopt_sleep.memory import set_learned
|
||||
from skillopt_sleep.types import TaskRecord
|
||||
from skillopt_sleep.replay import _required_tools, replay_one
|
||||
|
||||
task = TaskRecord(
|
||||
id="qa1", project="/p", intent="answer the question",
|
||||
|
||||
Reference in New Issue
Block a user