mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 22:24:36 +08:00
Adds a thin OpenClaw shell wrapping the SkillOpt-Sleep engine. Enables nightly validation-gated skill improvement cycles for OpenClaw agents. Components: - skillopt_sleep_openclaw.py: DeepSeek V4 Pro + Ollama nomic-embed-text backend, mirroring the Claude/Codex/Copilot backend pattern. - run_sleep.py: CLI entry point supporting dry-run and pre-built task files. - run_sleep_cron.sh: bash wrapper for nightly cron invocation. - slash_sleep.py: /sleep command (status / run / adopt / reject / cost). - config.json: engine config tuned for our stack. - SKILL.md: OpenClaw skill manifest. - tests/: 14 held-out tasks across 3 categories (research-cron, devops, wiki). OpenClaw is the 4th ecosystem in which SkillOpt-Sleep can be deployed, joining Claude Code, Codex, and Copilot. The shell follows the same single-engine / thin-shell pattern as the existing three plugins. End-to-end tested: pipeline runs against real OpenClaw session transcripts, gate correctly rejects non-improvements, staging artifacts land in ~/.skillopt-sleep/staging/<night>/. Cost: ~$0.02/night on DeepSeek V4 Pro.
123 lines
4.6 KiB
Python
Executable File
123 lines
4.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""run_sleep.py — OpenClaw entry point for SkillOpt-Sleep.
|
|
|
|
Runs one nightly sleep cycle:
|
|
1. harvest recent session transcripts
|
|
2. mine recurring task patterns
|
|
3. replay tasks with current skill (baseline) + candidate skill (with proposed edit)
|
|
4. gate candidate vs baseline on held-out accuracy
|
|
5. stage the proposal in ~/.skillopt-sleep/staging/<night>/
|
|
6. leave adoption to Ethan (auto_adopt=false)
|
|
|
|
Usage:
|
|
python3 run_sleep.py # one cycle, default config
|
|
python3 run_sleep.py --dry-run # compute report only, no staging
|
|
python3 run_sleep.py --tasks path.json # use a pre-built task file
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Ensure the skillopt_sleep package is importable (it lives in the cloned repo)
|
|
REPO = Path("/home/ethanclaw/.openclaw/workspace/SkillOpt")
|
|
sys.path.insert(0, str(REPO))
|
|
|
|
# Register our backend before importing cycle
|
|
from skillopt_sleep_openclaw import OpenClawDeepSeekBackend
|
|
import skillopt_sleep.backend as _b
|
|
_b._BACKENDS = getattr(_b, "_BACKENDS", {})
|
|
_b._BACKENDS["openclaw-deepseek"] = OpenClawDeepSeekBackend
|
|
|
|
# Patch get_backend to know about our backend
|
|
_orig_get_backend = _b.get_backend
|
|
|
|
def get_backend(name, model="", codex_path=""):
|
|
if name == "openclaw-deepseek":
|
|
return OpenClawDeepSeekBackend(model=model or "deepseek-v4-pro")
|
|
return _orig_get_backend(name, model=model, codex_path=codex_path)
|
|
|
|
_b.get_backend = get_backend
|
|
|
|
from skillopt_sleep.cycle import run_sleep_cycle
|
|
from skillopt_sleep.config import load_config
|
|
|
|
|
|
def main() -> int:
|
|
ap = argparse.ArgumentParser(description="OpenClaw SkillOpt-Sleep nightly cycle")
|
|
ap.add_argument("--dry-run", action="store_true", help="Compute but don't stage")
|
|
ap.add_argument("--config", default="/home/ethanclaw/.openclaw/workspace/skills/skillopt-sleep/config.json")
|
|
ap.add_argument("--tasks", default=None, help="Path to pre-built tasks JSON")
|
|
ap.add_argument("--verbose", action="store_true")
|
|
args = ap.parse_args()
|
|
|
|
# Load config from file then override with our defaults
|
|
overrides = {}
|
|
if os.path.exists(args.config):
|
|
with open(args.config) as f:
|
|
overrides.update(json.load(f))
|
|
overrides.pop("_comment", None)
|
|
|
|
cfg = load_config(**overrides)
|
|
|
|
seed_tasks = None
|
|
if args.tasks:
|
|
from skillopt_sleep.types import TaskRecord
|
|
with open(args.tasks) as f:
|
|
raw = json.load(f)
|
|
# Translate our test-set fields → TaskRecord fields
|
|
seed_tasks = []
|
|
for t in raw:
|
|
seed_tasks.append(TaskRecord(
|
|
id=t['id'],
|
|
project=t.get('project', 'openclaw'),
|
|
intent=t.get('intent') or t.get('prompt', ''),
|
|
context_excerpt=t.get('context_excerpt', ''),
|
|
attempted_solution=t.get('attempted_solution', ''),
|
|
outcome=t.get('outcome', 'unknown'),
|
|
reference_kind=t.get('reference_kind', 'rubric'),
|
|
reference=t.get('reference', ''),
|
|
judge=t.get('judge', {}),
|
|
tags=t.get('tags', []),
|
|
source_sessions=t.get('source_sessions', []),
|
|
split=t.get('split', 'train'),
|
|
))
|
|
|
|
print(f"[skillopt-sleep] starting cycle...")
|
|
print(f" backend: {cfg.get('backend')}")
|
|
print(f" project: {cfg.get('invoked_project')}")
|
|
print(f" max tasks: {cfg.get('max_tasks_per_night')}")
|
|
print(f" edit budget: {cfg.get('edit_budget')}")
|
|
print(f" dry_run: {args.dry_run}")
|
|
|
|
outcome = run_sleep_cycle(cfg, seed_tasks=seed_tasks, dry_run=args.dry_run)
|
|
|
|
r = outcome.report
|
|
print(f"\n=== Report — night {r.night} ===")
|
|
print(f" sessions harvested: {r.n_sessions}")
|
|
print(f" tasks mined: {r.n_tasks} (replayed: {r.n_replayed})")
|
|
print(f" baseline: {r.baseline_score:.3f} -> candidate: {r.candidate_score:.3f}")
|
|
print(f" gate: {r.gate_action} accepted={r.accepted}")
|
|
print(f" tokens: {r.tokens_used}")
|
|
if r.edits:
|
|
print(f" applied edits ({len(r.edits)}):")
|
|
for e in r.edits:
|
|
print(f" [{e.target}/{e.op}] {e.content[:80]}...")
|
|
if r.rejected_edits:
|
|
print(f" rejected edits ({len(r.rejected_edits)}) — kept as negative feedback")
|
|
if r.notes:
|
|
for n in r.notes:
|
|
print(f" note: {n}")
|
|
if outcome.staging_dir:
|
|
print(f"\n STAGED at: {outcome.staging_dir}")
|
|
print(f" Review with: ls {outcome.staging_dir}")
|
|
|
|
return 0 if r.accepted or r.candidate_score >= r.baseline_score else 1
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|