Files
microsoft-SkillOpt/plugins/openclaw/run_sleep.py
elzlxx 553446575a feat(plugins): add OpenClaw shell for SkillOpt-Sleep
Adds a thin OpenClaw shell wrapping the SkillOpt-Sleep engine. Enables
nightly validation-gated skill improvement cycles for OpenClaw agents.

Components:
- skillopt_sleep_openclaw.py: DeepSeek V4 Pro + Ollama nomic-embed-text
  backend, mirroring the Claude/Codex/Copilot backend pattern.
- run_sleep.py: CLI entry point supporting dry-run and pre-built task files.
- run_sleep_cron.sh: bash wrapper for nightly cron invocation.
- slash_sleep.py: /sleep command (status / run / adopt / reject / cost).
- config.json: engine config tuned for this stack (DeepSeek V4 Pro + Ollama nomic-embed-text).
- SKILL.md: OpenClaw skill manifest.
- tests/: 14 held-out tasks across 3 categories (research-cron, devops, wiki).

OpenClaw is the 4th ecosystem in which SkillOpt-Sleep can be deployed,
joining Claude Code, Codex, and Copilot. The shell follows the same
single-engine / thin-shell pattern as the existing three plugins.

End-to-end tested: pipeline runs against real OpenClaw session transcripts,
gate correctly rejects non-improvements, staging artifacts land in
~/.skillopt-sleep/staging/<night>/. Cost: ~$0.02/night on DeepSeek V4 Pro.
2026-06-14 23:27:54 +08:00

123 lines
4.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""run_sleep.py — OpenClaw entry point for SkillOpt-Sleep.

Runs one nightly sleep cycle:
  1. harvest recent session transcripts
  2. mine recurring task patterns
  3. replay tasks with current skill (baseline) + candidate skill (with proposed edit)
  4. gate candidate vs baseline on held-out accuracy
  5. stage the proposal in ~/.skillopt-sleep/staging/<night>/
  6. leave adoption to Ethan (auto_adopt=false)

Usage:
    python3 run_sleep.py                      # one cycle, default config
    python3 run_sleep.py --dry-run            # compute report only, no staging
    python3 run_sleep.py --tasks path.json    # use a pre-built task file
    python3 run_sleep.py --config path.json   # use a different config file

Exit status:
    0 when the report is accepted or the candidate score is at least the
    baseline score; 1 otherwise.
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from pathlib import Path
# Ensure the skillopt_sleep package is importable (it lives in the cloned repo).
# NOTE(review): this is a hard-coded, machine-specific absolute path; the script
# only works where the checkout lives at exactly this location.
REPO = Path("/home/ethanclaw/.openclaw/workspace/SkillOpt")
# Inserted at index 0 so this checkout is searched before every other sys.path entry.
sys.path.insert(0, str(REPO))
# Register our backend before importing cycle.
# NOTE(review): skillopt_sleep_openclaw is presumably resolved from this script's
# own directory rather than from REPO — confirm.
from skillopt_sleep_openclaw import OpenClawDeepSeekBackend
import skillopt_sleep.backend as _b
# Reuse the backend module's _BACKENDS mapping when it has one; otherwise
# create an empty mapping on the module so the assignment below cannot fail.
_b._BACKENDS = getattr(_b, "_BACKENDS", {})
_b._BACKENDS["openclaw-deepseek"] = OpenClawDeepSeekBackend
# Patch get_backend to know about our backend: keep a reference to the
# original function so every other backend name can still be delegated to it.
_orig_get_backend = _b.get_backend
def get_backend(name, model="", codex_path=""):
    """Resolve a backend by name, adding support for ``"openclaw-deepseek"``.

    Args:
        name: Backend identifier requested by the caller.
        model: Model name; an empty/falsy value selects ``"deepseek-v4-pro"``
            for the OpenClaw backend.
        codex_path: Forwarded untouched to the original ``get_backend``;
            not used by the OpenClaw backend.

    Returns:
        An ``OpenClawDeepSeekBackend`` instance for ``"openclaw-deepseek"``,
        otherwise whatever the original ``get_backend`` returns.
    """
    # Guard clause: every name we do not own goes straight to the original.
    if name != "openclaw-deepseek":
        return _orig_get_backend(name, model=model, codex_path=codex_path)

    chosen_model = model if model else "deepseek-v4-pro"
    return OpenClawDeepSeekBackend(model=chosen_model)
# Replace the backend module's get_backend attribute with the wrapper.
_b.get_backend = get_backend
# These imports deliberately come after the assignment above.
# NOTE(review): presumably skillopt_sleep.cycle picks up get_backend at import
# time, which is why it must not be imported earlier — confirm against cycle.py.
from skillopt_sleep.cycle import run_sleep_cycle
from skillopt_sleep.config import load_config
def _load_overrides(config_path: str) -> dict:
    """Return the config overrides stored in the JSON file at *config_path*.

    A missing file is not an error: an empty dict is returned and a warning
    is written to stderr so the fallback is visible in cron logs. The
    ``_comment`` key, if present, is dropped before returning.
    """
    if not os.path.exists(config_path):
        print(
            f"[skillopt-sleep] warning: config file not found: {config_path}"
            " (no overrides applied)",
            file=sys.stderr,
        )
        return {}
    # JSON is UTF-8 by specification; do not depend on the process locale.
    with open(config_path, encoding="utf-8") as f:
        overrides = dict(json.load(f))
    overrides.pop("_comment", None)
    return overrides


def _load_seed_tasks(tasks_path: str) -> list:
    """Build ``TaskRecord`` objects from the JSON task list at *tasks_path*.

    Each entry must carry an ``id``; every other field falls back to a
    default. ``intent`` falls back to the entry's ``prompt`` field.
    """
    # Imported lazily: only needed when a pre-built task file is supplied.
    from skillopt_sleep.types import TaskRecord

    with open(tasks_path, encoding="utf-8") as f:
        raw = json.load(f)
    # Translate our test-set fields -> TaskRecord fields.
    return [
        TaskRecord(
            id=t['id'],
            project=t.get('project', 'openclaw'),
            intent=t.get('intent') or t.get('prompt', ''),
            context_excerpt=t.get('context_excerpt', ''),
            attempted_solution=t.get('attempted_solution', ''),
            outcome=t.get('outcome', 'unknown'),
            reference_kind=t.get('reference_kind', 'rubric'),
            reference=t.get('reference', ''),
            judge=t.get('judge', {}),
            tags=t.get('tags', []),
            source_sessions=t.get('source_sessions', []),
            split=t.get('split', 'train'),
        )
        for t in raw
    ]


def _print_report(outcome) -> None:
    """Print the human-readable summary of one finished cycle to stdout."""
    r = outcome.report
    print(f"\n=== Report — night {r.night} ===")
    print(f" sessions harvested: {r.n_sessions}")
    print(f" tasks mined: {r.n_tasks} (replayed: {r.n_replayed})")
    print(f" baseline: {r.baseline_score:.3f} -> candidate: {r.candidate_score:.3f}")
    print(f" gate: {r.gate_action} accepted={r.accepted}")
    print(f" tokens: {r.tokens_used}")
    if r.edits:
        print(f" applied edits ({len(r.edits)}):")
        for e in r.edits:
            # Only show an ellipsis when the preview really is truncated.
            ellipsis = "..." if len(e.content) > 80 else ""
            print(f" [{e.target}/{e.op}] {e.content[:80]}{ellipsis}")
    if r.rejected_edits:
        print(f" rejected edits ({len(r.rejected_edits)}) — kept as negative feedback")
    if r.notes:
        for n in r.notes:
            print(f" note: {n}")
    if outcome.staging_dir:
        print(f"\n STAGED at: {outcome.staging_dir}")
        print(f" Review with: ls {outcome.staging_dir}")


def main() -> int:
    """Run one sleep cycle from the command line and print its report.

    Command-line flags:
        --dry-run   passed to ``run_sleep_cycle`` as ``dry_run``.
        --config    JSON file whose keys are passed to ``load_config`` as
                    keyword overrides (a missing file means no overrides).
        --tasks     optional JSON task list used as ``seed_tasks``.
        --verbose   accepted but currently unused by this script.

    Returns:
        0 when the report is accepted or the candidate score is at least the
        baseline score; 1 otherwise.
    """
    ap = argparse.ArgumentParser(description="OpenClaw SkillOpt-Sleep nightly cycle")
    ap.add_argument("--dry-run", action="store_true", help="Compute but don't stage")
    ap.add_argument("--config", default="/home/ethanclaw/.openclaw/workspace/skills/skillopt-sleep/config.json")
    ap.add_argument("--tasks", default=None, help="Path to pre-built tasks JSON")
    ap.add_argument("--verbose", action="store_true")
    args = ap.parse_args()

    # Values from the config file are applied on top of load_config's defaults.
    cfg = load_config(**_load_overrides(args.config))

    seed_tasks = _load_seed_tasks(args.tasks) if args.tasks else None

    print("[skillopt-sleep] starting cycle...")
    print(f" backend: {cfg.get('backend')}")
    print(f" project: {cfg.get('invoked_project')}")
    print(f" max tasks: {cfg.get('max_tasks_per_night')}")
    print(f" edit budget: {cfg.get('edit_budget')}")
    print(f" dry_run: {args.dry_run}")

    outcome = run_sleep_cycle(cfg, seed_tasks=seed_tasks, dry_run=args.dry_run)
    _print_report(outcome)

    r = outcome.report
    return 0 if r.accepted or r.candidate_score >= r.baseline_score else 1
if __name__ == "__main__":
    # Use main()'s integer result as the process exit status.
    raise SystemExit(main())