From f9db99853b23865b3afba9a3e90b52c6c3782c91 Mon Sep 17 00:00:00 2001 From: Yifan Yang Date: Mon, 8 Jun 2026 14:31:52 +0000 Subject: [PATCH] feat(plugins): ship SkillOpt-Sleep for Claude Code, Codex, and Copilot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructure into plugins/{claude-code,codex,copilot}/ — one engine, three thin shells, all calling the shared plugins/run-sleep.sh -> python -m skillopt_sleep. - claude-code/: existing plugin moved here; runner delegates to the shared launcher (fixes repo-root resolution after the move). - codex/: ~/.codex/prompts/sleep.md custom prompt + ~/.agents/skills SKILL.md + install.sh + AGENTS.md hint — Codex's documented, stable extension surfaces. - copilot/: a stdlib-only MCP server (mcp_server.py) exposing sleep_* tools, plus mcp-config.example.json and a copilot-instructions snippet. Verified end to end (initialize -> tools/list -> tools/call returns real engine output). - plugins/README.md overview table; main README News + a dedicated SkillOpt-Sleep section; pyproject lists skillopt_sleep as a first-class package. Decoupling emphasized throughout: open-source tool (skillopt_sleep/) with zero dependency on the research package. 29 tests pass; all three shells resolve. 
Co-Authored-By: Claude Opus 4 --- README.md | 46 +++++++ plugins/README.md | 74 ++++++++++ .../.claude-plugin/marketplace.json | 0 .../claude-code}/.claude-plugin/plugin.json | 0 .../claude-code}/README.md | 0 .../claude-code}/commands/sleep.md | 0 .../claude-code}/hooks/hooks.json | 0 .../claude-code}/hooks/on-session-end.sh | 0 .../claude-code}/scripts/install-cron.sh | 0 plugins/claude-code/scripts/sleep.sh | 11 ++ .../skills/skillopt-sleep/SKILL.md | 0 plugins/codex/README.md | 59 ++++++++ plugins/codex/install.sh | 36 +++++ plugins/codex/prompts/sleep.md | 21 +++ plugins/codex/skills/skillopt-sleep/SKILL.md | 49 +++++++ plugins/copilot/README.md | 67 +++++++++ .../copilot/copilot-instructions.snippet.md | 25 ++++ plugins/copilot/mcp-config.example.json | 11 ++ plugins/copilot/mcp_server.py | 128 ++++++++++++++++++ plugins/run-sleep.sh | 46 +++++++ pyproject.toml | 4 +- skillopt-sleep-plugin/scripts/sleep.sh | 30 ---- 22 files changed, 576 insertions(+), 31 deletions(-) create mode 100644 plugins/README.md rename {skillopt-sleep-plugin => plugins/claude-code}/.claude-plugin/marketplace.json (100%) rename {skillopt-sleep-plugin => plugins/claude-code}/.claude-plugin/plugin.json (100%) rename {skillopt-sleep-plugin => plugins/claude-code}/README.md (100%) rename {skillopt-sleep-plugin => plugins/claude-code}/commands/sleep.md (100%) rename {skillopt-sleep-plugin => plugins/claude-code}/hooks/hooks.json (100%) rename {skillopt-sleep-plugin => plugins/claude-code}/hooks/on-session-end.sh (100%) rename {skillopt-sleep-plugin => plugins/claude-code}/scripts/install-cron.sh (100%) create mode 100755 plugins/claude-code/scripts/sleep.sh rename {skillopt-sleep-plugin => plugins/claude-code}/skills/skillopt-sleep/SKILL.md (100%) create mode 100644 plugins/codex/README.md create mode 100755 plugins/codex/install.sh create mode 100644 plugins/codex/prompts/sleep.md create mode 100644 plugins/codex/skills/skillopt-sleep/SKILL.md create mode 100644 plugins/copilot/README.md 
create mode 100644 plugins/copilot/copilot-instructions.snippet.md create mode 100644 plugins/copilot/mcp-config.example.json create mode 100755 plugins/copilot/mcp_server.py create mode 100755 plugins/run-sleep.sh delete mode 100755 skillopt-sleep-plugin/scripts/sleep.sh diff --git a/README.md b/README.md index 1bdbde5..f4dd537 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ --- ## News 🔥🔥🔥 +- **[2026-06-08]** 😴 **SkillOpt-Sleep is here — plugins for Claude Code, Codex, and Copilot.** Give your local coding agent a nightly *sleep cycle*: it reviews your past sessions offline, replays your recurring tasks, and consolidates validated long-term memory + skills behind a held-out gate —越用越好用. Validated on the public [gbrain-evals](https://github.com/garrytan/gbrain-evals) `skillopt-v1` benchmark with **real Claude and Codex** (deficient skills 0.00 → 1.00 on held-out, all 4 seeds). It's an **open-source tool decoupled from the paper code**. See [`plugins/`](plugins/) and the [SkillOpt-Sleep section](#-skillopt-sleep--越用越好用-the-deployment-time-companion) below. - **[2026-06-03]** 🎉 **[gbrain](https://github.com/garrytan/gbrain), [gbrain-evals](https://github.com/garrytan/gbrain-evals/blob/main/docs/benchmarks/2026-06-03-skillopt.md), and [darwin-skill](https://github.com/alchaincyf/darwin-skill) have all integrated SkillOpt.** - **[2026-06-02]** 🎉 **SkillOpt [v0.1.0](https://github.com/microsoft/SkillOpt/releases/tag/v0.1.0) is now available on [PyPI](https://pypi.org/project/skillopt/)!** Install with `pip install skillopt`. This initial release includes the full training loop (rollout → reflect → aggregate → select → update → evaluate), multi-backend support (OpenAI / Azure / Claude / Qwen / MiniMax), six built-in benchmarks, and WebUI dashboard. 
@@ -50,6 +51,51 @@ https://github.com/user-attachments/assets/eb12d3bc-371c-467f-904d-91b61f339ed7 --- +## 😴 SkillOpt-Sleep — 越用越好用, the deployment-time companion + +SkillOpt (above) trains a skill offline on a benchmark. **SkillOpt-Sleep** +applies the same discipline to *your own daily usage*: it gives a local coding +agent a nightly **sleep cycle** that reviews your past sessions, replays your +recurring tasks on your own API budget, and consolidates what it learns into +**validated** long-term memory and skills — behind a held-out gate, staged for +your review. The agent gets better the more you use it, with no weight training. + +It synthesizes **SkillOpt** (validation-gated bounded text edits), **Claude +Dreams** (offline consolidation; review-then-adopt), and the **agent sleep** +idea (short-term experience → long-term competence). One "night": + +``` +harvest session transcripts → mine recurring tasks → replay offline + → consolidate (reflect → bounded edit → GATE on real held-out tasks) + → stage proposal → (you) adopt +``` + +**Plugins for three agents** (one engine, three thin shells — see [`plugins/`](plugins/)): + +| Platform | Folder | Install | +|---|---|---| +| **Claude Code** | [`plugins/claude-code`](plugins/claude-code) | `/plugin marketplace add ./plugins/claude-code` → `/sleep` | +| **Codex** | [`plugins/codex`](plugins/codex) | `bash plugins/codex/install.sh` → `/sleep` | +| **Copilot** | [`plugins/copilot`](plugins/copilot) | register `plugins/copilot/mcp_server.py` as an MCP server | + +**Validated on real models.** On the public +[gbrain-evals](https://github.com/garrytan/gbrain-evals) `skillopt-v1` benchmark, +deficient skills go **0.00 → 1.00** on held-out sets with **both Claude and +Codex** (all 4 seeds, including a real tool-use loop), cross-model transfer is +positive, and the gate blocks regressions +([full results](docs/sleep/FINAL_REPORT.md)). 
+ +> **Open-source tool, decoupled from the research.** The engine lives in the +> top-level [`skillopt_sleep/`](skillopt_sleep) package with **zero dependency** +> on the paper's `skillopt/` experiment code (the validation gate is vendored). +> Controls — optional gate, multi-rollout contrastive reflection, token/time +> budget, multi-objective reward, user preferences, optimizer/target split — are +> documented in [`docs/sleep/CONTROLLABLE_DREAMING.md`](docs/sleep/CONTROLLABLE_DREAMING.md). + +Deterministic proof (no API key): `python -m skillopt_sleep.experiments.run_experiment --persona researcher --assert-improves`. + +--- + ## Install ### Requirements diff --git a/plugins/README.md b/plugins/README.md new file mode 100644 index 0000000..0fe7b69 --- /dev/null +++ b/plugins/README.md @@ -0,0 +1,74 @@ +# SkillOpt-Sleep — plugins for Claude Code, Codex, and Copilot + +One engine, three thin shells. **SkillOpt-Sleep** gives a local coding agent a +nightly **sleep cycle**: it reviews your past sessions offline, replays your +recurring tasks on your own API budget, and consolidates what it learns into +**validated** long-term memory and skills — behind a held-out gate, staged for +your review. Your agent gets better the more you use it, with no model-weight +training. + +It synthesizes three ideas: **SkillOpt** (validation-gated bounded text +optimization — the research in this repo), **Claude Dreams** (offline memory +consolidation; input never mutated; review-then-adopt), and the **agent sleep** +literature (short-term experience → long-term competence). + +> **This is an open-source tool, decoupled from the research code.** The engine +> lives in the top-level [`skillopt_sleep/`](../skillopt_sleep) package and has +> **zero dependency** on the paper's `skillopt/` experiment package (the +> validation gate is vendored). You can ship/use it without the research stack. 
+ +## The three integrations + +| Platform | Folder | Mechanism | Status | +|---|---|---|---| +| **Claude Code** | [`claude-code/`](claude-code) | `.claude-plugin` + `/sleep` command + skill + hooks | full, installable | +| **Codex** | [`codex/`](codex) | `~/.codex/prompts/sleep.md` + `~/.agents/skills` + `AGENTS.md` | full | +| **Copilot** | [`copilot/`](copilot) | MCP server (`sleep_*` tools) + `copilot-instructions` | full (MCP) | + +All three call the **same** [`plugins/run-sleep.sh`](run-sleep.sh) → `python -m +skillopt_sleep`, so behaviour is identical everywhere. Per-platform setup is in +each folder's README. + +## Quick start (Claude Code) + +```bash +git clone <repo-url> && cd SkillOpt-Sleep +# Claude Code: +/plugin marketplace add ./plugins/claude-code +/plugin install skillopt-sleep@skillopt-sleep +/sleep status +``` +Codex: `bash plugins/codex/install.sh`. +Copilot: register `plugins/copilot/mcp_server.py` as an MCP server. + +## What one "night" does + +``` +harvest ~/.claude (or session) transcripts → mine recurring tasks → replay offline + → consolidate (reflect → bounded edit → GATE on real held-out tasks) + → stage proposal → (you) adopt +``` + +Nothing live changes until you adopt; every adopt backs up first. + +## Controls (work on all platforms) + +`--gate on|off` · `--rollouts-k K` (multi-rollout contrastive reflection) · +`--budget-tokens/--budget-minutes` · `--preferences "..."` · separate +optimizer/target models (`--optimizer-model` / `--target-model`) · slow-update +long-term memory. Full guide: +[`../docs/sleep/CONTROLLABLE_DREAMING.md`](../docs/sleep/CONTROLLABLE_DREAMING.md). + +## Does it actually work? + +Validated on the public +[gbrain-evals](https://github.com/garrytan/gbrain-evals) `skillopt-v1` benchmark +with **real models on both Claude and Codex**: deficient skills go **0.00 → +1.00** on held-out sets (all 4 seeds incl. a real tool-use loop), cross-model +transfer is positive, and the gate blocks regressions. 
Full results: +[`../docs/sleep/FINAL_REPORT.md`](../docs/sleep/FINAL_REPORT.md). + +Deterministic proof (no API key): +```bash +python -m skillopt_sleep.experiments.run_experiment --persona researcher --assert-improves +``` diff --git a/skillopt-sleep-plugin/.claude-plugin/marketplace.json b/plugins/claude-code/.claude-plugin/marketplace.json similarity index 100% rename from skillopt-sleep-plugin/.claude-plugin/marketplace.json rename to plugins/claude-code/.claude-plugin/marketplace.json diff --git a/skillopt-sleep-plugin/.claude-plugin/plugin.json b/plugins/claude-code/.claude-plugin/plugin.json similarity index 100% rename from skillopt-sleep-plugin/.claude-plugin/plugin.json rename to plugins/claude-code/.claude-plugin/plugin.json diff --git a/skillopt-sleep-plugin/README.md b/plugins/claude-code/README.md similarity index 100% rename from skillopt-sleep-plugin/README.md rename to plugins/claude-code/README.md diff --git a/skillopt-sleep-plugin/commands/sleep.md b/plugins/claude-code/commands/sleep.md similarity index 100% rename from skillopt-sleep-plugin/commands/sleep.md rename to plugins/claude-code/commands/sleep.md diff --git a/skillopt-sleep-plugin/hooks/hooks.json b/plugins/claude-code/hooks/hooks.json similarity index 100% rename from skillopt-sleep-plugin/hooks/hooks.json rename to plugins/claude-code/hooks/hooks.json diff --git a/skillopt-sleep-plugin/hooks/on-session-end.sh b/plugins/claude-code/hooks/on-session-end.sh similarity index 100% rename from skillopt-sleep-plugin/hooks/on-session-end.sh rename to plugins/claude-code/hooks/on-session-end.sh diff --git a/skillopt-sleep-plugin/scripts/install-cron.sh b/plugins/claude-code/scripts/install-cron.sh similarity index 100% rename from skillopt-sleep-plugin/scripts/install-cron.sh rename to plugins/claude-code/scripts/install-cron.sh diff --git a/plugins/claude-code/scripts/sleep.sh b/plugins/claude-code/scripts/sleep.sh new file mode 100755 index 0000000..3d2210e --- /dev/null +++ 
b/plugins/claude-code/scripts/sleep.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Claude Code plugin runner — thin wrapper over the shared runner so all three +# platform plugins share one engine launcher. The shared runner lives at +# <repo>/plugins/run-sleep.sh and handles repo-root + interpreter resolution. +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # <repo>/plugins/claude-code/scripts +SHARED="$(cd "$HERE/../.." && pwd)/run-sleep.sh" # <repo>/plugins/run-sleep.sh +if [ ! -f "$SHARED" ] && [ -n "${CLAUDE_PLUGIN_ROOT:-}" ]; then + SHARED="$(cd "$CLAUDE_PLUGIN_ROOT/.." && pwd)/run-sleep.sh" +fi +exec bash "$SHARED" "$@" diff --git a/skillopt-sleep-plugin/skills/skillopt-sleep/SKILL.md b/plugins/claude-code/skills/skillopt-sleep/SKILL.md similarity index 100% rename from skillopt-sleep-plugin/skills/skillopt-sleep/SKILL.md rename to plugins/claude-code/skills/skillopt-sleep/SKILL.md diff --git a/plugins/codex/README.md b/plugins/codex/README.md new file mode 100644 index 0000000..f5960da --- /dev/null +++ b/plugins/codex/README.md @@ -0,0 +1,59 @@ +# SkillOpt-Sleep — Codex integration + +Give your **Codex** agent a nightly **sleep cycle**: it reviews past sessions +offline, replays your recurring tasks on your own Codex budget, and consolidates +what it learns into validated memory + skills behind a held-out gate. Same engine +as the Claude Code plugin (`skillopt_sleep`), wrapped for Codex. + +> **Verified on Codex:** on the public +> [gbrain-evals](https://github.com/garrytan/gbrain-evals) `skillopt-v1` +> benchmark, a deliberately deficient skill goes **0.00 → 1.00** on a held-out +> set with the Codex backend (incl. the tool-use seed via a real tool loop). +> See [`../../docs/sleep/FINAL_REPORT.md`](../../docs/sleep/FINAL_REPORT.md). 
+ +## What Codex supports (and what we use) + +Codex (`@openai/codex`) extends via **`AGENTS.md`** instructions, **skills** at +`~/.agents/skills/<name>/SKILL.md`, and **custom prompts** at +`~/.codex/prompts/<name>.md` (invoked as `/<name>`). This integration ships all +three, plus a shared runner. + +## Install + +```bash +git clone <repo-url> SkillOpt-Sleep +cd SkillOpt-Sleep +bash plugins/codex/install.sh # installs the /sleep prompt + skill +export SKILLOPT_SLEEP_REPO="$(pwd)" # so the runner is found from anywhere +``` + +Requires Python ≥ 3.10 and the `codex` CLI on PATH. + +## Use + +```text +/sleep status # what's happened +/sleep dry-run # safe preview, stages nothing +/sleep run # full cycle, stages a reviewed proposal (no live edits) +/sleep adopt # apply the staged proposal (with backup) +``` + +Or call the engine directly: + +```bash +python -m skillopt_sleep run --project "$(pwd)" --backend codex +``` + +Default backend is `mock` (no API spend). `--backend codex` uses your Codex +budget for real improvement. All the controllable knobs (`--gate on|off`, +`--rollouts-k`, `--budget-tokens`, `--preferences`, optimizer/target split) work +identically — see [`../../docs/sleep/CONTROLLABLE_DREAMING.md`](../../docs/sleep/CONTROLLABLE_DREAMING.md). + +## Notes / status + +- Codex's `exec` runs shell, so the real-tool-loop replay (e.g. the + `tool_called: search` benchmark seed) works natively. +- Codex's standalone *plugin-package manifest* format is not yet a stable public + spec; this integration uses the documented `AGENTS.md` + skills + prompts + mechanisms, which are stable. If/when a `codex plugin` package format ships, + we'll add a one-file manifest. diff --git a/plugins/codex/install.sh b/plugins/codex/install.sh new file mode 100755 index 0000000..b7c0e14 --- /dev/null +++ b/plugins/codex/install.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Install the SkillOpt-Sleep Codex integration into the user's ~/.codex and +# ~/.agents directories. Idempotent; prints what it does. 
+set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +CODEX_HOME="${CODEX_HOME:-$HOME/.codex}" +AGENTS_SKILLS="${HOME}/.agents/skills" + +echo "[install] repo: $REPO_ROOT" + +# 1) custom /sleep prompt +mkdir -p "$CODEX_HOME/prompts" +cp "$REPO_ROOT/plugins/codex/prompts/sleep.md" "$CODEX_HOME/prompts/sleep.md" +echo "[install] /sleep prompt -> $CODEX_HOME/prompts/sleep.md" + +# 2) user-level skill +mkdir -p "$AGENTS_SKILLS/skillopt-sleep" +cp "$REPO_ROOT/plugins/codex/skills/skillopt-sleep/SKILL.md" "$AGENTS_SKILLS/skillopt-sleep/SKILL.md" +echo "[install] skill -> $AGENTS_SKILLS/skillopt-sleep/SKILL.md" + +# 3) record the repo location so the runner is found from anywhere +echo "[install] add to your shell profile:" +echo " export SKILLOPT_SLEEP_REPO=\"$REPO_ROOT\"" + +# 4) optional: append an AGENTS.md hint (only if the user opts in) +cat < --project "$(pwd)" +``` + +`<action>` ∈ `status | dry-run | run | adopt | harvest`. Use `--backend codex` +for real improvement on the user's own Codex budget (default `mock` = no spend). + +## Steps + +1. Run the requested action; capture stdout. +2. For `run`/`dry-run`: read the staged `report.md` it prints and show the user + the held-out baseline → candidate score and the exact proposed edits. +3. `run` only **stages** a proposal under `<project>/.skillopt-sleep/staging/`; + nothing live changes until `adopt`. Offer `/sleep adopt`. +4. Never hand-edit the user's `AGENTS.md` / skills yourself — only `adopt` does, + and it backs up first. + +## Validate + +```bash +python -m skillopt_sleep.experiments.run_gbrain --backend codex \ + --seeds brief-writer --data-root /path/to/gbrain-evals/eval/data/skillopt-v1 \ + --nights 2 --limit-replay 3 --limit-holdout 3 +``` +A deficient skill goes 0.00 → 1.00 on a held-out set; the optimizer's edits are +gated on real-task performance. 
diff --git a/plugins/copilot/README.md b/plugins/copilot/README.md new file mode 100644 index 0000000..c5a32c7 --- /dev/null +++ b/plugins/copilot/README.md @@ -0,0 +1,67 @@ +# SkillOpt-Sleep — GitHub Copilot integration + +Give **Copilot** (CLI or VS Code) a nightly **sleep cycle** via a tiny **MCP +server** that exposes the `skillopt_sleep` engine as tools. MCP is GitHub's +supported way to extend Copilot, so this works across Copilot CLI, VS Code, and +other MCP clients with the same server. + +## What's here + +| File | Purpose | +|---|---| +| `mcp_server.py` | stdlib-only MCP (stdio) server exposing `sleep_*` tools | +| `mcp-config.example.json` | drop-in MCP server config | +| `copilot-instructions.snippet.md` | paste into `.github/copilot-instructions.md` | + +## Install + +Requires Python ≥ 3.10. No third-party packages — the server is pure stdlib. + +1. **Register the MCP server.** Add the server to your Copilot MCP config + (Copilot CLI: `~/.copilot/mcp-config.json`; VS Code: your MCP settings). + Use `mcp-config.example.json` as a template — set `SKILLOPT_SLEEP_REPO` to + this repo's path: + + ```json + { + "mcpServers": { + "skillopt-sleep": { + "command": "python3", + "args": ["/abs/path/SkillOpt-Sleep/plugins/copilot/mcp_server.py"], + "env": { "SKILLOPT_SLEEP_REPO": "/abs/path/SkillOpt-Sleep" } + } + } + } + ``` + +2. **(Optional) Tell Copilot about it.** Append + `copilot-instructions.snippet.md` to your repo's + `.github/copilot-instructions.md` so Copilot reaches for the tools when the + user asks to "run the sleep cycle". + +## Use + +Ask Copilot things like *"run the sleep cycle"*, *"what did the last sleep +propose?"*, *"adopt the staged sleep proposal"*. Copilot calls the MCP tools: +`sleep_status`, `sleep_dry_run`, `sleep_run`, `sleep_adopt`, `sleep_harvest`. + +Each tool takes optional `project`, `backend` (`mock`/`claude`/`codex`), and +`scope` arguments. Default backend is `mock` (no API spend). 
+ +## Verify the server directly (no Copilot needed) + +```bash +printf '%s\n' \ + '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}' \ + '{"jsonrpc":"2.0","id":2,"method":"tools/list"}' \ + | SKILLOPT_SLEEP_REPO="$(pwd)" python3 plugins/copilot/mcp_server.py +``` +You should see the server info and the five `sleep_*` tools. + +## Notes / status + +- MCP is the stable, official Copilot extension surface, so this is the most + portable of the three integrations (one server → CLI + IDE). +- The engine and all its controls (gate on/off, multi-rollout, budget, + preferences, optimizer/target split) are identical across platforms — see + [`../../docs/sleep/CONTROLLABLE_DREAMING.md`](../../docs/sleep/CONTROLLABLE_DREAMING.md). diff --git a/plugins/copilot/copilot-instructions.snippet.md b/plugins/copilot/copilot-instructions.snippet.md new file mode 100644 index 0000000..be19047 --- /dev/null +++ b/plugins/copilot/copilot-instructions.snippet.md @@ -0,0 +1,25 @@ + + +## SkillOpt-Sleep (offline self-evolution) + +This project has SkillOpt-Sleep available via an MCP server (`skillopt-sleep`). +It gives the agent a nightly "sleep cycle": it reviews past sessions, replays +recurring tasks offline, and consolidates validated memory + skills behind a +held-out gate. + +When the user asks to "run the sleep cycle", "review my past sessions", "learn +my preferences", or "make the agent improve from past usage", use the MCP tools: + +- `sleep_status` — what's happened + the latest staged proposal +- `sleep_dry_run` — safe preview, stages nothing +- `sleep_run` — full cycle, stages a reviewed proposal (nothing live changes) +- `sleep_adopt` — apply the staged proposal (backs up first) +- `sleep_harvest` — list mined recurring tasks + +Always show the user the held-out baseline → candidate score and the proposed +edits before suggesting `sleep_adopt`. Never hand-edit the user's memory/skill +files; only `sleep_adopt` does that, with a backup. 
diff --git a/plugins/copilot/mcp-config.example.json b/plugins/copilot/mcp-config.example.json new file mode 100644 index 0000000..80b31fa --- /dev/null +++ b/plugins/copilot/mcp-config.example.json @@ -0,0 +1,11 @@ +{ + "mcpServers": { + "skillopt-sleep": { + "command": "python3", + "args": ["plugins/copilot/mcp_server.py"], + "env": { + "SKILLOPT_SLEEP_REPO": "${workspaceFolder}" + } + } + } +} diff --git a/plugins/copilot/mcp_server.py b/plugins/copilot/mcp_server.py new file mode 100755 index 0000000..d03a95b --- /dev/null +++ b/plugins/copilot/mcp_server.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +"""SkillOpt-Sleep — minimal MCP server (stdio, stdlib-only). + +Exposes the sleep engine as MCP tools so any MCP-capable client (GitHub Copilot +CLI / VS Code, Claude Desktop, etc.) can drive it. No third-party deps: speaks +JSON-RPC 2.0 over stdio with just the handful of MCP methods clients need. + +Tools exposed: + - sleep_status : how many nights have run + the latest staged proposal + - sleep_dry_run : harvest+mine+replay, report only (no staging) + - sleep_run : full cycle, stages a proposal (nothing live changes) + - sleep_adopt : apply the latest staged proposal (with backup) + - sleep_harvest : debug — list mined recurring tasks + +Each tool shells out to `python -m skillopt_sleep <action> ...` and returns its +stdout. 
Configure your client to launch: python plugins/copilot/mcp_server.py +""" +from __future__ import annotations + +import json +import os +import subprocess +import sys + +REPO_ROOT = os.environ.get("SKILLOPT_SLEEP_REPO") or os.path.abspath( + os.path.join(os.path.dirname(__file__), "..", "..") +) +PROTOCOL_VERSION = "2024-11-05" + +TOOLS = [ + {"name": "sleep_status", "action": "status", + "description": "Show how many SkillOpt-Sleep nights have run and the latest staged proposal."}, + {"name": "sleep_dry_run", "action": "dry-run", + "description": "Preview a sleep cycle (harvest+mine+replay) without staging anything."}, + {"name": "sleep_run", "action": "run", + "description": "Run a full sleep cycle; stages a reviewed proposal. Nothing live changes until adopt."}, + {"name": "sleep_adopt", "action": "adopt", + "description": "Apply the latest staged proposal to CLAUDE.md/SKILL.md (backs up first)."}, + {"name": "sleep_harvest", "action": "harvest", + "description": "Debug: list the recurring tasks mined from recent sessions."}, +] +_BY_NAME = {t["name"]: t for t in TOOLS} + +_TOOL_SCHEMA = { + "type": "object", + "properties": { + "project": {"type": "string", "description": "Project dir to evolve (default: cwd)."}, + "backend": {"type": "string", "enum": ["mock", "claude", "codex"], + "description": "mock = no API spend (default); claude/codex = real."}, + "scope": {"type": "string", "enum": ["invoked", "all"]}, + }, + "additionalProperties": False, +} + + +def _run_engine(action: str, args: dict) -> str: + py = sys.executable or "python3" + cmd = [py, "-m", "skillopt_sleep", action] + if args.get("project"): + cmd += ["--project", str(args["project"])] + if args.get("backend"): + cmd += ["--backend", str(args["backend"])] + if args.get("scope"): + cmd += ["--scope", str(args["scope"])] + try: + proc = subprocess.run(cmd, cwd=REPO_ROOT, capture_output=True, text=True, timeout=3600) + except Exception as e: # noqa: BLE001 + return f"[error] failed to run engine: 
{e}" + out = (proc.stdout or "").strip() + err = (proc.stderr or "").strip() + return out + (("\n[stderr]\n" + err) if err else "") + + +def _result(id_, result): + return {"jsonrpc": "2.0", "id": id_, "result": result} + + +def _error(id_, code, message): + return {"jsonrpc": "2.0", "id": id_, "error": {"code": code, "message": message}} + + +def handle(req: dict): + method = req.get("method") + id_ = req.get("id") + if method == "initialize": + return _result(id_, { + "protocolVersion": PROTOCOL_VERSION, + "capabilities": {"tools": {}}, + "serverInfo": {"name": "skillopt-sleep", "version": "0.1.0"}, + }) + if method in ("notifications/initialized", "initialized"): + return None # notification, no response + if method == "tools/list": + return _result(id_, {"tools": [ + {"name": t["name"], "description": t["description"], "inputSchema": _TOOL_SCHEMA} + for t in TOOLS + ]}) + if method == "tools/call": + params = req.get("params") or {} + name = params.get("name") + tool = _BY_NAME.get(name) + if not tool: + return _error(id_, -32602, f"unknown tool: {name}") + text = _run_engine(tool["action"], params.get("arguments") or {}) + return _result(id_, {"content": [{"type": "text", "text": text}]}) + if method == "ping": + return _result(id_, {}) + return _error(id_, -32601, f"method not found: {method}") + + +def main() -> int: + for line in sys.stdin: + line = line.strip() + if not line: + continue + try: + req = json.loads(line) + except Exception: + continue + resp = handle(req) + if resp is not None: + sys.stdout.write(json.dumps(resp) + "\n") + sys.stdout.flush() + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/plugins/run-sleep.sh b/plugins/run-sleep.sh new file mode 100755 index 0000000..e46e212 --- /dev/null +++ b/plugins/run-sleep.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# SkillOpt-Sleep shared runner — used by all platform plugins (Claude Code, +# Codex, Copilot). 
Resolves the repo root (which contains the skillopt_sleep +# package), picks a Python >= 3.10, and execs the engine CLI. +# +# Usage: run-sleep.sh <action> [args...] +set -euo pipefail + +# This script lives at <repo>/plugins/run-sleep.sh, so the repo root (which +# holds skillopt_sleep/) is one level up. CLAUDE_PLUGIN_ROOT (if set by Claude +# Code) points at the plugin dir; the engine is then two levels above it. +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +if [ -d "$SCRIPT_DIR/../skillopt_sleep" ]; then + REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +elif [ -n "${CLAUDE_PLUGIN_ROOT:-}" ] && [ -d "$CLAUDE_PLUGIN_ROOT/../../skillopt_sleep" ]; then + REPO_ROOT="$(cd "$CLAUDE_PLUGIN_ROOT/../.." && pwd)" +elif [ -n "${SKILLOPT_SLEEP_REPO:-}" ] && [ -d "$SKILLOPT_SLEEP_REPO/skillopt_sleep" ]; then + REPO_ROOT="$SKILLOPT_SLEEP_REPO" +else + # last resort: search upward from CWD + d="$PWD" + while [ "$d" != "/" ]; do + [ -d "$d/skillopt_sleep" ] && { REPO_ROOT="$d"; break; } + d="$(dirname "$d")" + done +fi +if [ -z "${REPO_ROOT:-}" ]; then + echo "[sleep] ERROR: could not locate the skillopt_sleep package. Set SKILLOPT_SLEEP_REPO to the repo root." >&2 + exit 1 +fi + +PY="" +for cand in python3.12 python3.11 python3.10 python3; do + if command -v "$cand" >/dev/null 2>&1; then + ver="$("$cand" -c 'import sys; print("%d%d" % sys.version_info[:2])' 2>/dev/null || echo 0)" + if [ "${ver:-0}" -ge 310 ]; then PY="$cand"; break; fi + fi +done +if [ -z "$PY" ]; then + echo "[sleep] ERROR: need Python >= 3.10 (found none)." 
>&2 + exit 1 +fi + +if [ "$#" -eq 0 ]; then set -- status; fi +cd "$REPO_ROOT" +exec "$PY" -m skillopt_sleep "$@" diff --git a/pyproject.toml b/pyproject.toml index a45fe5b..e6a9021 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,7 +64,9 @@ Repository = "https://github.com/microsoft/SkillOpt" Issues = "https://github.com/microsoft/SkillOpt/issues" [tool.setuptools.packages.find] -include = ["skillopt*", "scripts*"] +# skillopt* = the research package; skillopt_sleep = the open-source Sleep tool +# (decoupled, zero dependency on the research code). +include = ["skillopt", "skillopt.*", "skillopt_sleep", "skillopt_sleep.*", "scripts*"] [tool.ruff] line-length = 120 diff --git a/skillopt-sleep-plugin/scripts/sleep.sh b/skillopt-sleep-plugin/scripts/sleep.sh deleted file mode 100755 index 052b0c5..0000000 --- a/skillopt-sleep-plugin/scripts/sleep.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# SkillOpt-Sleep runner — invokes the skillopt_sleep engine with a suitable -# Python interpreter, from the repo that contains this plugin. -# -# Usage: sleep.sh <action> [extra args...] -set -euo pipefail - -# Resolve the repo root: the plugin lives at <repo>/skillopt-sleep-plugin, -# so the engine package is at <repo>/skillopt_sleep. CLAUDE_PLUGIN_ROOT points -# at the plugin dir when run by Claude Code; fall back to this script's dir. -PLUGIN_ROOT="${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}" -REPO_ROOT="$(cd "$PLUGIN_ROOT/.." && pwd)" - -# Pick an interpreter that satisfies SkillOpt's 3.10+ requirement. -PY="" -for cand in python3.12 python3.11 python3.10 python3; do - if command -v "$cand" >/dev/null 2>&1; then - ver="$("$cand" -c 'import sys; print("%d%d" % sys.version_info[:2])' 2>/dev/null || echo 0)" - if [ "${ver:-0}" -ge 310 ]; then PY="$cand"; break; fi - fi -done -if [ -z "$PY" ]; then - echo "[sleep] ERROR: need Python >= 3.10 (found none). Install one and retry." 
>&2 - exit 1 -fi - -if [ "$#" -eq 0 ]; then set -- status; fi - -cd "$REPO_ROOT" -exec "$PY" -m skillopt_sleep "$@"