From f9db99853b23865b3afba9a3e90b52c6c3782c91 Mon Sep 17 00:00:00 2001
From: Yifan Yang <yif_yang@qq.com>
Date: Mon, 8 Jun 2026 14:31:52 +0000
Subject: [PATCH] feat(plugins): ship SkillOpt-Sleep for Claude Code, Codex,
 and Copilot
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Restructure into plugins/{claude-code,codex,copilot}/ — one engine, three thin
shells, all calling the shared plugins/run-sleep.sh -> python -m skillopt_sleep.

  - claude-code/: existing plugin moved here; runner delegates to the shared
    launcher (fixes repo-root resolution after the move).
  - codex/: ~/.codex/prompts/sleep.md custom prompt + ~/.agents/skills SKILL.md +
    install.sh + AGENTS.md hint — Codex's documented, stable extension surfaces.
  - copilot/: a stdlib-only MCP server (mcp_server.py) exposing sleep_* tools,
    plus mcp-config.example.json and a copilot-instructions snippet. Verified end
    to end (initialize -> tools/list -> tools/call returns real engine output).
  - plugins/README.md overview table; main README News + a dedicated SkillOpt-Sleep
    section; pyproject lists skillopt_sleep as a first-class package.

Decoupling emphasized throughout: open-source tool (skillopt_sleep/) with zero
dependency on the research package. 29 tests pass; all three shells resolve.

Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>
---
 README.md                                     |  46 +++++++
 plugins/README.md                             |  74 ++++++++++
 .../.claude-plugin/marketplace.json           |   0
 .../claude-code}/.claude-plugin/plugin.json   |   0
 .../claude-code}/README.md                    |   0
 .../claude-code}/commands/sleep.md            |   0
 .../claude-code}/hooks/hooks.json             |   0
 .../claude-code}/hooks/on-session-end.sh      |   0
 .../claude-code}/scripts/install-cron.sh      |   0
 plugins/claude-code/scripts/sleep.sh          |  11 ++
 .../skills/skillopt-sleep/SKILL.md            |   0
 plugins/codex/README.md                       |  59 ++++++++
 plugins/codex/install.sh                      |  36 +++++
 plugins/codex/prompts/sleep.md                |  21 +++
 plugins/codex/skills/skillopt-sleep/SKILL.md  |  49 +++++++
 plugins/copilot/README.md                     |  67 +++++++++
 .../copilot/copilot-instructions.snippet.md   |  25 ++++
 plugins/copilot/mcp-config.example.json       |  11 ++
 plugins/copilot/mcp_server.py                 | 128 ++++++++++++++++++
 plugins/run-sleep.sh                          |  46 +++++++
 pyproject.toml                                |   4 +-
 skillopt-sleep-plugin/scripts/sleep.sh        |  30 ----
 22 files changed, 576 insertions(+), 31 deletions(-)
 create mode 100644 plugins/README.md
 rename {skillopt-sleep-plugin => plugins/claude-code}/.claude-plugin/marketplace.json (100%)
 rename {skillopt-sleep-plugin => plugins/claude-code}/.claude-plugin/plugin.json (100%)
 rename {skillopt-sleep-plugin => plugins/claude-code}/README.md (100%)
 rename {skillopt-sleep-plugin => plugins/claude-code}/commands/sleep.md (100%)
 rename {skillopt-sleep-plugin => plugins/claude-code}/hooks/hooks.json (100%)
 rename {skillopt-sleep-plugin => plugins/claude-code}/hooks/on-session-end.sh (100%)
 rename {skillopt-sleep-plugin => plugins/claude-code}/scripts/install-cron.sh (100%)
 create mode 100755 plugins/claude-code/scripts/sleep.sh
 rename {skillopt-sleep-plugin => plugins/claude-code}/skills/skillopt-sleep/SKILL.md (100%)
 create mode 100644 plugins/codex/README.md
 create mode 100755 plugins/codex/install.sh
 create mode 100644 plugins/codex/prompts/sleep.md
 create mode 100644 plugins/codex/skills/skillopt-sleep/SKILL.md
 create mode 100644 plugins/copilot/README.md
 create mode 100644 plugins/copilot/copilot-instructions.snippet.md
 create mode 100644 plugins/copilot/mcp-config.example.json
 create mode 100755 plugins/copilot/mcp_server.py
 create mode 100755 plugins/run-sleep.sh
 delete mode 100755 skillopt-sleep-plugin/scripts/sleep.sh

diff --git a/README.md b/README.md
index 1bdbde5..f4dd537 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@
 ---
 
 ## News 🔥🔥🔥
+- **[2026-06-08]** 😴 **SkillOpt-Sleep is here — plugins for Claude Code, Codex, and Copilot.** Give your local coding agent a nightly *sleep cycle*: it reviews your past sessions offline, replays your recurring tasks, and consolidates validated long-term memory + skills behind a held-out gate —越用越好用. Validated on the public [gbrain-evals](https://github.com/garrytan/gbrain-evals) `skillopt-v1` benchmark with **real Claude and Codex** (deficient skills 0.00 → 1.00 on held-out, all 4 seeds). It's an **open-source tool decoupled from the paper code**. See [`plugins/`](plugins/) and the [SkillOpt-Sleep section](#-skillopt-sleep--越用越好用-the-deployment-time-companion) below.
 - **[2026-06-03]** 🎉 **[gbrain](https://github.com/garrytan/gbrain), [gbrain-evals](https://github.com/garrytan/gbrain-evals/blob/main/docs/benchmarks/2026-06-03-skillopt.md), and [darwin-skill](https://github.com/alchaincyf/darwin-skill) have all integrated SkillOpt.**
 - **[2026-06-02]** 🎉 **SkillOpt [v0.1.0](https://github.com/microsoft/SkillOpt/releases/tag/v0.1.0) is now available on [PyPI](https://pypi.org/project/skillopt/)!** Install with `pip install skillopt`. This initial release includes the full training loop (rollout → reflect → aggregate → select → update → evaluate), multi-backend support (OpenAI / Azure / Claude / Qwen / MiniMax), six built-in benchmarks, and WebUI dashboard.
 
@@ -50,6 +51,51 @@ https://github.com/user-attachments/assets/eb12d3bc-371c-467f-904d-91b61f339ed7
 
 ---
 
+## 😴 SkillOpt-Sleep — 越用越好用, the deployment-time companion
+
+SkillOpt (above) trains a skill offline on a benchmark. **SkillOpt-Sleep**
+applies the same discipline to *your own daily usage*: it gives a local coding
+agent a nightly **sleep cycle** that reviews your past sessions, replays your
+recurring tasks on your own API budget, and consolidates what it learns into
+**validated** long-term memory and skills — behind a held-out gate, staged for
+your review. The agent gets better the more you use it, with no weight training.
+
+It synthesizes **SkillOpt** (validation-gated bounded text edits), **Claude
+Dreams** (offline consolidation; review-then-adopt), and the **agent sleep**
+idea (short-term experience → long-term competence). One "night":
+
+```
+harvest session transcripts → mine recurring tasks → replay offline
+   → consolidate (reflect → bounded edit → GATE on real held-out tasks)
+   → stage proposal → (you) adopt
+```
+
+**Plugins for three agents** (one engine, three thin shells — see [`plugins/`](plugins/)):
+
+| Platform | Folder | Install |
+|---|---|---|
+| **Claude Code** | [`plugins/claude-code`](plugins/claude-code) | `/plugin marketplace add ./plugins/claude-code` → `/sleep` |
+| **Codex** | [`plugins/codex`](plugins/codex) | `bash plugins/codex/install.sh` → `/sleep` |
+| **Copilot** | [`plugins/copilot`](plugins/copilot) | register `plugins/copilot/mcp_server.py` as an MCP server |
+
+**Validated on real models.** On the public
+[gbrain-evals](https://github.com/garrytan/gbrain-evals) `skillopt-v1` benchmark,
+deficient skills go **0.00 → 1.00** on held-out sets with **both Claude and
+Codex** (all 4 seeds, including a real tool-use loop), cross-model transfer is
+positive, and the gate blocks regressions
+([full results](docs/sleep/FINAL_REPORT.md)).
+
+> **Open-source tool, decoupled from the research.** The engine lives in the
+> top-level [`skillopt_sleep/`](skillopt_sleep) package with **zero dependency**
+> on the paper's `skillopt/` experiment code (the validation gate is vendored).
+> Controls — optional gate, multi-rollout contrastive reflection, token/time
+> budget, multi-objective reward, user preferences, optimizer/target split — are
+> documented in [`docs/sleep/CONTROLLABLE_DREAMING.md`](docs/sleep/CONTROLLABLE_DREAMING.md).
+
+Deterministic proof (no API key): `python -m skillopt_sleep.experiments.run_experiment --persona researcher --assert-improves`.
+
+---
+
 ## Install
 
 ### Requirements
diff --git a/plugins/README.md b/plugins/README.md
new file mode 100644
index 0000000..0fe7b69
--- /dev/null
+++ b/plugins/README.md
@@ -0,0 +1,74 @@
+# SkillOpt-Sleep — plugins for Claude Code, Codex, and Copilot
+
+One engine, three thin shells. **SkillOpt-Sleep** gives a local coding agent a
+nightly **sleep cycle**: it reviews your past sessions offline, replays your
+recurring tasks on your own API budget, and consolidates what it learns into
+**validated** long-term memory and skills — behind a held-out gate, staged for
+your review. Your agent gets better the more you use it, with no model-weight
+training.
+
+It synthesizes three ideas: **SkillOpt** (validation-gated bounded text
+optimization — the research in this repo), **Claude Dreams** (offline memory
+consolidation; input never mutated; review-then-adopt), and the **agent sleep**
+literature (short-term experience → long-term competence).
+
+> **This is an open-source tool, decoupled from the research code.** The engine
+> lives in the top-level [`skillopt_sleep/`](../skillopt_sleep) package and has
+> **zero dependency** on the paper's `skillopt/` experiment package (the
+> validation gate is vendored). You can ship/use it without the research stack.
+
+## The three integrations
+
+| Platform | Folder | Mechanism | Status |
+|---|---|---|---|
+| **Claude Code** | [`claude-code/`](claude-code) | `.claude-plugin` + `/sleep` command + skill + hooks | full, installable |
+| **Codex** | [`codex/`](codex) | `~/.codex/prompts/sleep.md` + `~/.agents/skills` + `AGENTS.md` | full |
+| **Copilot** | [`copilot/`](copilot) | MCP server (`sleep_*` tools) + `copilot-instructions` | full (MCP) |
+
+All three call the **same** [`plugins/run-sleep.sh`](run-sleep.sh) → `python -m
+skillopt_sleep`, so behaviour is identical everywhere. Per-platform setup is in
+each folder's README.
+
+## Quick start (Claude Code)
+
+```bash
+git clone <repo-url> SkillOpt-Sleep && cd SkillOpt-Sleep
+# then, inside Claude Code (these are slash commands, not shell):
+/plugin marketplace add ./plugins/claude-code
+/plugin install skillopt-sleep@skillopt-sleep
+/sleep status
+```
+Codex: `bash plugins/codex/install.sh`.
+Copilot: register `plugins/copilot/mcp_server.py` as an MCP server.
+
+## What one "night" does
+
+```
+harvest ~/.claude (or session) transcripts → mine recurring tasks → replay offline
+   → consolidate (reflect → bounded edit → GATE on real held-out tasks)
+   → stage proposal → (you) adopt
+```
+
+Nothing live changes until you adopt; every adopt backs up first.
+
+## Controls (work on all platforms)
+
+`--gate on|off` · `--rollouts-k K` (multi-rollout contrastive reflection) ·
+`--budget-tokens/--budget-minutes` · `--preferences "..."` · separate
+optimizer/target models (`--optimizer-model` / `--target-model`) · slow-update
+long-term memory. Full guide:
+[`../docs/sleep/CONTROLLABLE_DREAMING.md`](../docs/sleep/CONTROLLABLE_DREAMING.md).
+
+## Does it actually work?
+
+Validated on the public
+[gbrain-evals](https://github.com/garrytan/gbrain-evals) `skillopt-v1` benchmark
+with **real models on both Claude and Codex**: deficient skills go **0.00 →
+1.00** on held-out sets (all 4 seeds incl. a real tool-use loop), cross-model
+transfer is positive, and the gate blocks regressions. Full results:
+[`../docs/sleep/FINAL_REPORT.md`](../docs/sleep/FINAL_REPORT.md).
+
+Deterministic proof (no API key):
+```bash
+python -m skillopt_sleep.experiments.run_experiment --persona researcher --assert-improves
+```
diff --git a/skillopt-sleep-plugin/.claude-plugin/marketplace.json b/plugins/claude-code/.claude-plugin/marketplace.json
similarity index 100%
rename from skillopt-sleep-plugin/.claude-plugin/marketplace.json
rename to plugins/claude-code/.claude-plugin/marketplace.json
diff --git a/skillopt-sleep-plugin/.claude-plugin/plugin.json b/plugins/claude-code/.claude-plugin/plugin.json
similarity index 100%
rename from skillopt-sleep-plugin/.claude-plugin/plugin.json
rename to plugins/claude-code/.claude-plugin/plugin.json
diff --git a/skillopt-sleep-plugin/README.md b/plugins/claude-code/README.md
similarity index 100%
rename from skillopt-sleep-plugin/README.md
rename to plugins/claude-code/README.md
diff --git a/skillopt-sleep-plugin/commands/sleep.md b/plugins/claude-code/commands/sleep.md
similarity index 100%
rename from skillopt-sleep-plugin/commands/sleep.md
rename to plugins/claude-code/commands/sleep.md
diff --git a/skillopt-sleep-plugin/hooks/hooks.json b/plugins/claude-code/hooks/hooks.json
similarity index 100%
rename from skillopt-sleep-plugin/hooks/hooks.json
rename to plugins/claude-code/hooks/hooks.json
diff --git a/skillopt-sleep-plugin/hooks/on-session-end.sh b/plugins/claude-code/hooks/on-session-end.sh
similarity index 100%
rename from skillopt-sleep-plugin/hooks/on-session-end.sh
rename to plugins/claude-code/hooks/on-session-end.sh
diff --git a/skillopt-sleep-plugin/scripts/install-cron.sh b/plugins/claude-code/scripts/install-cron.sh
similarity index 100%
rename from skillopt-sleep-plugin/scripts/install-cron.sh
rename to plugins/claude-code/scripts/install-cron.sh
diff --git a/plugins/claude-code/scripts/sleep.sh b/plugins/claude-code/scripts/sleep.sh
new file mode 100755
index 0000000..3d2210e
--- /dev/null
+++ b/plugins/claude-code/scripts/sleep.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# Claude Code plugin runner — thin wrapper that execs the shared launcher
+# <repo>/plugins/run-sleep.sh, which handles repo-root + interpreter resolution.
+# Lookup order: beside this checkout, $CLAUDE_PLUGIN_ROOT/.., then $SKILLOPT_SLEEP_REPO/plugins.
+set -euo pipefail
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"   # <repo>/plugins/claude-code/scripts
+SHARED="$(cd "$HERE/../.." && pwd)/run-sleep.sh"        # <repo>/plugins/run-sleep.sh
+[ -f "$SHARED" ] || [ -z "${CLAUDE_PLUGIN_ROOT:-}" ] || SHARED="$CLAUDE_PLUGIN_ROOT/../run-sleep.sh"
+[ -f "$SHARED" ] || [ -z "${SKILLOPT_SLEEP_REPO:-}" ] || SHARED="$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh"
+[ -f "$SHARED" ] || { echo "[sleep] ERROR: cannot find plugins/run-sleep.sh; set SKILLOPT_SLEEP_REPO to the repo root." >&2; exit 1; }
+exec bash "$SHARED" "$@"
diff --git a/skillopt-sleep-plugin/skills/skillopt-sleep/SKILL.md b/plugins/claude-code/skills/skillopt-sleep/SKILL.md
similarity index 100%
rename from skillopt-sleep-plugin/skills/skillopt-sleep/SKILL.md
rename to plugins/claude-code/skills/skillopt-sleep/SKILL.md
diff --git a/plugins/codex/README.md b/plugins/codex/README.md
new file mode 100644
index 0000000..f5960da
--- /dev/null
+++ b/plugins/codex/README.md
@@ -0,0 +1,59 @@
+# SkillOpt-Sleep — Codex integration
+
+Give your **Codex** agent a nightly **sleep cycle**: it reviews past sessions
+offline, replays your recurring tasks on your own Codex budget, and consolidates
+what it learns into validated memory + skills behind a held-out gate. Same engine
+as the Claude Code plugin (`skillopt_sleep`), wrapped for Codex.
+
+> **Verified on Codex:** on the public
+> [gbrain-evals](https://github.com/garrytan/gbrain-evals) `skillopt-v1`
+> benchmark, a deliberately deficient skill goes **0.00 → 1.00** on a held-out
+> set with the Codex backend (incl. the tool-use seed via a real tool loop).
+> See [`../../docs/sleep/FINAL_REPORT.md`](../../docs/sleep/FINAL_REPORT.md).
+
+## What Codex supports (and what we use)
+
+Codex (`@openai/codex`) extends via **`AGENTS.md`** instructions, **skills** at
+`~/.agents/skills/<name>/SKILL.md`, and **custom prompts** at
+`~/.codex/prompts/<name>.md` (invoked as `/<name>`). This integration ships all
+three, plus a shared runner.
+
+## Install
+
+```bash
+git clone <repo-url> SkillOpt-Sleep
+cd SkillOpt-Sleep
+bash plugins/codex/install.sh          # installs the /sleep prompt + skill
+export SKILLOPT_SLEEP_REPO="$(pwd)"    # so the runner is found from anywhere
+```
+
+Requires Python ≥ 3.10 and the `codex` CLI on PATH.
+
+## Use
+
+```text
+/sleep status      # what's happened
+/sleep dry-run     # safe preview, stages nothing
+/sleep run         # full cycle, stages a reviewed proposal (no live edits)
+/sleep adopt       # apply the staged proposal (with backup)
+```
+
+Or call the engine directly:
+
+```bash
+python -m skillopt_sleep run --project "$(pwd)" --backend codex
+```
+
+Default backend is `mock` (no API spend). `--backend codex` uses your Codex
+budget for real improvement. All the controllable knobs (`--gate on|off`,
+`--rollouts-k`, `--budget-tokens`, `--preferences`, optimizer/target split) work
+identically — see [`../../docs/sleep/CONTROLLABLE_DREAMING.md`](../../docs/sleep/CONTROLLABLE_DREAMING.md).
+
+## Notes / status
+
+- Codex's `exec` runs shell, so the real-tool-loop replay (e.g. the
+  `tool_called: search` benchmark seed) works natively.
+- Codex's standalone *plugin-package manifest* format is not yet a stable public
+  spec; this integration uses the documented `AGENTS.md` + skills + prompts
+  mechanisms, which are stable. If/when a `codex plugin` package format ships,
+  we'll add a one-file manifest.
diff --git a/plugins/codex/install.sh b/plugins/codex/install.sh
new file mode 100755
index 0000000..b7c0e14
--- /dev/null
+++ b/plugins/codex/install.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Install the SkillOpt-Sleep Codex integration: copies the /sleep prompt into
+# $CODEX_HOME/prompts (default ~/.codex) and the skill into ~/.agents/skills.
+# Safe to re-run (the same two files are overwritten); every step is echoed.
+set -euo pipefail
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+CODEX_HOME="${CODEX_HOME:-$HOME/.codex}"
+AGENTS_SKILLS="${HOME}/.agents/skills"
+
+echo "[install] repo: $REPO_ROOT"
+
+# 1) custom /sleep prompt
+mkdir -p "$CODEX_HOME/prompts"
+cp "$REPO_ROOT/plugins/codex/prompts/sleep.md" "$CODEX_HOME/prompts/sleep.md"
+echo "[install] /sleep prompt   -> $CODEX_HOME/prompts/sleep.md"
+
+# 2) user-level skill
+mkdir -p "$AGENTS_SKILLS/skillopt-sleep"
+cp "$REPO_ROOT/plugins/codex/skills/skillopt-sleep/SKILL.md" "$AGENTS_SKILLS/skillopt-sleep/SKILL.md"
+echo "[install] skill           -> $AGENTS_SKILLS/skillopt-sleep/SKILL.md"
+
+# 3) print (not write) the export that lets the runner be found from anywhere
+echo "[install] add to your shell profile:"
+echo "    export SKILLOPT_SLEEP_REPO=\"$REPO_ROOT\""
+
+# 4) optional: print an AGENTS.md hint for the user to paste; this script never edits AGENTS.md
+cat <<EOF
+
+[install] Optional — add this to $CODEX_HOME/AGENTS.md so Codex always knows the tool:
+
+  ## SkillOpt-Sleep
+  An offline self-improvement cycle is available. To run it:
+  \`bash "$REPO_ROOT/plugins/run-sleep.sh" status\`. Use \`/sleep\` for the guided flow.
+
+Done. Try:  /sleep status
+EOF
diff --git a/plugins/codex/prompts/sleep.md b/plugins/codex/prompts/sleep.md
new file mode 100644
index 0000000..ee4319f
--- /dev/null
+++ b/plugins/codex/prompts/sleep.md
@@ -0,0 +1,21 @@
+# /sleep — SkillOpt-Sleep for Codex
+#
+# Custom prompt: copy this file to ~/.codex/prompts/sleep.md and invoke with
+# `/sleep` in the Codex CLI. ($ARGUMENTS is the text after /sleep.)
+
+Run the SkillOpt-Sleep offline self-evolution cycle. Action: $ARGUMENTS
+(empty → "status").
+
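+Use the bundled runner via shell (if no action was given, pass `status` explicitly — the runner only defaults to it when called with no arguments at all):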
+
+    bash "${SKILLOPT_SLEEP_REPO:?set SKILLOPT_SLEEP_REPO to the repo root}/plugins/run-sleep.sh" $ARGUMENTS --project "$(pwd)"
+
+Then:
+- For `run`/`dry-run`: read the staged `report.md` and show the held-out
+  baseline → candidate score and the proposed edits. `run` only stages a
+  proposal; nothing live changes until `adopt`.
+- For `adopt`: confirm which files were updated and that a backup was written.
+- Never edit the user's AGENTS.md / skills yourself; only `adopt` does that.
+
+Default backend is `mock` (no API spend). Add `--backend codex` for real
+improvement on the user's Codex budget.
diff --git a/plugins/codex/skills/skillopt-sleep/SKILL.md b/plugins/codex/skills/skillopt-sleep/SKILL.md
new file mode 100644
index 0000000..c6f2439
--- /dev/null
+++ b/plugins/codex/skills/skillopt-sleep/SKILL.md
@@ -0,0 +1,49 @@
+---
+name: skillopt-sleep
+description: Nightly offline self-evolution for a Codex agent. Reviews past sessions, replays recurring tasks, and consolidates validated memory + skills behind a held-out gate. Use when the user wants Codex to learn from past usage, run a "sleep"/"dream" cycle, or schedule offline self-optimization.
+---
+
+# SkillOpt-Sleep (Codex skill)
+
+This skill drives the `skillopt_sleep` engine — an offline "sleep cycle" that
+makes a Codex agent better at the user's recurring work without retraining.
+
+## When to use
+
+Trigger when the user wants to: review past sessions, learn their preferences,
+consolidate feedback into long-term memory/skills, run a nightly/offline
+self-improvement cycle, or adopt a staged proposal.
+
+## How to run it
+
+Invoke the bundled runner via shell (Codex `exec` has shell access). The runner
+finds the engine and a Python ≥ 3.10 automatically:
+
+```bash
+# SKILLOPT_SLEEP_REPO must point at the repo checkout (install.sh prints the export line):
+export SKILLOPT_SLEEP_REPO=/path/to/SkillOpt-Sleep
+bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" <action> --project "$(pwd)"
+```
+
+`<action>` ∈ `status | dry-run | run | adopt | harvest`. Use `--backend codex`
+for real improvement on the user's own Codex budget (default `mock` = no spend).
+
+## Steps
+
+1. Run the requested action; capture stdout.
+2. For `run`/`dry-run`: read the staged `report.md` it prints and show the user
+   the held-out baseline → candidate score and the exact proposed edits.
+3. `run` only **stages** a proposal under `<project>/.skillopt-sleep/staging/`;
+   nothing live changes until `adopt`. Offer `/sleep adopt`.
+4. Never hand-edit the user's `AGENTS.md` / skills yourself — only `adopt` does,
+   and it backs up first.
+
+## Validate
+
+```bash
+python -m skillopt_sleep.experiments.run_gbrain --backend codex \
+  --seeds brief-writer --data-root /path/to/gbrain-evals/eval/data/skillopt-v1 \
+  --nights 2 --limit-replay 3 --limit-holdout 3
+```
+A deficient skill goes 0.00 → 1.00 on a held-out set; the optimizer's edits are
+gated on real-task performance.
diff --git a/plugins/copilot/README.md b/plugins/copilot/README.md
new file mode 100644
index 0000000..c5a32c7
--- /dev/null
+++ b/plugins/copilot/README.md
@@ -0,0 +1,67 @@
+# SkillOpt-Sleep — GitHub Copilot integration
+
+Give **Copilot** (CLI or VS Code) a nightly **sleep cycle** via a tiny **MCP
+server** that exposes the `skillopt_sleep` engine as tools. MCP is GitHub's
+supported way to extend Copilot, so this works across Copilot CLI, VS Code, and
+other MCP clients with the same server.
+
+## What's here
+
+| File | Purpose |
+|---|---|
+| `mcp_server.py` | stdlib-only MCP (stdio) server exposing `sleep_*` tools |
+| `mcp-config.example.json` | drop-in MCP server config |
+| `copilot-instructions.snippet.md` | paste into `.github/copilot-instructions.md` |
+
+## Install
+
+Requires Python ≥ 3.10. No third-party packages — the server is pure stdlib.
+
+1. **Register the MCP server.** Add the server to your Copilot MCP config
+   (Copilot CLI: `~/.copilot/mcp-config.json`; VS Code: your MCP settings).
+   Use `mcp-config.example.json` as a template — set `SKILLOPT_SLEEP_REPO` to
+   this repo's path:
+
+   ```json
+   {
+     "mcpServers": {
+       "skillopt-sleep": {
+         "command": "python3",
+         "args": ["/abs/path/SkillOpt-Sleep/plugins/copilot/mcp_server.py"],
+         "env": { "SKILLOPT_SLEEP_REPO": "/abs/path/SkillOpt-Sleep" }
+       }
+     }
+   }
+   ```
+
+2. **(Optional) Tell Copilot about it.** Append
+   `copilot-instructions.snippet.md` to your repo's
+   `.github/copilot-instructions.md` so Copilot reaches for the tools when the
+   user asks to "run the sleep cycle".
+
+## Use
+
+Ask Copilot things like *"run the sleep cycle"*, *"what did the last sleep
+propose?"*, *"adopt the staged sleep proposal"*. Copilot calls the MCP tools:
+`sleep_status`, `sleep_dry_run`, `sleep_run`, `sleep_adopt`, `sleep_harvest`.
+
+Each tool takes optional `project`, `backend` (`mock`/`claude`/`codex`), and
+`scope` arguments. Default backend is `mock` (no API spend).
+
+## Verify the server directly (no Copilot needed)
+
+```bash
+printf '%s\n' \
+  '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}' \
+  '{"jsonrpc":"2.0","id":2,"method":"tools/list"}' \
+  | SKILLOPT_SLEEP_REPO="$(pwd)" python3 plugins/copilot/mcp_server.py
+```
+You should see the server info and the five `sleep_*` tools.
+
+## Notes / status
+
+- MCP is the stable, official Copilot extension surface, so this is the most
+  portable of the three integrations (one server → CLI + IDE).
+- The engine and all its controls (gate on/off, multi-rollout, budget,
+  preferences, optimizer/target split) are identical across platforms — see
+  [`../../docs/sleep/CONTROLLABLE_DREAMING.md`](../../docs/sleep/CONTROLLABLE_DREAMING.md).
diff --git a/plugins/copilot/copilot-instructions.snippet.md b/plugins/copilot/copilot-instructions.snippet.md
new file mode 100644
index 0000000..be19047
--- /dev/null
+++ b/plugins/copilot/copilot-instructions.snippet.md
@@ -0,0 +1,25 @@
+<!--
+Copy this block into your repo's .github/copilot-instructions.md so Copilot
+knows the SkillOpt-Sleep tools exist. (Copilot reads copilot-instructions.md
+automatically as ambient guidance.)
+-->
+
+## SkillOpt-Sleep (offline self-evolution)
+
+This project has SkillOpt-Sleep available via an MCP server (`skillopt-sleep`).
+It gives the agent a nightly "sleep cycle": it reviews past sessions, replays
+recurring tasks offline, and consolidates validated memory + skills behind a
+held-out gate.
+
+When the user asks to "run the sleep cycle", "review my past sessions", "learn
+my preferences", or "make the agent improve from past usage", use the MCP tools:
+
+- `sleep_status` — what's happened + the latest staged proposal
+- `sleep_dry_run` — safe preview, stages nothing
+- `sleep_run` — full cycle, stages a reviewed proposal (nothing live changes)
+- `sleep_adopt` — apply the staged proposal (backs up first)
+- `sleep_harvest` — list mined recurring tasks
+
+Always show the user the held-out baseline → candidate score and the proposed
+edits before suggesting `sleep_adopt`. Never hand-edit the user's memory/skill
+files; only `sleep_adopt` does that, with a backup.
diff --git a/plugins/copilot/mcp-config.example.json b/plugins/copilot/mcp-config.example.json
new file mode 100644
index 0000000..80b31fa
--- /dev/null
+++ b/plugins/copilot/mcp-config.example.json
@@ -0,0 +1,11 @@
+{
+  "mcpServers": {
+    "skillopt-sleep": {
+      "command": "python3",
+      "args": ["plugins/copilot/mcp_server.py"],
+      "env": {
+        "SKILLOPT_SLEEP_REPO": "${workspaceFolder}"
+      }
+    }
+  }
+}
diff --git a/plugins/copilot/mcp_server.py b/plugins/copilot/mcp_server.py
new file mode 100755
index 0000000..d03a95b
--- /dev/null
+++ b/plugins/copilot/mcp_server.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+"""SkillOpt-Sleep — minimal MCP server (stdio, stdlib-only).
+
+Exposes the sleep engine as MCP tools so any MCP-capable client (GitHub Copilot
+CLI / VS Code, Claude Desktop, etc.) can drive it. No third-party deps: speaks
+JSON-RPC 2.0 over stdio with just the handful of MCP methods clients need.
+
+Tools exposed:
+  - sleep_status   : how many nights have run + the latest staged proposal
+  - sleep_dry_run  : harvest+mine+replay, report only (no staging)
+  - sleep_run      : full cycle, stages a proposal (nothing live changes)
+  - sleep_adopt    : apply the latest staged proposal (with backup)
+  - sleep_harvest  : debug — list mined recurring tasks
+
+Each tool shells out to `python -m skillopt_sleep <action> ...` and returns its
+stdout. Configure your client to launch:  python plugins/copilot/mcp_server.py
+"""
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import sys
+
+REPO_ROOT = os.environ.get("SKILLOPT_SLEEP_REPO") or os.path.abspath(  # env override wins when set and non-empty
+    os.path.join(os.path.dirname(__file__), "..", "..")  # otherwise: two directories above this file
+)
+PROTOCOL_VERSION = "2024-11-05"  # reported as protocolVersion in the initialize result
+
+TOOLS = [  # one entry per MCP tool: "name" is exposed to clients, "action" is the engine subcommand
+    {"name": "sleep_status", "action": "status",
+     "description": "Show how many SkillOpt-Sleep nights have run and the latest staged proposal."},
+    {"name": "sleep_dry_run", "action": "dry-run",
+     "description": "Preview a sleep cycle (harvest+mine+replay) without staging anything."},
+    {"name": "sleep_run", "action": "run",
+     "description": "Run a full sleep cycle; stages a reviewed proposal. Nothing live changes until adopt."},
+    {"name": "sleep_adopt", "action": "adopt",
+     "description": "Apply the latest staged proposal to CLAUDE.md/SKILL.md (backs up first)."},
+    {"name": "sleep_harvest", "action": "harvest",
+     "description": "Debug: list the recurring tasks mined from recent sessions."},
+]
+_BY_NAME = {t["name"]: t for t in TOOLS}  # tool name -> TOOLS entry, used by tools/call
+
+_TOOL_SCHEMA = {  # JSON Schema returned as every tool's inputSchema in tools/list
+    "type": "object",
+    "properties": {
+        "project": {"type": "string", "description": "Project dir to evolve (default: cwd)."},
+        "backend": {"type": "string", "enum": ["mock", "claude", "codex"],
+                     "description": "mock = no API spend (default); claude/codex = real."},
+        "scope": {"type": "string", "enum": ["invoked", "all"]},
+    },
+    "additionalProperties": False,
+}
+
+
+def _run_engine(action: str, args: dict) -> str:
+    """Run `python -m skillopt_sleep <action>` with cwd=REPO_ROOT and return its output as text.
+
+    Only truthy project/backend/scope are forwarded; launch errors and non-zero exits are reported in the text.
+    """
+    cmd = [sys.executable or "python3", "-m", "skillopt_sleep", action]
+    for key in ("project", "backend", "scope"):
+        if args.get(key):
+            cmd += [f"--{key}", str(args[key])]
+    try:
+        proc = subprocess.run(cmd, cwd=REPO_ROOT, capture_output=True, text=True, timeout=3600)
+    except Exception as e:  # noqa: BLE001
+        return f"[error] failed to run engine: {e}"
+    out, err = (proc.stdout or "").strip(), (proc.stderr or "").strip()
+    failed = f"\n[exit code {proc.returncode}]" if proc.returncode else ""  # make a silent failure visible
+    return out + (("\n[stderr]\n" + err) if err else "") + failed
+
+
+def _result(id_, result):  # JSON-RPC 2.0 success envelope answering request `id_`
+    return {"jsonrpc": "2.0", "id": id_, "result": result}
+
+
+def _error(id_, code, message):  # JSON-RPC 2.0 error envelope; `code` is the numeric error code
+    return {"jsonrpc": "2.0", "id": id_, "error": {"code": code, "message": message}}
+
+
+def handle(req: dict):
+    """Dispatch one decoded JSON-RPC message; return the response dict, or None when no reply is due."""
+    if not isinstance(req, dict):
+        return _error(None, -32600, "invalid request: expected a JSON object")
+    method, id_ = req.get("method"), req.get("id")
+    if "id" not in req or method in ("notifications/initialized", "initialized"):
+        return None  # notification: JSON-RPC 2.0 forbids replying, even for unknown methods
+    if method == "initialize":
+        return _result(id_, {
+            "protocolVersion": PROTOCOL_VERSION, "capabilities": {"tools": {}},
+            "serverInfo": {"name": "skillopt-sleep", "version": "0.1.0"},
+        })
+    if method == "tools/list":
+        return _result(id_, {"tools": [
+            {"name": t["name"], "description": t["description"], "inputSchema": _TOOL_SCHEMA}
+            for t in TOOLS]})
+    if method == "tools/call":
+        params = req.get("params") if isinstance(req.get("params"), dict) else {}
+        name, args = params.get("name"), params.get("arguments")
+        tool = _BY_NAME.get(name) if isinstance(name, str) else None
+        if not tool:
+            return _error(id_, -32602, f"unknown tool: {name}")
+        text = _run_engine(tool["action"], args if isinstance(args, dict) else {})
+        return _result(id_, {"content": [{"type": "text", "text": text}]})
+    if method == "ping":
+        return _result(id_, {})
+    return _error(id_, -32601, f"method not found: {method}")
+
+
+def main() -> int:
+    """Serve newline-delimited JSON-RPC from stdin until EOF, one JSON reply per line on stdout."""
+    for line in filter(str.strip, sys.stdin):  # blank lines are skipped
+        try:
+            req = json.loads(line)
+        except Exception:  # noqa: BLE001 - malformed JSON: the id is unknowable, so answer with id null
+            resp = _error(None, -32700, "parse error")
+        else:
+            resp = handle(req)
+        if resp is not None:
+            # Flush after every reply so it is not held back in the stdout buffer.
+            sys.stdout.write(json.dumps(resp) + "\n")
+            sys.stdout.flush()
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/plugins/run-sleep.sh b/plugins/run-sleep.sh
new file mode 100755
index 0000000..e46e212
--- /dev/null
+++ b/plugins/run-sleep.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# SkillOpt-Sleep shared runner — used by all platform plugins (Claude Code,
+# Codex, Copilot). Resolves the repo root (which contains the skillopt_sleep
+# package), picks a Python >= 3.10, and execs the engine CLI.
+#
+# Usage: run-sleep.sh <run|dry-run|status|adopt|harvest|...> [args...]
+set -euo pipefail
+
+# This script lives at <repo>/plugins/run-sleep.sh, so the repo root (which
+# holds skillopt_sleep/) is one level up. CLAUDE_PLUGIN_ROOT (if set by Claude
+# Code) points at the plugin dir; the engine is then two levels above it.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+if [ -d "$SCRIPT_DIR/../skillopt_sleep" ]; then
+  REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+elif [ -n "${CLAUDE_PLUGIN_ROOT:-}" ] && [ -d "$CLAUDE_PLUGIN_ROOT/../../skillopt_sleep" ]; then
+  REPO_ROOT="$(cd "$CLAUDE_PLUGIN_ROOT/../.." && pwd)"
+elif [ -n "${SKILLOPT_SLEEP_REPO:-}" ] && [ -d "$SKILLOPT_SLEEP_REPO/skillopt_sleep" ]; then
+  REPO_ROOT="$SKILLOPT_SLEEP_REPO"
+else
+  # last resort: search upward from CWD
+  d="$PWD"
+  while [ "$d" != "/" ]; do
+    [ -d "$d/skillopt_sleep" ] && { REPO_ROOT="$d"; break; }
+    d="$(dirname "$d")"
+  done
+fi
+if [ -z "${REPO_ROOT:-}" ]; then
+  echo "[sleep] ERROR: could not locate the skillopt_sleep package. Set SKILLOPT_SLEEP_REPO to the repo root." >&2
+  exit 1
+fi
+
+PY=""
+for cand in python3.12 python3.11 python3.10 python3; do
+  command -v "$cand" >/dev/null 2>&1 || continue
+  # Compare version tuples inside Python rather than as glued digits, which would misorder e.g. 4.0 (40 < 310).
+  "$cand" -c 'import sys; sys.exit(0 if sys.version_info >= (3, 10) else 1)' 2>/dev/null || continue
+  PY="$cand"; break
+done
+if [ -z "$PY" ]; then
+  echo "[sleep] ERROR: need Python >= 3.10 (found none)." >&2
+  exit 1
+fi
+
+if [ "$#" -eq 0 ]; then set -- status; fi   # no arguments at all -> default action "status"
+cd "$REPO_ROOT"                             # run from the checkout so "-m skillopt_sleep" finds the package
+exec "$PY" -m skillopt_sleep "$@"
diff --git a/pyproject.toml b/pyproject.toml
index a45fe5b..e6a9021 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,7 +64,9 @@ Repository = "https://github.com/microsoft/SkillOpt"
 Issues = "https://github.com/microsoft/SkillOpt/issues"
 
 [tool.setuptools.packages.find]
-include = ["skillopt*", "scripts*"]
+# skillopt, skillopt.* = the research package; skillopt_sleep = the open-source Sleep tool
+# (decoupled, zero dependency on the research code).
+include = ["skillopt", "skillopt.*", "skillopt_sleep", "skillopt_sleep.*", "scripts*"]
 
 [tool.ruff]
 line-length = 120
diff --git a/skillopt-sleep-plugin/scripts/sleep.sh b/skillopt-sleep-plugin/scripts/sleep.sh
deleted file mode 100755
index 052b0c5..0000000
--- a/skillopt-sleep-plugin/scripts/sleep.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env bash
-# SkillOpt-Sleep runner — invokes the skillopt_sleep engine with a suitable
-# Python interpreter, from the repo that contains this plugin.
-#
-# Usage: sleep.sh <run|dry-run|status|adopt|harvest> [extra args...]
-set -euo pipefail
-
-# Resolve the repo root: the plugin lives at <repo>/skillopt-sleep-plugin,
-# so the engine package is at <repo>/skillopt_sleep. CLAUDE_PLUGIN_ROOT points
-# at the plugin dir when run by Claude Code; fall back to this script's dir.
-PLUGIN_ROOT="${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
-REPO_ROOT="$(cd "$PLUGIN_ROOT/.." && pwd)"
-
-# Pick an interpreter that satisfies SkillOpt's 3.10+ requirement.
-PY=""
-for cand in python3.12 python3.11 python3.10 python3; do
-  if command -v "$cand" >/dev/null 2>&1; then
-    ver="$("$cand" -c 'import sys; print("%d%d" % sys.version_info[:2])' 2>/dev/null || echo 0)"
-    if [ "${ver:-0}" -ge 310 ]; then PY="$cand"; break; fi
-  fi
-done
-if [ -z "$PY" ]; then
-  echo "[sleep] ERROR: need Python >= 3.10 (found none). Install one and retry." >&2
-  exit 1
-fi
-
-if [ "$#" -eq 0 ]; then set -- status; fi
-
-cd "$REPO_ROOT"
-exec "$PY" -m skillopt_sleep "$@"