feat: sync all 4 runtime plugins with full engine surface + fix #52 #58 #62

Bug fixes: - #52: bundle run-sleep.sh in Claude Code plugin + 4-level fallback - #58: add skillopt-sleep console script entry point in pyproject.toml - #62: filter headless claude -p replay sessions from harvest Plugin sync (Claude Code / Codex / Copilot / OpenClaw): - Document all 22 CLI flags, 7 actions, 4 backends across all SKILL.md files - Document config keys (preferences, gate_mode, dream_rollouts, etc.) - Document memory consolidation (evolve_memory / evolve_skill) - Add schedule/unschedule to all plugins - Copilot MCP: expand schema from 3 → 16 params + schedule tools - OpenClaw: add schedule/unschedule subcommands via shared scheduler Tests: - Cross-plugin parity test (prevents future feature drift) - MCP schema completeness test Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-03 14:02:58 +08:00 · 2026-06-20 11:31:09 +00:00
parent 0b5b9a4296
commit 0be780052a
12 changed files with 479 additions and 16 deletions
--- a/plugins/claude-code/scripts/run-sleep.sh
+++ b/plugins/claude-code/scripts/run-sleep.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# SkillOpt-Sleep shared runner — used by all platform plugins (Claude Code,
+# Codex, Copilot). Picks a Python >= 3.10, then runs the engine CLI from the
+# repo checkout if one is found, else from an installed skillopt_sleep package.
+#
+# Usage: run-sleep.sh <run|dry-run|status|adopt|harvest|...> [args...]
+set -euo pipefail
+
+# This runner ships at <repo>/plugins/run-sleep.sh (repo root one level up)
+# and as a bundled copy at <repo>/plugins/claude-code/scripts/run-sleep.sh
+# (three levels up). CLAUDE_PLUGIN_ROOT, if set, is two levels below the root.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT=""
+for cand in "$SCRIPT_DIR/.." "$SCRIPT_DIR/../../.." \
+  "${CLAUDE_PLUGIN_ROOT:+$CLAUDE_PLUGIN_ROOT/../..}" "${SKILLOPT_SLEEP_REPO:-}"; do
+  if [ -n "$cand" ] && [ -d "$cand/skillopt_sleep" ]; then
+    REPO_ROOT="$(cd "$cand" && pwd)"; break
+  fi
+done
+d="$PWD"  # last resort: search upward from CWD
+while [ -z "$REPO_ROOT" ] && [ "$d" != "/" ]; do
+  if [ -d "$d/skillopt_sleep" ]; then REPO_ROOT="$d"; fi
+  d="$(dirname "$d")"
+done
+
+PY=""
+for cand in python3.12 python3.11 python3.10 python3; do
+  if command -v "$cand" >/dev/null 2>&1; then
+    # Zero-pad the minor so versions compare numerically (3.9 -> 309, 4.0 -> 400).
+    ver="$("$cand" -c 'import sys; print("%d%02d" % sys.version_info[:2])' 2>/dev/null || echo 0)"
+    if [ "${ver:-0}" -ge 310 ]; then PY="$cand"; break; fi
+  fi
+done
+if [ -z "$PY" ]; then
+  echo "[sleep] ERROR: need Python >= 3.10 (found none)." >&2
+  exit 1
+fi
+
+if [ -n "$REPO_ROOT" ]; then
+  cd "$REPO_ROOT"  # a source checkout is imported from the working directory
+elif ! "$PY" -c 'import skillopt_sleep' >/dev/null 2>&1; then
+  echo "[sleep] ERROR: could not locate the skillopt_sleep package. Set SKILLOPT_SLEEP_REPO to the repo root." >&2
+  exit 1
+fi
+if [ "$#" -eq 0 ]; then set -- status; fi
+exec "$PY" -m skillopt_sleep "$@"
--- a/plugins/claude-code/scripts/sleep.sh
+++ b/plugins/claude-code/scripts/sleep.sh
@@ -1,11 +1,30 @@
 #!/usr/bin/env bash
-# Claude Code plugin runner — thin wrapper over the shared runner so all three
-# platform plugins share one engine launcher. The shared runner lives at
-# <repo>/plugins/run-sleep.sh and handles repo-root + interpreter resolution.
+# Claude Code plugin runner — thin wrapper over the shared runner so all
+# platform plugins share one engine launcher.
+#
+# After marketplace install the plugin is isolated in a cache directory and
+# the repo-relative path no longer works.  We try four locations:
+#   1. Co-located run-sleep.sh (bundled copy — works in marketplace cache)
+#   2. Repo-relative ../../run-sleep.sh (dev checkout)
+#   3. CLAUDE_PLUGIN_ROOT/../run-sleep.sh (plugin env variable)
+#   4. SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh (explicit env)
 set -euo pipefail
-HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"   # <repo>/plugins/claude-code/scripts
-SHARED="$(cd "$HERE/../.." && pwd)/run-sleep.sh"        # <repo>/plugins/run-sleep.sh
-if [ ! -f "$SHARED" ] && [ -n "${CLAUDE_PLUGIN_ROOT:-}" ]; then
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+SHARED=""
+if [ -f "$HERE/run-sleep.sh" ]; then
+  SHARED="$HERE/run-sleep.sh"
+elif [ -f "$(cd "$HERE/../.." 2>/dev/null && pwd)/run-sleep.sh" ]; then
+  SHARED="$(cd "$HERE/../.." && pwd)/run-sleep.sh"
+elif [ -n "${CLAUDE_PLUGIN_ROOT:-}" ] && [ -f "$(cd "$CLAUDE_PLUGIN_ROOT/.." 2>/dev/null && pwd)/run-sleep.sh" ]; then
  SHARED="$(cd "$CLAUDE_PLUGIN_ROOT/.." && pwd)/run-sleep.sh"
+elif [ -n "${SKILLOPT_SLEEP_REPO:-}" ] && [ -f "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" ]; then
+  SHARED="$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh"
+fi
+
+if [ -z "$SHARED" ]; then
+  echo "[sleep] ERROR: cannot locate run-sleep.sh." >&2
+  echo "[sleep] Set SKILLOPT_SLEEP_REPO to the SkillOpt repo root, or pip install skillopt." >&2
+  exit 1
 fi
 exec bash "$SHARED" "$@"
--- a/plugins/claude-code/skills/skillopt-sleep/SKILL.md
+++ b/plugins/claude-code/skills/skillopt-sleep/SKILL.md
@@ -54,6 +54,53 @@ Prefer the `/skillopt-sleep` command. Under the hood it calls the bundled runner
 - Add `--backend claude` or `--backend codex` to spend the user's real budget for genuine improvement.
 - Scope defaults to the invoked project; `--scope all` harvests every project.

+### Scheduling
+
+```bash
+"${CLAUDE_PLUGIN_ROOT}/scripts/sleep.sh" schedule --project "$(pwd)" --hour 3 --minute 17
+"${CLAUDE_PLUGIN_ROOT}/scripts/sleep.sh" unschedule --project "$(pwd)"
+```
+
+Installs a nightly cron entry. `unschedule --all` removes every managed entry.
+
+## All CLI flags
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--project PATH` | cwd | Project directory to evolve |
+| `--scope all\|invoked` | invoked | Harvest scope |
+| `--backend mock\|claude\|codex\|copilot` | mock | Replay backend (mock = no API spend) |
+| `--model NAME` | backend default | Override the model used for replay |
+| `--source claude\|codex\|auto` | claude | Transcript source |
+| `--lookback-hours N` | 72 | Harvest window |
+| `--max-sessions N` | unlimited | Cap harvested sessions |
+| `--max-tasks N` | 40 | Cap mined tasks |
+| `--target-skill-path PATH` | auto | Explicit SKILL.md to evolve |
+| `--tasks-file PATH` | — | Reviewed TaskRecord JSON (skip harvest) |
+| `--progress` | off | Print phase progress to stderr |
+| `--auto-adopt` | off | Auto-adopt if gate passes |
+| `--edit-budget N` | 4 | Max bounded edits per night |
+| `--json` | off | Machine-readable JSON output |
+
+## Config keys (`~/.skillopt-sleep/config.json`)
+
+Beyond the CLI flags, advanced behavior is controlled via config:
+
+- **`preferences`** — free-text house rules injected into the optimizer's reflect step (e.g. "Always use async/await", "Answers in `\boxed{}`").
+- **`gate_mode`** — `on` (default, validation-gated) or `off` (greedy, accept all edits).
+- **`gate_metric`** — `hard`, `soft`, or `mixed` (default). Controls how the held-out gate scores.
+- **`dream_rollouts`** — >1 enables multi-rollout contrastive reflection per task.
+- **`recall_k`** — >0 recalls K similar past tasks into the dream (long-term memory).
+- **`evolve_memory`** / **`evolve_skill`** — independently toggle CLAUDE.md vs SKILL.md consolidation.
+
+## Memory consolidation
+
+The sleep cycle can consolidate both:
+- **SKILL.md** — the managed skill file (bounded edits: add/delete/replace)
+- **CLAUDE.md** — the project memory (same bounded edits)
+
+Both are gated by the same held-out validation score. Set `evolve_memory: false` to consolidate only skills, or `evolve_skill: false` for only memory.
+
 ## Hard rules

 - **Never** hand-edit the user's `CLAUDE.md` / `SKILL.md` as part of this skill.
--- a/plugins/codex/skills/skillopt-sleep/SKILL.md
+++ b/plugins/codex/skills/skillopt-sleep/SKILL.md
@@ -52,7 +52,7 @@ bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" run --project "$(pwd)" --source
 bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" adopt --project "$(pwd)"
 ```

-Actions are `status`, `harvest`, `dry-run`, `run`, and `adopt`.
+Actions are `status`, `harvest`, `dry-run`, `run`, `adopt`, `schedule`, and `unschedule`.

 - Default backend is `mock`, which is deterministic and spends no API budget.
 - `--backend codex` uses the user's Codex budget for real improvement.
@@ -61,6 +61,43 @@ Actions are `status`, `harvest`, `dry-run`, `run`, and `adopt`.
 - Keep `dry-run --backend mock` as the first smoke check unless the user
  explicitly asked for a real optimization run.

+### Scheduling
+
+```bash
+bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" schedule --project "$(pwd)" --hour 3 --minute 17
+bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" unschedule --project "$(pwd)"
+```
+
+Installs a nightly cron entry. `unschedule --all` removes every managed entry.
+
+### All backends
+
+- `--backend mock` — deterministic, no API spend (default)
+- `--backend claude` — uses the Claude CLI
+- `--backend codex` — uses the Codex CLI
+- `--backend copilot` — uses the GitHub Copilot CLI
+
+### Additional flags
+
+| Flag | Description |
+|------|-------------|
+| `--auto-adopt` | Auto-adopt if the gate passes (default: stage only) |
+| `--edit-budget N` | Max bounded edits per night (default: 4) |
+| `--lookback-hours N` | Harvest window in hours (default: 72) |
+| `--json` | Machine-readable JSON output |
+
+### Config keys (`~/.skillopt-sleep/config.json`)
+
+- **`preferences`** — free-text house rules for the optimizer
+- **`gate_mode`** — `on` (validation-gated, default) or `off` (greedy)
+- **`gate_metric`** — `hard` | `soft` | `mixed` (default)
+- **`dream_rollouts`** — >1 for multi-rollout contrastive reflection
+- **`recall_k`** — >0 recalls similar past tasks from the archive
+
+### Memory consolidation
+
+The sleep cycle consolidates both **memory** (AGENTS.md / CLAUDE.md) and **skills** (SKILL.md) by default. Each is independently toggleable via `evolve_memory` / `evolve_skill` config keys. Both are gated by the same held-out validation score.
+
 ## Steps

 1. Run the requested action; capture stdout.
--- a/plugins/copilot/copilot-instructions.snippet.md
+++ b/plugins/copilot/copilot-instructions.snippet.md
@@ -19,6 +19,24 @@ my preferences", or "make the agent improve from past usage", use the MCP tools:
 - `sleep_run` — full cycle, stages a reviewed proposal (nothing live changes)
 - `sleep_adopt` — apply the staged proposal (backs up first)
 - `sleep_harvest` — list mined recurring tasks
+- `sleep_schedule` — install a nightly cron entry (set `hour`/`minute`)
+- `sleep_unschedule` — remove the nightly cron entry
+
+### Key parameters (pass as MCP tool arguments)
+
+- `backend` — `mock` (default, free), `claude`, `codex`, or `copilot`
+- `source` — `claude`, `codex`, or `auto` (where to read transcripts)
+- `target_skill_path` — explicit SKILL.md to evolve
+- `tasks_file` — pre-built TaskRecord JSON (skip harvest)
+- `max_tasks` / `max_sessions` — cap workload
+- `auto_adopt` — auto-adopt if the gate passes
+- `json` — machine-readable output for programmatic use
+
+### Advanced config (`~/.skillopt-sleep/config.json`)
+
+- `preferences` — free-text house rules for the optimizer
+- `gate_mode` — `on` (default, validation-gated) or `off` (greedy); `dream_rollouts` — >1 enables multi-rollout contrastive reflection
+- `evolve_memory` / `evolve_skill` — toggle which docs consolidate

 Always show the user the held-out baseline → candidate score and the proposed
 edits before suggesting `sleep_adopt`. Never hand-edit the user's memory/skill
--- a/plugins/copilot/mcp_server.py
+++ b/plugins/copilot/mcp_server.py
@@ -38,16 +38,48 @@ TOOLS = [
     "description": "Apply the latest staged proposal to CLAUDE.md/SKILL.md (backs up first)."},
    {"name": "sleep_harvest", "action": "harvest",
     "description": "Debug: list the recurring tasks mined from recent sessions."},
+    {"name": "sleep_schedule", "action": "schedule",
+     "description": "Install a nightly cron entry to run the sleep cycle automatically."},
+    {"name": "sleep_unschedule", "action": "unschedule",
+     "description": "Remove the nightly cron entry for a project."},
 ]
 _BY_NAME = {t["name"]: t for t in TOOLS}

 _TOOL_SCHEMA = {
    "type": "object",
    "properties": {
-        "project": {"type": "string", "description": "Project dir to evolve (default: cwd)."},
+        "project": {"type": "string",
+                     "description": "Project dir to evolve (default: cwd)."},
        "backend": {"type": "string", "enum": ["mock", "claude", "codex", "copilot"],
                     "description": "mock = no API spend (default); claude/codex/copilot = real."},
-        "scope": {"type": "string", "enum": ["invoked", "all"]},
+        "scope": {"type": "string", "enum": ["invoked", "all"],
+                  "description": "Harvest scope (default: invoked project only)."},
+        "source": {"type": "string", "enum": ["claude", "codex", "auto"],
+                   "description": "Transcript source (default: claude)."},
+        "model": {"type": "string",
+                  "description": "Backend-specific model override."},
+        "tasks_file": {"type": "string",
+                       "description": "Path to reviewed TaskRecord JSON (skips harvest)."},
+        "target_skill_path": {"type": "string",
+                              "description": "Explicit SKILL.md path to evolve/stage/adopt."},
+        "progress": {"type": "boolean",
+                     "description": "Print phase progress to stderr."},
+        "max_sessions": {"type": "integer",
+                         "description": "Cap harvested sessions per run."},
+        "max_tasks": {"type": "integer",
+                      "description": "Cap mined tasks per run."},
+        "lookback_hours": {"type": "integer",
+                           "description": "Harvest window in hours (default: 72)."},
+        "auto_adopt": {"type": "boolean",
+                       "description": "Auto-adopt if gate passes (default: false)."},
+        "json": {"type": "boolean",
+                 "description": "Return machine-readable JSON output."},
+        "edit_budget": {"type": "integer",
+                        "description": "Max bounded edits per night (default: 4)."},
+        "hour": {"type": "integer",
+                 "description": "Hour for schedule (0-23, default: 3)."},
+        "minute": {"type": "integer",
+                   "description": "Minute for schedule (0-59, default: 17)."},
    },
    "additionalProperties": False,
 }
@@ -56,15 +88,35 @@ _TOOL_SCHEMA = {
 def _run_engine(action: str, args: dict) -> str:
    py = sys.executable or "python3"
    cmd = [py, "-m", "skillopt_sleep", action]
-    if args.get("project"):
-        cmd += ["--project", str(args["project"])]
-    if args.get("backend"):
-        cmd += ["--backend", str(args["backend"])]
-    if args.get("scope"):
-        cmd += ["--scope", str(args["scope"])]
+    # String-valued flags
+    for flag, key in [
+        ("--project", "project"), ("--backend", "backend"),
+        ("--scope", "scope"), ("--source", "source"),
+        ("--model", "model"), ("--tasks-file", "tasks_file"),
+        ("--target-skill-path", "target_skill_path"),
+    ]:
+        val = args.get(key)
+        if val:
+            cmd += [flag, str(val)]
+    # Integer-valued flags
+    for flag, key in [
+        ("--max-sessions", "max_sessions"), ("--max-tasks", "max_tasks"),
+        ("--lookback-hours", "lookback_hours"), ("--edit-budget", "edit_budget"),
+        ("--hour", "hour"), ("--minute", "minute"),
+    ]:
+        val = args.get(key)
+        if val is not None:
+            cmd += [flag, str(int(val))]
+    # Boolean flags
+    for flag, key in [
+        ("--progress", "progress"), ("--auto-adopt", "auto_adopt"),
+        ("--json", "json"),
+    ]:
+        if args.get(key):
+            cmd.append(flag)
    try:
        proc = subprocess.run(cmd, cwd=REPO_ROOT, capture_output=True, text=True, timeout=3600)
-    except Exception as e:  # noqa: BLE001
+    except Exception as e:
        return f"[error] failed to run engine: {e}"
    out = (proc.stdout or "").strip()
    err = (proc.stderr or "").strip()
--- a/plugins/openclaw/SKILL.md
+++ b/plugins/openclaw/SKILL.md
@@ -52,6 +52,39 @@ python3 run_sleep.py --dry-run
 python3 run_sleep.py --tasks tests/research-cron-tasks.json
 ```

+## Scheduling
+
+```bash
+python3 slash_sleep.py schedule --hour 3 --minute 17
+python3 slash_sleep.py unschedule
+python3 slash_sleep.py unschedule --all
+```
+
+Installs a nightly cron entry using the shared SkillOpt-Sleep scheduler. This is an alternative to the external `run_sleep_cron.sh` script.
+
+## Alternative backends
+
+While OpenClaw defaults to `openclaw-deepseek` (DeepSeek V4 Pro + Ollama), the shared engine also supports:
+- `--backend mock` — deterministic, no API spend (for testing)
+- `--backend claude` — uses the Claude CLI
+- `--backend codex` — uses the Codex CLI
+- `--backend copilot` — uses the GitHub Copilot CLI
+
+These can be used via the engine directly (`python -m skillopt_sleep`).
+
+## Shared-engine flags
+
+When invoking the engine directly, all standard flags are available:
+- `--source codex` / `--source auto` — pick the transcript source (`codex` harvests Codex Desktop sessions; default is `claude`)
+- `--tasks-file PATH` — use a pre-built task set
+- `--target-skill-path PATH` — explicit SKILL.md target
+- `--max-tasks N` / `--max-sessions N` — cap workload
+- `--progress` — print phase progress
+- `--json` — machine-readable output
+- `--auto-adopt` — auto-adopt if gate passes
+
+Config keys: `preferences`, `gate_mode`, `gate_metric`, `dream_rollouts`, `recall_k`, `evolve_memory`, `evolve_skill`.
+
 ## Config (config.json)

 Key knobs:
--- a/plugins/openclaw/slash_sleep.py
+++ b/plugins/openclaw/slash_sleep.py
@@ -207,6 +207,30 @@ def reject(night: str = None) -> int:
    return 0


+def schedule_cmd(hour: int, minute: int) -> int:
+    """Install the nightly cron entry; returns 1 if the scheduler cannot be imported."""
+    # Function-local import: a missing SkillOpt-Sleep install yields the message below.
+    try:
+        from skillopt_sleep.scheduler import schedule
+    except ImportError:
+        print("ERROR: skillopt_sleep.scheduler not available — is SkillOpt-Sleep installed?")
+        return 1
+    print(schedule(hour=hour, minute=minute))
+    return 0
+
+
+def unschedule_cmd(all_projects: bool) -> int:
+    """Remove the cron entry; returns 1 if the scheduler cannot be imported."""
+    # Function-local import: a missing SkillOpt-Sleep install yields the message below.
+    try:
+        from skillopt_sleep.scheduler import unschedule
+    except ImportError:
+        print("ERROR: skillopt_sleep.scheduler not available — is SkillOpt-Sleep installed?")
+        return 1
+    print(unschedule(all_projects=all_projects))
+    return 0
+
+
 def cost() -> int:
    """Estimate per-night cost based on the actual measurement from Phase 2.

@@ -265,6 +289,12 @@ def main():
    p_reject = sub.add_parser("reject", help="discard most recent staging")
    p_reject.add_argument("night", nargs="?", default=None)
    sub.add_parser("cost", help="estimate cost")
+    p_schedule = sub.add_parser("schedule", help="install nightly cron entry")
+    p_schedule.add_argument("--hour", type=int, default=3, help="hour (0-23)")
+    p_schedule.add_argument("--minute", type=int, default=17, help="minute (0-59)")  # 3:17, as documented
+    p_unschedule = sub.add_parser("unschedule", help="remove cron entry")
+    p_unschedule.add_argument("--all", dest="all_projects", action="store_true",
+                               help="remove entries for all projects")

    args = ap.parse_args()

@@ -282,6 +312,10 @@ def main():
        return reject(args.night)
    if args.cmd == "cost":
        return cost()
+    if args.cmd == "schedule":
+        return schedule_cmd(args.hour, args.minute)
+    if args.cmd == "unschedule":
+        return unschedule_cmd(args.all_projects)
    return 1


--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,6 +58,7 @@ all = [
 [project.scripts]
 skillopt-train = "scripts.train:main"
 skillopt-eval = "scripts.eval_only:main"
+skillopt-sleep = "skillopt_sleep.__main__:main"

 [project.urls]
 Homepage = "https://github.com/microsoft/SkillOpt"
--- a/skillopt_sleep/harvest.py
+++ b/skillopt_sleep/harvest.py
@@ -111,6 +111,56 @@ def _is_meta_prompt(text: str) -> bool:
    return False


+# ── Issue #62: filter headless replay sessions ─────────────────────────
+
+# Prompt markers generated by the engine's own headless `claude -p` calls
+# (judge, reflect, attempt). If the sole user prompt in a single-turn
+# session matches any of these, the session is engine-generated, not a
+# real user task.
+_REPLAY_PROMPT_MARKERS = (
+    "## CURRENT SKILL",
+    "## FAILED TASKS",
+    "## SUCCESSFUL TASKS",
+    "## OUTPUT FORMAT",
+    "You are a strict grader",
+    "Score the response 0.0-1.0",
+    "You are SkillOpt-Sleep",
+    "## TASK\n",
+    "## SKILL\n",
+)
+
+
+def _is_headless_replay(digest: "SessionDigest") -> bool:
+    """Return True when a session looks like the engine's own headless replay.
+
+    Decision order, as implemented below:
+    * more than one user turn -> kept (returns False);
+    * zero user turns         -> dropped (returns True);
+    * otherwise               -> dropped when the first prompt contains any
+      ``_REPLAY_PROMPT_MARKERS`` entry, or when ``ended_at - started_at`` is
+      under 3 seconds; missing or unparsable timestamps keep the session.
+    """
+    if digest.n_user_turns > 1:
+        return False
+    if digest.n_user_turns == 0:
+        return True
+    first_prompt = digest.user_prompts[0] if digest.user_prompts else ""
+    if any(marker in first_prompt for marker in _REPLAY_PROMPT_MARKERS):
+        return True
+    # Only the first 19 characters (YYYY-MM-DDTHH:MM:SS) are parsed, so any
+    # fractional seconds or UTC offset are ignored when measuring duration.
+    if not (digest.started_at and digest.ended_at):
+        return False
+    try:
+        from datetime import datetime
+        stamp_format = "%Y-%m-%dT%H:%M:%S"
+        began = datetime.strptime(digest.started_at[:19], stamp_format)
+        finished = datetime.strptime(digest.ended_at[:19], stamp_format)
+    except (ValueError, TypeError):
+        return False
+    return (finished - began).total_seconds() < 3
+
+
 def digest_transcript(path: str) -> Optional[SessionDigest]:
    """Build a SessionDigest from one ``<sessionId>.jsonl`` transcript."""
    session_id = os.path.splitext(os.path.basename(path))[0]
@@ -236,6 +286,8 @@ def harvest(
        d = digest_transcript(p)
        if d is None:
            continue
+        if _is_headless_replay(d):
+            continue  # Issue #62: skip engine's own headless replay sessions
        if not _project_matches(d.project or "", scope, invoked_project):
            continue
        if since_iso and d.ended_at and d.ended_at < since_iso:
--- a/tests/test_mcp_schema.py
+++ b/tests/test_mcp_schema.py
@@ -0,0 +1,37 @@
+"""Tests for the Copilot MCP server schema completeness."""
+import os
+import sys
+import unittest
+
+# Allow importing from the plugin directory
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "plugins", "copilot"))
+
+
+class TestMcpSchema(unittest.TestCase):
+    def test_schema_includes_all_engine_flags(self):
+        """Schema must expose every engine flag, incl. schedule's hour/minute."""
+        from mcp_server import _TOOL_SCHEMA
+        required_params = {
+            "project", "backend", "scope", "source", "model",
+            "tasks_file", "target_skill_path", "progress",
+            "max_sessions", "max_tasks", "lookback_hours",
+            "auto_adopt", "json", "edit_budget", "hour", "minute",
+        }
+        missing = required_params - set(_TOOL_SCHEMA["properties"])
+        self.assertEqual(missing, set(), f"MCP schema missing: {missing}")
+
+    def test_all_backends_in_enum(self):
+        """The backend enum must offer all four engine backends."""
+        from mcp_server import _TOOL_SCHEMA
+        backends = set(_TOOL_SCHEMA["properties"]["backend"]["enum"])
+        self.assertEqual({"mock", "claude", "codex", "copilot"} - backends, set())
+
+    def test_schedule_tools_exist(self):
+        """Both scheduling tools must be registered in TOOLS."""
+        from mcp_server import TOOLS
+        names = {t["name"] for t in TOOLS}
+        self.assertEqual({"sleep_schedule", "sleep_unschedule"} - names, set())
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/tests/test_plugin_sync.py
+++ b/tests/test_plugin_sync.py
@@ -0,0 +1,87 @@
+"""Cross-plugin parity tests — ensure all plugins document the same features.
+
+Run: python3 -m pytest tests/test_plugin_sync.py -v
+"""
+import os
+import unittest
+
+REPO = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+
+PLUGIN_SKILL_MDS = {
+    "claude-code": os.path.join(REPO, "plugins/claude-code/skills/skillopt-sleep/SKILL.md"),
+    "codex": os.path.join(REPO, "plugins/codex/skills/skillopt-sleep/SKILL.md"),
+    "openclaw": os.path.join(REPO, "plugins/openclaw/SKILL.md"),
+}
+
+MCP_SERVER = os.path.join(REPO, "plugins/copilot/mcp_server.py")
+COPILOT_INSTRUCTIONS = os.path.join(REPO, "plugins/copilot/copilot-instructions.snippet.md")
+
+CANONICAL_BACKENDS = {"mock", "claude", "codex", "copilot"}
+
+
+def _read(path):  # text of ``path`` decoded as UTF-8; "" when the path does not exist
+    if not os.path.exists(path):
+        return ""
+    with open(path, encoding="utf-8") as f:
+        return f.read()
+
+
+class TestPluginParity(unittest.TestCase):
+    """Every plugin's docs must cover the same engine features."""
+
+    def _skill_docs(self):
+        """Yield (plugin name, text) for each SKILL.md that exists."""
+        for name, path in PLUGIN_SKILL_MDS.items():
+            text = _read(path)
+            if text:
+                yield name, text
+
+    def test_all_skill_mds_mention_all_backends(self):
+        """Each SKILL.md that exists must name every canonical backend."""
+        for name, text in self._skill_docs():
+            for backend in CANONICAL_BACKENDS:
+                self.assertIn(backend, text,
+                              f"{name}/SKILL.md missing backend '{backend}'")
+
+    def test_all_skill_mds_mention_schedule(self):
+        """'unschedule' contains 'schedule', so the bare verb is matched apart."""
+        for name, text in self._skill_docs():
+            self.assertRegex(text.lower(), r"(?<!un)schedule",
+                             f"{name}/SKILL.md missing 'schedule'")
+            self.assertIn("unschedule", text.lower(),
+                          f"{name}/SKILL.md missing 'unschedule'")
+
+    def test_copilot_instructions_mention_schedule(self):
+        text = _read(COPILOT_INSTRUCTIONS)
+        self.assertIn("sleep_schedule", text)
+        self.assertIn("sleep_unschedule", text)
+
+    def test_copilot_instructions_mention_all_backends(self):
+        text = _read(COPILOT_INSTRUCTIONS)
+        for backend in CANONICAL_BACKENDS:
+            self.assertIn(backend, text,
+                          f"copilot-instructions missing backend '{backend}'")
+
+    def test_mcp_server_has_schedule_tools(self):
+        text = _read(MCP_SERVER)
+        self.assertIn("sleep_schedule", text)
+        self.assertIn("sleep_unschedule", text)
+
+    def test_mcp_schema_has_key_params(self):
+        text = _read(MCP_SERVER)
+        for param in ["source", "tasks_file", "target_skill_path",
+                       "max_sessions", "max_tasks", "auto_adopt", "json"]:
+            self.assertIn(f'"{param}"', text,
+                          f"MCP schema missing param '{param}'")
+
+    def test_all_skill_mds_mention_memory_consolidation(self):
+        for name, text in self._skill_docs():
+            text = text.lower()
+            has_mention = ("memory consolidation" in text or "evolve_memory" in text
+                           or ("consolidate" in text and "memory" in text))
+            self.assertTrue(has_mention,
+                            f"{name}/SKILL.md missing memory consolidation docs")
+
+
+if __name__ == "__main__":
+    unittest.main()