mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 14:02:58 +08:00
Bug fixes: - #52: bundle run-sleep.sh in Claude Code plugin + 4-level fallback - #58: add skillopt-sleep console script entry point in pyproject.toml - #62: filter headless claude -p replay sessions from harvest Plugin sync (Claude Code / Codex / Copilot / OpenClaw): - Document all 22 CLI flags, 7 actions, 4 backends across all SKILL.md files - Document config keys (preferences, gate_mode, dream_rollouts, etc.) - Document memory consolidation (evolve_memory / evolve_skill) - Add schedule/unschedule to all plugins - Copilot MCP: expand schema from 3 → 16 params + schedule tools - OpenClaw: add schedule/unschedule subcommands via shared scheduler Tests: - Cross-plugin parity test (prevents future feature drift) - MCP schema completeness test Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
46
plugins/claude-code/scripts/run-sleep.sh
Executable file
46
plugins/claude-code/scripts/run-sleep.sh
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
# SkillOpt-Sleep shared runner — used by all platform plugins (Claude Code,
# Codex, Copilot). Resolves the repo root (which contains the skillopt_sleep
# package), picks a Python >= 3.10, and execs the engine CLI.
#
# Usage: run-sleep.sh <run|dry-run|status|adopt|harvest|...> [args...]
#
# Environment (both optional):
#   CLAUDE_PLUGIN_ROOT   plugin dir, if set by Claude Code; the engine is then
#                        expected two levels above it.
#   SKILLOPT_SLEEP_REPO  explicit repo root (must contain skillopt_sleep/).
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

#######################################
# Locate the directory that holds the skillopt_sleep/ package.
# Probe order (first hit wins):
#   1. one level above this script      (<repo>/plugins/run-sleep.sh)
#   2. two levels above CLAUDE_PLUGIN_ROOT
#   3. SKILLOPT_SLEEP_REPO              (used verbatim, as given)
#   4. three levels above this script   (bundled copy at
#                                        <repo>/plugins/claude-code/scripts/)
#   5. upward search from the current working directory
# Globals:   SCRIPT_DIR, CLAUDE_PLUGIN_ROOT, SKILLOPT_SLEEP_REPO, PWD (read)
# Outputs:   the repo root on stdout
# Returns:   0 if found, 1 otherwise
#######################################
find_repo_root() {
  if [[ -d "$SCRIPT_DIR/../skillopt_sleep" ]]; then
    (cd "$SCRIPT_DIR/.." && pwd) && return 0
  fi
  if [[ -n "${CLAUDE_PLUGIN_ROOT:-}" && -d "$CLAUDE_PLUGIN_ROOT/../../skillopt_sleep" ]]; then
    (cd "$CLAUDE_PLUGIN_ROOT/../.." && pwd) && return 0
  fi
  if [[ -n "${SKILLOPT_SLEEP_REPO:-}" && -d "$SKILLOPT_SLEEP_REPO/skillopt_sleep" ]]; then
    printf '%s\n' "$SKILLOPT_SLEEP_REPO"
    return 0
  fi
  # The copy bundled with the Claude Code plugin sits three levels below the
  # repo root; without this probe a dev checkout only worked when
  # CLAUDE_PLUGIN_ROOT was set or the CWD happened to be inside the repo.
  if [[ -d "$SCRIPT_DIR/../../../skillopt_sleep" ]]; then
    (cd "$SCRIPT_DIR/../../.." && pwd) && return 0
  fi
  # last resort: search upward from CWD ("/" itself is not probed)
  local dir="$PWD"
  while [[ "$dir" != "/" ]]; do
    if [[ -d "$dir/skillopt_sleep" ]]; then
      printf '%s\n' "$dir"
      return 0
    fi
    dir="$(dirname "$dir")"
  done
  return 1
}

#######################################
# Pick the first interpreter on PATH that is Python >= 3.10.
# The version is compared as a tuple inside Python; concatenating
# major/minor digits ("39", "310", "40") mis-orders versions once the
# major number changes.
# Outputs:   the interpreter command name on stdout
# Returns:   0 if one was found, 1 otherwise
#######################################
pick_python() {
  local cand
  for cand in python3.12 python3.11 python3.10 python3; do
    command -v "$cand" >/dev/null 2>&1 || continue
    if "$cand" -c 'import sys; sys.exit(0 if sys.version_info >= (3, 10) else 1)' \
      >/dev/null 2>&1; then
      printf '%s\n' "$cand"
      return 0
    fi
  done
  return 1
}

REPO_ROOT="$(find_repo_root)" || REPO_ROOT=""
if [[ -z "$REPO_ROOT" ]]; then
  echo "[sleep] ERROR: could not locate the skillopt_sleep package. Set SKILLOPT_SLEEP_REPO to the repo root." >&2
  exit 1
fi

PY="$(pick_python)" || PY=""
if [[ -z "$PY" ]]; then
  echo "[sleep] ERROR: need Python >= 3.10 (found none)." >&2
  exit 1
fi

# No action given: default to the status action.
if [[ "$#" -eq 0 ]]; then set -- status; fi

# Run from the repo root so `-m skillopt_sleep` resolves the package from the
# checkout. Note that relative paths in the arguments are therefore resolved
# against the repo root, not the caller's directory.
cd "$REPO_ROOT" || {
  echo "[sleep] ERROR: cannot enter repo root: $REPO_ROOT" >&2
  exit 1
}
exec "$PY" -m skillopt_sleep "$@"
|
||||
@@ -1,11 +1,30 @@
|
||||
#!/usr/bin/env bash
# Claude Code plugin runner — thin wrapper over the shared runner so all
# platform plugins share one engine launcher.
#
# After marketplace install the plugin is isolated in a cache directory and
# the repo-relative path no longer works. We try four locations:
#   1. Co-located run-sleep.sh (bundled copy — works in marketplace cache)
#   2. Repo-relative ../../run-sleep.sh (dev checkout)
#   3. CLAUDE_PLUGIN_ROOT/../run-sleep.sh (plugin env variable)
#   4. SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh (explicit env)
set -euo pipefail

HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# abs_dir DIR — print DIR as an absolute path, or nothing when it cannot be
# entered. Never fails, so it is safe under `set -e`.
abs_dir() {
  (cd "$1" 2>/dev/null && pwd) || true
}

# Candidate directories in priority order. Entries that could not be resolved
# are stored as empty strings and skipped below, so a failed lookup can never
# degrade into probing "/run-sleep.sh" at the filesystem root.
candidate_dirs=("$HERE" "$(abs_dir "$HERE/../..")")
if [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then
  candidate_dirs+=("$(abs_dir "$CLAUDE_PLUGIN_ROOT/..")")
fi
if [[ -n "${SKILLOPT_SLEEP_REPO:-}" ]]; then
  candidate_dirs+=("$SKILLOPT_SLEEP_REPO/plugins")
fi

SHARED=""
for dir in "${candidate_dirs[@]}"; do
  if [[ -n "$dir" && -f "$dir/run-sleep.sh" ]]; then
    SHARED="$dir/run-sleep.sh"
    break
  fi
done

if [[ -z "$SHARED" ]]; then
  echo "[sleep] ERROR: cannot locate run-sleep.sh." >&2
  echo "[sleep] Set SKILLOPT_SLEEP_REPO to the SkillOpt repo root, or pip install skillopt." >&2
  exit 1
fi

# Hand over to the shared runner, forwarding every argument untouched.
exec bash "$SHARED" "$@"
|
||||
|
||||
@@ -54,6 +54,53 @@ Prefer the `/skillopt-sleep` command. Under the hood it calls the bundled runner
|
||||
- Add `--backend claude` or `--backend codex` to spend the user's real budget for genuine improvement.
|
||||
- Scope defaults to the invoked project; `--scope all` harvests every project.
|
||||
|
||||
### Scheduling
|
||||
|
||||
```bash
|
||||
"${CLAUDE_PLUGIN_ROOT}/scripts/sleep.sh" schedule --project "$(pwd)" --hour 3 --minute 17
|
||||
"${CLAUDE_PLUGIN_ROOT}/scripts/sleep.sh" unschedule --project "$(pwd)"
|
||||
```
|
||||
|
||||
Installs a nightly cron entry. `unschedule --all` removes every managed entry.
|
||||
|
||||
## All CLI flags
|
||||
|
||||
| Flag | Default | Description |
|
||||
|------|---------|-------------|
|
||||
| `--project PATH` | cwd | Project directory to evolve |
|
||||
| `--scope all\|invoked` | invoked | Harvest scope |
|
||||
| `--backend mock\|claude\|codex\|copilot` | mock | Replay backend (mock = no API spend) |
|
||||
| `--model NAME` | backend default | Override the model used for replay |
|
||||
| `--source claude\|codex\|auto` | claude | Transcript source |
|
||||
| `--lookback-hours N` | 72 | Harvest window |
|
||||
| `--max-sessions N` | unlimited | Cap harvested sessions |
|
||||
| `--max-tasks N` | 40 | Cap mined tasks |
|
||||
| `--target-skill-path PATH` | auto | Explicit SKILL.md to evolve |
|
||||
| `--tasks-file PATH` | — | Reviewed TaskRecord JSON (skip harvest) |
|
||||
| `--progress` | off | Print phase progress to stderr |
|
||||
| `--auto-adopt` | off | Auto-adopt if gate passes |
|
||||
| `--edit-budget N` | 4 | Max bounded edits per night |
|
||||
| `--json` | off | Machine-readable JSON output |
|
||||
|
||||
## Config keys (`~/.skillopt-sleep/config.json`)
|
||||
|
||||
Beyond the CLI flags, advanced behavior is controlled via config:
|
||||
|
||||
- **`preferences`** — free-text house rules injected into the optimizer's reflect step (e.g. "Always use async/await", "Answers in `\boxed{}`").
|
||||
- **`gate_mode`** — `on` (default, validation-gated) or `off` (greedy, accept all edits).
|
||||
- **`gate_metric`** — `hard`, `soft`, or `mixed` (default). Controls how the held-out gate scores.
|
||||
- **`dream_rollouts`** — >1 enables multi-rollout contrastive reflection per task.
|
||||
- **`recall_k`** — >0 recalls K similar past tasks into the dream (long-term memory).
|
||||
- **`evolve_memory`** / **`evolve_skill`** — independently toggle CLAUDE.md vs SKILL.md consolidation.
|
||||
|
||||
## Memory consolidation
|
||||
|
||||
The sleep cycle can consolidate both:
|
||||
- **SKILL.md** — the managed skill file (bounded edits: add/delete/replace)
|
||||
- **CLAUDE.md** — the project memory (same bounded edits)
|
||||
|
||||
Both are gated by the same held-out validation score. Set `evolve_memory: false` to consolidate only skills, or `evolve_skill: false` for only memory.
|
||||
|
||||
## Hard rules
|
||||
|
||||
- **Never** hand-edit the user's `CLAUDE.md` / `SKILL.md` as part of this skill.
|
||||
|
||||
@@ -52,7 +52,7 @@ bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" run --project "$(pwd)" --source
|
||||
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" adopt --project "$(pwd)"
|
||||
```
|
||||
|
||||
Actions are `status`, `harvest`, `dry-run`, `run`, and `adopt`.
|
||||
Actions are `status`, `harvest`, `dry-run`, `run`, `adopt`, `schedule`, and `unschedule`.
|
||||
|
||||
- Default backend is `mock`, which is deterministic and spends no API budget.
|
||||
- `--backend codex` uses the user's Codex budget for real improvement.
|
||||
@@ -61,6 +61,43 @@ Actions are `status`, `harvest`, `dry-run`, `run`, and `adopt`.
|
||||
- Keep `dry-run --backend mock` as the first smoke check unless the user
|
||||
explicitly asked for a real optimization run.
|
||||
|
||||
### Scheduling
|
||||
|
||||
```bash
|
||||
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" schedule --project "$(pwd)" --hour 3 --minute 17
|
||||
bash "$SKILLOPT_SLEEP_REPO/plugins/run-sleep.sh" unschedule --project "$(pwd)"
|
||||
```
|
||||
|
||||
Installs a nightly cron entry. `unschedule --all` removes every managed entry.
|
||||
|
||||
### All backends
|
||||
|
||||
- `--backend mock` — deterministic, no API spend (default)
|
||||
- `--backend claude` — uses the Claude CLI
|
||||
- `--backend codex` — uses the Codex CLI
|
||||
- `--backend copilot` — uses the GitHub Copilot CLI
|
||||
|
||||
### Additional flags
|
||||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| `--auto-adopt` | Auto-adopt if the gate passes (default: stage only) |
|
||||
| `--edit-budget N` | Max bounded edits per night (default: 4) |
|
||||
| `--lookback-hours N` | Harvest window in hours (default: 72) |
|
||||
| `--json` | Machine-readable JSON output |
|
||||
|
||||
### Config keys (`~/.skillopt-sleep/config.json`)
|
||||
|
||||
- **`preferences`** — free-text house rules for the optimizer
|
||||
- **`gate_mode`** — `on` (validation-gated, default) or `off` (greedy)
|
||||
- **`gate_metric`** — `hard` | `soft` | `mixed` (default)
|
||||
- **`dream_rollouts`** — >1 for multi-rollout contrastive reflection
|
||||
- **`recall_k`** — >0 recalls similar past tasks from the archive
|
||||
|
||||
### Memory consolidation
|
||||
|
||||
The sleep cycle consolidates both **memory** (AGENTS.md / CLAUDE.md) and **skills** (SKILL.md) by default. Each is independently toggleable via `evolve_memory` / `evolve_skill` config keys. Both are gated by the same held-out validation score.
|
||||
|
||||
## Steps
|
||||
|
||||
1. Run the requested action; capture stdout.
|
||||
|
||||
@@ -19,6 +19,24 @@ my preferences", or "make the agent improve from past usage", use the MCP tools:
|
||||
- `sleep_run` — full cycle, stages a reviewed proposal (nothing live changes)
|
||||
- `sleep_adopt` — apply the staged proposal (backs up first)
|
||||
- `sleep_harvest` — list mined recurring tasks
|
||||
- `sleep_schedule` — install a nightly cron entry (set `hour`/`minute`)
|
||||
- `sleep_unschedule` — remove the nightly cron entry
|
||||
|
||||
### Key parameters (pass as MCP tool arguments)
|
||||
|
||||
- `backend` — `mock` (default, free), `claude`, `codex`, or `copilot`
|
||||
- `source` — `claude`, `codex`, or `auto` (where to read transcripts)
|
||||
- `target_skill_path` — explicit SKILL.md to evolve
|
||||
- `tasks_file` — pre-built TaskRecord JSON (skip harvest)
|
||||
- `max_tasks` / `max_sessions` — cap workload
|
||||
- `auto_adopt` — auto-adopt if the gate passes
|
||||
- `json` — machine-readable output for programmatic use
|
||||
|
||||
### Advanced config (`~/.skillopt-sleep/config.json`)
|
||||
|
||||
- `preferences` — free-text house rules for the optimizer
|
||||
- `gate_mode` — `on` (default, validation-gated) or `off` (greedy); `dream_rollouts` — >1 enables multi-rollout contrastive reflection
|
||||
- `evolve_memory` / `evolve_skill` — toggle which docs consolidate
|
||||
|
||||
Always show the user the held-out baseline → candidate score and the proposed
|
||||
edits before suggesting `sleep_adopt`. Never hand-edit the user's memory/skill
|
||||
|
||||
@@ -38,16 +38,48 @@ TOOLS = [
|
||||
"description": "Apply the latest staged proposal to CLAUDE.md/SKILL.md (backs up first)."},
|
||||
{"name": "sleep_harvest", "action": "harvest",
|
||||
"description": "Debug: list the recurring tasks mined from recent sessions."},
|
||||
{"name": "sleep_schedule", "action": "schedule",
|
||||
"description": "Install a nightly cron entry to run the sleep cycle automatically."},
|
||||
{"name": "sleep_unschedule", "action": "unschedule",
|
||||
"description": "Remove the nightly cron entry for a project."},
|
||||
]
|
||||
_BY_NAME = {t["name"]: t for t in TOOLS}
|
||||
|
||||
_TOOL_SCHEMA = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"project": {"type": "string", "description": "Project dir to evolve (default: cwd)."},
|
||||
"project": {"type": "string",
|
||||
"description": "Project dir to evolve (default: cwd)."},
|
||||
"backend": {"type": "string", "enum": ["mock", "claude", "codex", "copilot"],
|
||||
"description": "mock = no API spend (default); claude/codex/copilot = real."},
|
||||
"scope": {"type": "string", "enum": ["invoked", "all"]},
|
||||
"scope": {"type": "string", "enum": ["invoked", "all"],
|
||||
"description": "Harvest scope (default: invoked project only)."},
|
||||
"source": {"type": "string", "enum": ["claude", "codex", "auto"],
|
||||
"description": "Transcript source (default: claude)."},
|
||||
"model": {"type": "string",
|
||||
"description": "Backend-specific model override."},
|
||||
"tasks_file": {"type": "string",
|
||||
"description": "Path to reviewed TaskRecord JSON (skips harvest)."},
|
||||
"target_skill_path": {"type": "string",
|
||||
"description": "Explicit SKILL.md path to evolve/stage/adopt."},
|
||||
"progress": {"type": "boolean",
|
||||
"description": "Print phase progress to stderr."},
|
||||
"max_sessions": {"type": "integer",
|
||||
"description": "Cap harvested sessions per run."},
|
||||
"max_tasks": {"type": "integer",
|
||||
"description": "Cap mined tasks per run."},
|
||||
"lookback_hours": {"type": "integer",
|
||||
"description": "Harvest window in hours (default: 72)."},
|
||||
"auto_adopt": {"type": "boolean",
|
||||
"description": "Auto-adopt if gate passes (default: false)."},
|
||||
"json": {"type": "boolean",
|
||||
"description": "Return machine-readable JSON output."},
|
||||
"edit_budget": {"type": "integer",
|
||||
"description": "Max bounded edits per night (default: 4)."},
|
||||
"hour": {"type": "integer",
|
||||
"description": "Hour for schedule (0-23, default: 3)."},
|
||||
"minute": {"type": "integer",
|
||||
"description": "Minute for schedule (0-59, default: 17)."},
|
||||
},
|
||||
"additionalProperties": False,
|
||||
}
|
||||
@@ -56,15 +88,35 @@ _TOOL_SCHEMA = {
|
||||
def _run_engine(action: str, args: dict) -> str:
|
||||
py = sys.executable or "python3"
|
||||
cmd = [py, "-m", "skillopt_sleep", action]
|
||||
if args.get("project"):
|
||||
cmd += ["--project", str(args["project"])]
|
||||
if args.get("backend"):
|
||||
cmd += ["--backend", str(args["backend"])]
|
||||
if args.get("scope"):
|
||||
cmd += ["--scope", str(args["scope"])]
|
||||
# String-valued flags
|
||||
for flag, key in [
|
||||
("--project", "project"), ("--backend", "backend"),
|
||||
("--scope", "scope"), ("--source", "source"),
|
||||
("--model", "model"), ("--tasks-file", "tasks_file"),
|
||||
("--target-skill-path", "target_skill_path"),
|
||||
]:
|
||||
val = args.get(key)
|
||||
if val:
|
||||
cmd += [flag, str(val)]
|
||||
# Integer-valued flags
|
||||
for flag, key in [
|
||||
("--max-sessions", "max_sessions"), ("--max-tasks", "max_tasks"),
|
||||
("--lookback-hours", "lookback_hours"), ("--edit-budget", "edit_budget"),
|
||||
("--hour", "hour"), ("--minute", "minute"),
|
||||
]:
|
||||
val = args.get(key)
|
||||
if val is not None:
|
||||
cmd += [flag, str(int(val))]
|
||||
# Boolean flags
|
||||
for flag, key in [
|
||||
("--progress", "progress"), ("--auto-adopt", "auto_adopt"),
|
||||
("--json", "json"),
|
||||
]:
|
||||
if args.get(key):
|
||||
cmd.append(flag)
|
||||
try:
|
||||
proc = subprocess.run(cmd, cwd=REPO_ROOT, capture_output=True, text=True, timeout=3600)
|
||||
except Exception as e: # noqa: BLE001
|
||||
except Exception as e:
|
||||
return f"[error] failed to run engine: {e}"
|
||||
out = (proc.stdout or "").strip()
|
||||
err = (proc.stderr or "").strip()
|
||||
|
||||
@@ -52,6 +52,39 @@ python3 run_sleep.py --dry-run
|
||||
python3 run_sleep.py --tasks tests/research-cron-tasks.json
|
||||
```
|
||||
|
||||
## Scheduling
|
||||
|
||||
```bash
|
||||
python3 slash_sleep.py schedule --hour 3 --minute 17
|
||||
python3 slash_sleep.py unschedule
|
||||
python3 slash_sleep.py unschedule --all
|
||||
```
|
||||
|
||||
Installs a nightly cron entry using the shared SkillOpt-Sleep scheduler. This is an alternative to the external `run_sleep_cron.sh` script.
|
||||
|
||||
## Alternative backends
|
||||
|
||||
While OpenClaw defaults to `openclaw-deepseek` (DeepSeek V4 Pro + Ollama), the shared engine also supports:
|
||||
- `--backend mock` — deterministic, no API spend (for testing)
|
||||
- `--backend claude` — uses the Claude CLI
|
||||
- `--backend codex` — uses the Codex CLI
|
||||
- `--backend copilot` — uses the GitHub Copilot CLI
|
||||
|
||||
These can be used via the engine directly (`python -m skillopt_sleep`).
|
||||
|
||||
## Shared-engine flags
|
||||
|
||||
When invoking the engine directly, all standard flags are available:
|
||||
- `--source codex` / `--source auto` — harvest from Codex Desktop sessions
|
||||
- `--tasks-file PATH` — use a pre-built task set
|
||||
- `--target-skill-path PATH` — explicit SKILL.md target
|
||||
- `--max-tasks N` / `--max-sessions N` — cap workload
|
||||
- `--progress` — print phase progress
|
||||
- `--json` — machine-readable output
|
||||
- `--auto-adopt` — auto-adopt if gate passes
|
||||
|
||||
Config keys: `preferences`, `gate_mode`, `gate_metric`, `dream_rollouts`, `recall_k`, `evolve_memory`, `evolve_skill`.
|
||||
|
||||
## Config (config.json)
|
||||
|
||||
Key knobs:
|
||||
|
||||
@@ -207,6 +207,30 @@ def reject(night: str = None) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def schedule_cmd(hour: int, minute: int) -> int:
    """Install a nightly cron entry via the shared SkillOpt-Sleep scheduler.

    Args:
        hour: Hour of day for the nightly run; must be within 0-23.
        minute: Minute of the hour for the nightly run; must be within 0-59.

    Returns:
        0 after the scheduler call returns, 1 when the time is out of range or
        the ``skillopt_sleep.scheduler`` module cannot be imported.
    """
    # Reject impossible times before touching the scheduler, so nothing gets
    # installed for a time that can never fire.
    if not 0 <= hour <= 23:
        print(f"ERROR: hour must be between 0 and 23 (got {hour})")
        return 1
    if not 0 <= minute <= 59:
        print(f"ERROR: minute must be between 0 and 59 (got {minute})")
        return 1
    # Imported lazily: the scheduler ships with the shared engine, which may
    # not be installed alongside this plugin.
    try:
        from skillopt_sleep.scheduler import schedule
    except ImportError:
        print("ERROR: skillopt_sleep.scheduler not available — is SkillOpt-Sleep installed?")
        return 1
    result = schedule(hour=hour, minute=minute)
    print(result)
    return 0
|
||||
|
||||
|
||||
def unschedule_cmd(all_projects: bool) -> int:
    """Remove cron entry via the shared SkillOpt-Sleep scheduler.

    Args:
        all_projects: Forwarded unchanged to the scheduler's ``unschedule``
            as its ``all_projects`` keyword.

    Returns:
        0 once the scheduler call has returned and its result was printed,
        1 when ``skillopt_sleep.scheduler`` cannot be imported.
    """
    try:
        # Lazy import: the scheduler lives in the shared engine package.
        from skillopt_sleep.scheduler import unschedule as remove_entries
    except ImportError:
        print("ERROR: skillopt_sleep.scheduler not available — is SkillOpt-Sleep installed?")
        return 1
    else:
        outcome = remove_entries(all_projects=all_projects)
        print(outcome)
        return 0
|
||||
|
||||
|
||||
def cost() -> int:
|
||||
"""Estimate per-night cost based on the actual measurement from Phase 2.
|
||||
|
||||
@@ -265,6 +289,12 @@ def main():
|
||||
p_reject = sub.add_parser("reject", help="discard most recent staging")
|
||||
p_reject.add_argument("night", nargs="?", default=None)
|
||||
sub.add_parser("cost", help="estimate cost")
|
||||
p_schedule = sub.add_parser("schedule", help="install nightly cron entry")
|
||||
p_schedule.add_argument("--hour", type=int, default=3, help="hour (0-23)")
|
||||
p_schedule.add_argument("--minute", type=int, default=0, help="minute (0-59)")
|
||||
p_unschedule = sub.add_parser("unschedule", help="remove cron entry")
|
||||
p_unschedule.add_argument("--all", dest="all_projects", action="store_true",
|
||||
help="remove entries for all projects")
|
||||
|
||||
args = ap.parse_args()
|
||||
|
||||
@@ -282,6 +312,10 @@ def main():
|
||||
return reject(args.night)
|
||||
if args.cmd == "cost":
|
||||
return cost()
|
||||
if args.cmd == "schedule":
|
||||
return schedule_cmd(args.hour, args.minute)
|
||||
if args.cmd == "unschedule":
|
||||
return unschedule_cmd(args.all_projects)
|
||||
return 1
|
||||
|
||||
|
||||
|
||||
@@ -58,6 +58,7 @@ all = [
|
||||
[project.scripts]
|
||||
skillopt-train = "scripts.train:main"
|
||||
skillopt-eval = "scripts.eval_only:main"
|
||||
skillopt-sleep = "skillopt_sleep.__main__:main"
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/microsoft/SkillOpt"
|
||||
|
||||
@@ -111,6 +111,56 @@ def _is_meta_prompt(text: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
# ── Issue #62: filter headless replay sessions ─────────────────────────
|
||||
|
||||
# Prompt markers generated by the engine's own headless `claude -p` calls
|
||||
# (judge, reflect, attempt). If the sole user prompt in a single-turn
|
||||
# session matches any of these, the session is engine-generated, not a
|
||||
# real user task.
|
||||
_REPLAY_PROMPT_MARKERS = (
|
||||
"## CURRENT SKILL",
|
||||
"## FAILED TASKS",
|
||||
"## SUCCESSFUL TASKS",
|
||||
"## OUTPUT FORMAT",
|
||||
"You are a strict grader",
|
||||
"Score the response 0.0-1.0",
|
||||
"You are SkillOpt-Sleep",
|
||||
"## TASK\n",
|
||||
"## SKILL\n",
|
||||
)
|
||||
|
||||
|
||||
def _is_headless_replay(digest: "SessionDigest") -> bool:
|
||||
"""Detect sessions created by the engine's own headless replay calls.
|
||||
|
||||
Heuristics (conservatively applied):
|
||||
1. Session has exactly 1 user turn AND
|
||||
2. The sole prompt matches engine-generated patterns (grader/reflect),
|
||||
OR the session lasted < 3 seconds (programmatic, not interactive).
|
||||
Multi-turn sessions are always kept (interactive by definition).
|
||||
"""
|
||||
if digest.n_user_turns > 1:
|
||||
return False
|
||||
if digest.n_user_turns == 0:
|
||||
return True
|
||||
prompt = digest.user_prompts[0] if digest.user_prompts else ""
|
||||
for marker in _REPLAY_PROMPT_MARKERS:
|
||||
if marker in prompt:
|
||||
return True
|
||||
# Sub-3-second single-turn sessions are almost certainly programmatic.
|
||||
if digest.started_at and digest.ended_at:
|
||||
try:
|
||||
from datetime import datetime
|
||||
fmt = "%Y-%m-%dT%H:%M:%S"
|
||||
start = datetime.strptime(digest.started_at[:19], fmt)
|
||||
end = datetime.strptime(digest.ended_at[:19], fmt)
|
||||
if (end - start).total_seconds() < 3:
|
||||
return True
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
return False
|
||||
|
||||
|
||||
def digest_transcript(path: str) -> Optional[SessionDigest]:
|
||||
"""Build a SessionDigest from one ``<sessionId>.jsonl`` transcript."""
|
||||
session_id = os.path.splitext(os.path.basename(path))[0]
|
||||
@@ -236,6 +286,8 @@ def harvest(
|
||||
d = digest_transcript(p)
|
||||
if d is None:
|
||||
continue
|
||||
if _is_headless_replay(d):
|
||||
continue # Issue #62: skip engine's own headless replay sessions
|
||||
if not _project_matches(d.project or "", scope, invoked_project):
|
||||
continue
|
||||
if since_iso and d.ended_at and d.ended_at < since_iso:
|
||||
|
||||
37
tests/test_mcp_schema.py
Normal file
37
tests/test_mcp_schema.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""Tests for the Copilot MCP server schema completeness."""
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
# Allow importing from the plugin directory
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "plugins", "copilot"))
|
||||
|
||||
|
||||
class TestMcpSchema(unittest.TestCase):
|
||||
def test_schema_includes_all_engine_flags(self):
|
||||
from mcp_server import _TOOL_SCHEMA
|
||||
required_params = {
|
||||
"project", "backend", "scope", "source", "model",
|
||||
"tasks_file", "target_skill_path", "progress",
|
||||
"max_sessions", "max_tasks", "lookback_hours",
|
||||
"auto_adopt", "json", "edit_budget",
|
||||
}
|
||||
schema_props = set(_TOOL_SCHEMA["properties"].keys())
|
||||
missing = required_params - schema_props
|
||||
self.assertEqual(missing, set(), f"MCP schema missing: {missing}")
|
||||
|
||||
def test_all_backends_in_enum(self):
|
||||
from mcp_server import _TOOL_SCHEMA
|
||||
backends = _TOOL_SCHEMA["properties"]["backend"]["enum"]
|
||||
for b in ["mock", "claude", "codex", "copilot"]:
|
||||
self.assertIn(b, backends)
|
||||
|
||||
def test_schedule_tools_exist(self):
|
||||
from mcp_server import TOOLS
|
||||
names = {t["name"] for t in TOOLS}
|
||||
self.assertIn("sleep_schedule", names)
|
||||
self.assertIn("sleep_unschedule", names)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
87
tests/test_plugin_sync.py
Normal file
87
tests/test_plugin_sync.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""Cross-plugin parity tests — ensure all plugins document the same features.
|
||||
|
||||
Run: python3 -m pytest tests/test_plugin_sync.py -v
|
||||
"""
|
||||
import os
|
||||
import unittest
|
||||
|
||||
REPO = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
PLUGIN_SKILL_MDS = {
|
||||
"claude-code": os.path.join(REPO, "plugins/claude-code/skills/skillopt-sleep/SKILL.md"),
|
||||
"codex": os.path.join(REPO, "plugins/codex/skills/skillopt-sleep/SKILL.md"),
|
||||
"openclaw": os.path.join(REPO, "plugins/openclaw/SKILL.md"),
|
||||
}
|
||||
|
||||
MCP_SERVER = os.path.join(REPO, "plugins/copilot/mcp_server.py")
|
||||
COPILOT_INSTRUCTIONS = os.path.join(REPO, "plugins/copilot/copilot-instructions.snippet.md")
|
||||
|
||||
CANONICAL_BACKENDS = {"mock", "claude", "codex", "copilot"}
|
||||
|
||||
|
||||
def _read(path):
|
||||
if not os.path.exists(path):
|
||||
return ""
|
||||
with open(path, encoding="utf-8") as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
class TestPluginParity(unittest.TestCase):
    """Checks that every plugin's documentation covers the same features.

    Plugin docs are checked inside ``subTest`` blocks so that a missing or
    failing file is reported for that plugin alone and never hides the
    result for the others.
    """

    def _each_skill_md(self):
        """Yield ``(plugin_name, text)`` for every SKILL.md, inside a subTest.

        A missing (or empty) file is recorded as a skip for that plugin only.
        """
        for name, path in PLUGIN_SKILL_MDS.items():
            with self.subTest(plugin=name):
                text = _read(path)
                if not text:
                    self.skipTest(f"{name} SKILL.md not found")
                yield name, text

    def test_all_skill_mds_mention_all_backends(self):
        """Each SKILL.md names every canonical backend."""
        for name, text in self._each_skill_md():
            for backend in sorted(CANONICAL_BACKENDS):
                self.assertIn(backend, text,
                              f"{name}/SKILL.md missing backend '{backend}'")

    def test_all_skill_mds_mention_schedule(self):
        """Each SKILL.md documents both ``schedule`` and ``unschedule``."""
        for name, text in self._each_skill_md():
            lowered = text.lower()
            self.assertIn("unschedule", lowered,
                          f"{name}/SKILL.md missing 'unschedule'")
            # "unschedule" contains "schedule", so look for a standalone
            # mention only after removing every "unschedule".
            self.assertIn("schedule", lowered.replace("unschedule", ""),
                          f"{name}/SKILL.md missing 'schedule'")

    def test_copilot_instructions_mention_schedule(self):
        """The Copilot instructions snippet lists both scheduling tools."""
        text = _read(COPILOT_INSTRUCTIONS)
        self.assertIn("sleep_schedule", text)
        self.assertIn("sleep_unschedule", text)

    def test_copilot_instructions_mention_all_backends(self):
        """The Copilot instructions snippet names every canonical backend."""
        text = _read(COPILOT_INSTRUCTIONS)
        for backend in sorted(CANONICAL_BACKENDS):
            with self.subTest(backend=backend):
                self.assertIn(backend, text,
                              f"copilot-instructions missing backend '{backend}'")

    def test_mcp_server_has_schedule_tools(self):
        """The MCP server source mentions both scheduling tools."""
        text = _read(MCP_SERVER)
        self.assertIn("sleep_schedule", text)
        self.assertIn("sleep_unschedule", text)

    def test_mcp_schema_has_key_params(self):
        """The MCP server source contains each key parameter as a quoted name."""
        text = _read(MCP_SERVER)
        for param in ["source", "tasks_file", "target_skill_path",
                      "max_sessions", "max_tasks", "auto_adopt", "json"]:
            with self.subTest(param=param):
                self.assertIn(f'"{param}"', text,
                              f"MCP schema missing param '{param}'")

    def test_all_skill_mds_mention_memory_consolidation(self):
        """Each SKILL.md mentions memory consolidation in some form."""
        for name, text in self._each_skill_md():
            lowered = text.lower()
            has_mention = (
                "memory consolidation" in lowered
                or "evolve_memory" in lowered
                or ("consolidate" in lowered and "memory" in lowered)
            )
            self.assertTrue(has_mention,
                            f"{name}/SKILL.md missing memory consolidation docs")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user