diff --git a/plugins/copilot/README.md b/plugins/copilot/README.md index 769eac5..6171381 100644 --- a/plugins/copilot/README.md +++ b/plugins/copilot/README.md @@ -45,8 +45,17 @@ Ask Copilot things like *"run the sleep cycle"*, *"what did the last sleep propose?"*, *"adopt the staged sleep proposal"*. Copilot calls the MCP tools: `sleep_status`, `sleep_dry_run`, `sleep_run`, `sleep_adopt`, `sleep_harvest`. -Each tool takes optional `project`, `backend` (`mock`/`claude`/`codex`), and -`scope` arguments. Default backend is `mock` (no API spend). +Each tool takes optional `project`, `backend` (`mock`/`claude`/`codex`/`copilot`), and +`scope` arguments. Default backend is `mock` (no API spend). The `copilot` +backend drives the GitHub Copilot CLI (`copilot -p ... --output-format json`) +and requires the `copilot` CLI to be installed and authenticated. + +For speed, the `copilot` backend runs each call against an isolated +`COPILOT_HOME` with built-in MCP servers and custom instructions disabled, so +your user MCP servers (including this project's own) are not spawned per call +(~5x faster). Override the isolated home with `SKILLOPT_SLEEP_COPILOT_HOME=<dir>`, pick a model +with `SKILLOPT_SLEEP_COPILOT_MODEL`, or set `SKILLOPT_SLEEP_COPILOT_FULL_ENV=1` +to use your real Copilot environment instead. 
## Verify the server directly (no Copilot needed) diff --git a/plugins/copilot/mcp_server.py b/plugins/copilot/mcp_server.py index d03a95b..2c592ae 100755 --- a/plugins/copilot/mcp_server.py +++ b/plugins/copilot/mcp_server.py @@ -45,8 +45,8 @@ _TOOL_SCHEMA = { "type": "object", "properties": { "project": {"type": "string", "description": "Project dir to evolve (default: cwd)."}, - "backend": {"type": "string", "enum": ["mock", "claude", "codex"], - "description": "mock = no API spend (default); claude/codex = real."}, + "backend": {"type": "string", "enum": ["mock", "claude", "codex", "copilot"], + "description": "mock = no API spend (default); claude/codex/copilot = real."}, "scope": {"type": "string", "enum": ["invoked", "all"]}, }, "additionalProperties": False, diff --git a/skillopt_sleep/__main__.py b/skillopt_sleep/__main__.py index 2666ee6..d947491 100644 --- a/skillopt_sleep/__main__.py +++ b/skillopt_sleep/__main__.py @@ -9,7 +9,7 @@ Common flags: --project PATH project to evolve (default: cwd) --scope all|invoked harvest scope (default: invoked) - --backend mock|claude|codex + --backend mock|claude|codex|copilot --source claude|codex|auto --model NAME --lookback-hours N @@ -36,7 +36,7 @@ from skillopt_sleep.state import SleepState def _add_common(p: argparse.ArgumentParser) -> None: p.add_argument("--project", default="") p.add_argument("--scope", default="", choices=["", "all", "invoked"]) - p.add_argument("--backend", default="", choices=["", "mock", "claude", "codex"]) + p.add_argument("--backend", default="", choices=["", "mock", "claude", "codex", "copilot"]) p.add_argument("--model", default="") p.add_argument("--codex-path", default="", help="path to the real @openai/codex binary") p.add_argument("--claude-home", default="", help="override ~/.claude (also isolates state)") diff --git a/skillopt_sleep/backend.py b/skillopt_sleep/backend.py index 2ec5cdd..8977e0d 100644 --- a/skillopt_sleep/backend.py +++ b/skillopt_sleep/backend.py @@ -24,6 +24,7 
@@ import json import os import re import subprocess +import tempfile from typing import Any, Dict, List, Optional, Tuple from skillopt_sleep.types import EditRecord, ReplayResult, TaskRecord @@ -788,6 +789,155 @@ class CodexCliBackend(CliBackend): except Exception: pass
+def resolve_copilot_path(explicit: str = "") -> str:
+    """Locate the GitHub Copilot CLI (``copilot``) executable.
+
+    Resolution order: the ``explicit`` argument, then the
+    ``SKILLOPT_SLEEP_COPILOT_PATH`` environment variable, then the first
+    ``copilot`` found on ``PATH``; if none is available the bare command
+    name ``"copilot"`` is returned.
+    """
+    import shutil
+
+    return (
+        explicit
+        or os.environ.get("SKILLOPT_SLEEP_COPILOT_PATH")
+        or shutil.which("copilot")
+        or "copilot"
+    )
+
+
+class CopilotCliBackend(CliBackend):
+    """Drives the GitHub Copilot CLI in non-interactive mode.
+
+    Uses ``copilot -p <prompt> --output-format json`` and parses the emitted
+    JSONL event stream, returning the concatenated ``assistant.message``
+    content. The plain-text / ``--silent`` modes do not reliably stream the
+    response to stdout on all platforms, so JSONL is used for robust capture.
+
+    The call runs in a clean temp cwd with streaming disabled and tools allowed
+    (so non-interactive mode never blocks on a permission prompt); the prompts
+    ask for final-answer text only, so no tool use is expected.
+
+    Startup overhead is minimised: each invocation points ``COPILOT_HOME`` at a
+    dedicated, isolated config dir (no user ``mcp-config.json``, so the user's
+    MCP servers — including this project's own — are NOT spawned, avoiding a
+    slow recursive launch), and built-in MCP servers / custom instructions are
+    disabled. Auth is read from the OS credential store / token env vars, which
+    live outside ``COPILOT_HOME``, so isolation does not break authentication.
+    Set ``SKILLOPT_SLEEP_COPILOT_HOME`` to override the isolated home, or set it
+    empty / ``SKILLOPT_SLEEP_COPILOT_FULL_ENV=1`` to use the user's real
+    environment instead.
+    """
+
+    name = "copilot"
+
+    def __init__(self, model: str = "", copilot_path: str = "", timeout: int = 240) -> None:
+        """Resolve the CLI path, the model and the ``COPILOT_HOME`` policy.
+
+        Args:
+            model: Model name; defaults to ``SKILLOPT_SLEEP_COPILOT_MODEL``.
+            copilot_path: Explicit CLI path, see ``resolve_copilot_path``.
+            timeout: Timeout value passed on to ``CliBackend``.
+        """
+        super().__init__(
+            model=model or os.environ.get("SKILLOPT_SLEEP_COPILOT_MODEL", ""),
+            timeout=timeout,
+        )
+        self.copilot_path = resolve_copilot_path(copilot_path)
+        home_override = os.environ.get("SKILLOPT_SLEEP_COPILOT_HOME")
+        # An explicitly *empty* override opts out of isolation, just like
+        # SKILLOPT_SLEEP_COPILOT_FULL_ENV=1; an unset variable does not.
+        self.full_env = (
+            os.environ.get("SKILLOPT_SLEEP_COPILOT_FULL_ENV", "") == "1"
+            or home_override == ""
+        )
+        self.copilot_home = ""
+        if self.full_env:
+            return
+        # Stable isolated home so first-run setup is cached across calls.
+        # NOTE(review): the default is a predictable path in the shared temp
+        # dir; on multi-user hosts point SKILLOPT_SLEEP_COPILOT_HOME at a
+        # private directory instead.
+        isolated_home = home_override or os.path.join(
+            tempfile.gettempdir(), "skillopt_sleep_copilot_home"
+        )
+        try:
+            os.makedirs(isolated_home, mode=0o700, exist_ok=True)
+        except (OSError, ValueError):
+            # Best effort: without a usable dir, leave COPILOT_HOME alone.
+            return
+        self.copilot_home = isolated_home
+
+    def _call(self, prompt: str, *, max_tokens: int = 1024) -> str:
+        """Run one prompt through the CLI and return the assistant's text.
+
+        Returns ``""`` if launching or waiting for the CLI raises (missing
+        binary, timeout, ...) or if no ``assistant.message`` content is
+        emitted. ``max_tokens`` is not forwarded to the CLI.
+        """
+        import shutil
+
+        clean_cwd = tempfile.mkdtemp(prefix="skillopt_sleep_copilot_")
+        cmd = [
+            self.copilot_path, "-p", prompt,
+            "--output-format", "json",
+            "--stream", "off",
+            "--no-color",
+            "--log-level", "none",
+            "--allow-all-tools",
+            "-C", clean_cwd,
+        ]
+        if not self.full_env:
+            # Drop unneeded startup work: no built-in (github) MCP server and no
+            # AGENTS.md / custom-instruction loading. With an isolated home that
+            # has no mcp-config.json, no user MCP servers spawn either.
+            cmd += ["--disable-builtin-mcps", "--no-custom-instructions"]
+        if self.model:
+            cmd += ["--model", self.model]
+        env = os.environ.copy()
+        if self.copilot_home:
+            env["COPILOT_HOME"] = self.copilot_home
+        try:
+            proc = subprocess.run(
+                cmd, capture_output=True, text=True, timeout=self.timeout, cwd=clean_cwd,
+                encoding="utf-8", errors="replace", env=env,
+            )
+        except Exception:
+            # Deliberately best-effort: any launch/timeout failure means "no answer".
+            return ""
+        finally:
+            # ignore_errors=True already makes this cleanup non-raising.
+            shutil.rmtree(clean_cwd, ignore_errors=True)
+        return self._parse_jsonl_response(proc.stdout or "")
+
+    @staticmethod
+    def _parse_jsonl_response(raw: str) -> str:
+        """Join the ``content`` of every ``assistant.message`` JSONL event.
+
+        Lines that are blank, do not start with ``{``, fail to parse, or carry
+        another event ``type`` are skipped, as are events whose ``data`` is not
+        an object or whose ``content`` is not a non-empty string.
+        """
+        parts: List[str] = []
+        for line in raw.splitlines():
+            line = line.strip()
+            if not line.startswith("{"):
+                continue
+            try:
+                obj = json.loads(line)
+            except (ValueError, RecursionError):
+                continue
+            if not isinstance(obj, dict) or obj.get("type") != "assistant.message":
+                continue
+            data = obj.get("data")
+            # A malformed event (e.g. ``"data": "oops"``) must not crash the call.
+            content = data.get("content") if isinstance(data, dict) else None
+            if isinstance(content, str) and content:
+                parts.append(content)
+        return "\n".join(parts).strip()
+
+
 class DualBackend(Backend): """Route operations to two backends, à la SkillOpt's target vs optimizer. 
@@ -1036,6 +1145,8 @@ def get_backend( if n in {"azure-responses", "azure_responses", "aoai-responses", "responses"}: eps = [e.strip() for e in azure_endpoint.split(",") if e.strip()] or None return AzureResponsesBackend(deployment=model, endpoints=eps) + if n in {"copilot", "github_copilot", "copilot_cli", "gh_copilot"}: + return CopilotCliBackend(model=model) return MockBackend() diff --git a/skillopt_sleep/config.py b/skillopt_sleep/config.py index 0bfb5a2..0e7cb04 100644 --- a/skillopt_sleep/config.py +++ b/skillopt_sleep/config.py @@ -36,7 +36,7 @@ DEFAULTS: Dict[str, Any] = { "val_fraction": 0.34, # real tasks reserved to gate updates "test_fraction": 0.0, # real tasks reserved as the final held-out measure # ── optimizer ────────────────────────────────────────────────────────── - "backend": "mock", # "mock" | "claude" | "codex" + "backend": "mock", # "mock" | "claude" | "codex" | "copilot" "model": "", # backend-specific; "" => backend default "gate_mode": "on", # "on" (validation-gated) | "off" (greedy, no hard filter) "codex_path": "", # "" => auto-detect the real @openai/codex binary diff --git a/skillopt_sleep/experiments/run_experiment.py b/skillopt_sleep/experiments/run_experiment.py index 91a9ca9..1110f26 100644 --- a/skillopt_sleep/experiments/run_experiment.py +++ b/skillopt_sleep/experiments/run_experiment.py @@ -134,7 +134,7 @@ def main(argv=None) -> int: ap = argparse.ArgumentParser(description="SkillOpt-Sleep validation experiment") ap.add_argument("--persona", default="researcher", choices=list(PERSONAS.keys())) ap.add_argument("--nights", type=int, default=4) - ap.add_argument("--backend", default="mock", choices=["mock", "claude", "codex"]) + ap.add_argument("--backend", default="mock", choices=["mock", "claude", "codex", "copilot"]) ap.add_argument("--model", default="", help="backend model override") ap.add_argument("--codex-path", default="", help="path to the real @openai/codex binary") ap.add_argument("--edit-budget", type=int, 
default=4)