mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 14:02:58 +08:00
Add GitHub Copilot backend to SkillOpt-Sleep
Add CopilotCliBackend that drives the GitHub Copilot CLI in non-interactive mode (copilot -p ... --output-format json) and parses the JSONL event stream for assistant.message content. Registered as the 'copilot' backend (with aliases) and wired through the CLI, config, experiment harness, and the Copilot MCP server's backend enum. - Force UTF-8 decoding of CLI output (fixes cp1252 UnicodeDecodeError on Windows when responses contain non-cp1252 bytes). - Minimise per-call startup: isolated COPILOT_HOME with built-in MCPs and custom instructions disabled, so user MCP servers are not spawned per call (~5x faster: 36s -> 7.4s). Override via SKILLOPT_SLEEP_COPILOT_HOME / SKILLOPT_SLEEP_COPILOT_MODEL / SKILLOPT_SLEEP_COPILOT_FULL_ENV. Validated end-to-end on real held-out tasks (researcher persona: 0.42 -> 1.00 lift; gate correctly rejects non-improving edits). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -45,8 +45,17 @@ Ask Copilot things like *"run the sleep cycle"*, *"what did the last sleep
|
||||
propose?"*, *"adopt the staged sleep proposal"*. Copilot calls the MCP tools:
|
||||
`sleep_status`, `sleep_dry_run`, `sleep_run`, `sleep_adopt`, `sleep_harvest`.
|
||||
|
||||
Each tool takes optional `project`, `backend` (`mock`/`claude`/`codex`), and
|
||||
`scope` arguments. Default backend is `mock` (no API spend).
|
||||
Each tool takes optional `project`, `backend` (`mock`/`claude`/`codex`/`copilot`), and
|
||||
`scope` arguments. Default backend is `mock` (no API spend). The `copilot`
|
||||
backend drives the GitHub Copilot CLI (`copilot -p ... --output-format json`)
|
||||
and requires the `copilot` CLI to be installed and authenticated.
|
||||
|
||||
For speed, the `copilot` backend runs each call against an isolated
|
||||
`COPILOT_HOME` with built-in MCP servers and custom instructions disabled, so
|
||||
your user MCP servers (including this project's own) are not spawned per call
|
||||
(~5x faster). Override with `SKILLOPT_SLEEP_COPILOT_HOME=<dir>`, pick a model
|
||||
with `SKILLOPT_SLEEP_COPILOT_MODEL`, or set `SKILLOPT_SLEEP_COPILOT_FULL_ENV=1`
|
||||
to use your real Copilot environment instead.
|
||||
|
||||
## Verify the server directly (no Copilot needed)
|
||||
|
||||
|
||||
@@ -45,8 +45,8 @@ _TOOL_SCHEMA = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"project": {"type": "string", "description": "Project dir to evolve (default: cwd)."},
|
||||
"backend": {"type": "string", "enum": ["mock", "claude", "codex"],
|
||||
"description": "mock = no API spend (default); claude/codex = real."},
|
||||
"backend": {"type": "string", "enum": ["mock", "claude", "codex", "copilot"],
|
||||
"description": "mock = no API spend (default); claude/codex/copilot = real."},
|
||||
"scope": {"type": "string", "enum": ["invoked", "all"]},
|
||||
},
|
||||
"additionalProperties": False,
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
Common flags:
|
||||
--project PATH project to evolve (default: cwd)
|
||||
--scope all|invoked harvest scope (default: invoked)
|
||||
--backend mock|claude|codex
|
||||
--backend mock|claude|codex|copilot
|
||||
--source claude|codex|auto
|
||||
--model NAME
|
||||
--lookback-hours N
|
||||
@@ -36,7 +36,7 @@ from skillopt_sleep.state import SleepState
|
||||
def _add_common(p: argparse.ArgumentParser) -> None:
|
||||
p.add_argument("--project", default="")
|
||||
p.add_argument("--scope", default="", choices=["", "all", "invoked"])
|
||||
p.add_argument("--backend", default="", choices=["", "mock", "claude", "codex"])
|
||||
p.add_argument("--backend", default="", choices=["", "mock", "claude", "codex", "copilot"])
|
||||
p.add_argument("--model", default="")
|
||||
p.add_argument("--codex-path", default="", help="path to the real @openai/codex binary")
|
||||
p.add_argument("--claude-home", default="", help="override ~/.claude (also isolates state)")
|
||||
|
||||
@@ -24,6 +24,7 @@ import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from skillopt_sleep.types import EditRecord, ReplayResult, TaskRecord
|
||||
@@ -788,6 +789,114 @@ class CodexCliBackend(CliBackend):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def resolve_copilot_path(explicit: str = "") -> str:
|
||||
"""Find the GitHub Copilot CLI (`copilot`) binary."""
|
||||
if explicit:
|
||||
return explicit
|
||||
env = os.environ.get("SKILLOPT_SLEEP_COPILOT_PATH")
|
||||
if env:
|
||||
return env
|
||||
import shutil
|
||||
found = shutil.which("copilot")
|
||||
return found or "copilot"
|
||||
|
||||
|
||||
class CopilotCliBackend(CliBackend):
|
||||
"""Drives the GitHub Copilot CLI in non-interactive mode.
|
||||
|
||||
Uses ``copilot -p <prompt> --output-format json`` and parses the emitted
|
||||
JSONL event stream, returning the concatenated ``assistant.message``
|
||||
content. The plain-text / ``--silent`` modes do not reliably stream the
|
||||
response to stdout on all platforms, so JSONL is used for robust capture.
|
||||
|
||||
The call runs in a clean temp cwd with streaming disabled and tools allowed
|
||||
(so non-interactive mode never blocks on a permission prompt); the prompts
|
||||
ask for final-answer text only, so no tool use is expected.
|
||||
|
||||
Startup overhead is minimised: each invocation points ``COPILOT_HOME`` at a
|
||||
dedicated, isolated config dir (no user ``mcp-config.json``, so the user's
|
||||
MCP servers — including this project's own — are NOT spawned, avoiding a
|
||||
slow recursive launch), and built-in MCP servers / custom instructions are
|
||||
disabled. Auth is read from the OS credential store / token env vars, which
|
||||
live outside ``COPILOT_HOME``, so isolation does not break authentication.
|
||||
Set ``SKILLOPT_SLEEP_COPILOT_HOME`` to override the isolated home, or set it
|
||||
empty / ``SKILLOPT_SLEEP_COPILOT_FULL_ENV=1`` to use the user's real
|
||||
environment instead.
|
||||
"""
|
||||
|
||||
name = "copilot"
|
||||
|
||||
def __init__(self, model: str = "", copilot_path: str = "", timeout: int = 240) -> None:
|
||||
super().__init__(model=model or os.environ.get("SKILLOPT_SLEEP_COPILOT_MODEL", ""),
|
||||
timeout=timeout)
|
||||
self.copilot_path = resolve_copilot_path(copilot_path)
|
||||
self.full_env = os.environ.get("SKILLOPT_SLEEP_COPILOT_FULL_ENV", "") == "1"
|
||||
# Stable isolated home so first-run setup is cached across calls.
|
||||
if self.full_env:
|
||||
self.copilot_home = ""
|
||||
else:
|
||||
self.copilot_home = os.environ.get("SKILLOPT_SLEEP_COPILOT_HOME") or os.path.join(
|
||||
tempfile.gettempdir(), "skillopt_sleep_copilot_home"
|
||||
)
|
||||
try:
|
||||
os.makedirs(self.copilot_home, exist_ok=True)
|
||||
except Exception:
|
||||
self.copilot_home = ""
|
||||
|
||||
def _call(self, prompt: str, *, max_tokens: int = 1024) -> str:
|
||||
clean_cwd = tempfile.mkdtemp(prefix="skillopt_sleep_copilot_")
|
||||
cmd = [
|
||||
self.copilot_path, "-p", prompt,
|
||||
"--output-format", "json",
|
||||
"--stream", "off",
|
||||
"--no-color",
|
||||
"--log-level", "none",
|
||||
"--allow-all-tools",
|
||||
"-C", clean_cwd,
|
||||
]
|
||||
if not self.full_env:
|
||||
# Drop unneeded startup work: no built-in (github) MCP server and no
|
||||
# AGENTS.md / custom-instruction loading. With an isolated home that
|
||||
# has no mcp-config.json, no user MCP servers spawn either.
|
||||
cmd += ["--disable-builtin-mcps", "--no-custom-instructions"]
|
||||
if self.model:
|
||||
cmd += ["--model", self.model]
|
||||
env = os.environ.copy()
|
||||
if self.copilot_home:
|
||||
env["COPILOT_HOME"] = self.copilot_home
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
cmd, capture_output=True, text=True, timeout=self.timeout, cwd=clean_cwd,
|
||||
encoding="utf-8", errors="replace", env=env,
|
||||
)
|
||||
except Exception:
|
||||
return ""
|
||||
finally:
|
||||
try:
|
||||
import shutil
|
||||
shutil.rmtree(clean_cwd, ignore_errors=True)
|
||||
except Exception:
|
||||
pass
|
||||
return self._parse_jsonl_response(proc.stdout or "")
|
||||
|
||||
@staticmethod
|
||||
def _parse_jsonl_response(raw: str) -> str:
|
||||
parts: List[str] = []
|
||||
for line in raw.splitlines():
|
||||
line = line.strip()
|
||||
if not line or not line.startswith("{"):
|
||||
continue
|
||||
try:
|
||||
obj = json.loads(line)
|
||||
except Exception:
|
||||
continue
|
||||
if obj.get("type") == "assistant.message":
|
||||
content = (obj.get("data") or {}).get("content")
|
||||
if isinstance(content, str) and content:
|
||||
parts.append(content)
|
||||
return "\n".join(parts).strip()
|
||||
|
||||
|
||||
class DualBackend(Backend):
|
||||
"""Route operations to two backends, à la SkillOpt's target vs optimizer.
|
||||
|
||||
@@ -1036,6 +1145,8 @@ def get_backend(
|
||||
if n in {"azure-responses", "azure_responses", "aoai-responses", "responses"}:
|
||||
eps = [e.strip() for e in azure_endpoint.split(",") if e.strip()] or None
|
||||
return AzureResponsesBackend(deployment=model, endpoints=eps)
|
||||
if n in {"copilot", "github_copilot", "copilot_cli", "gh_copilot"}:
|
||||
return CopilotCliBackend(model=model)
|
||||
return MockBackend()
|
||||
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ DEFAULTS: Dict[str, Any] = {
|
||||
"val_fraction": 0.34, # real tasks reserved to gate updates
|
||||
"test_fraction": 0.0, # real tasks reserved as the final held-out measure
|
||||
# ── optimizer ──────────────────────────────────────────────────────────
|
||||
"backend": "mock", # "mock" | "claude" | "codex"
|
||||
"backend": "mock", # "mock" | "claude" | "codex" | "copilot"
|
||||
"model": "", # backend-specific; "" => backend default
|
||||
"gate_mode": "on", # "on" (validation-gated) | "off" (greedy, no hard filter)
|
||||
"codex_path": "", # "" => auto-detect the real @openai/codex binary
|
||||
|
||||
@@ -134,7 +134,7 @@ def main(argv=None) -> int:
|
||||
ap = argparse.ArgumentParser(description="SkillOpt-Sleep validation experiment")
|
||||
ap.add_argument("--persona", default="researcher", choices=list(PERSONAS.keys()))
|
||||
ap.add_argument("--nights", type=int, default=4)
|
||||
ap.add_argument("--backend", default="mock", choices=["mock", "claude", "codex"])
|
||||
ap.add_argument("--backend", default="mock", choices=["mock", "claude", "codex", "copilot"])
|
||||
ap.add_argument("--model", default="", help="backend model override")
|
||||
ap.add_argument("--codex-path", default="", help="path to the real @openai/codex binary")
|
||||
ap.add_argument("--edit-budget", type=int, default=4)
|
||||
|
||||
Reference in New Issue
Block a user