From e4ea6a6771e797ef820cdd8bfea64c57e0481065 Mon Sep 17 00:00:00 2001 From: CharlesYang030 Date: Thu, 2 Jul 2026 22:11:10 +0800 Subject: [PATCH] chore(release): v0.2.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Highlights since v0.1.0: - feat: SkillOpt-Sleep engine — nightly offline self-evolution (harvest -> mine -> replay -> consolidate behind a validation gate), with multi-objective reward, experience replay + dream rollouts, slow-update long-term memory, and secret redaction in cycle diagnostics. Shipped as the `skillopt-sleep` CLI. - feat: cross-tool backends & plugin shells — Claude, Codex (+Desktop harvest), Copilot, Devin, and OpenClaw. - feat: SearchQA split materialization + rollout fail-fast. - fix: Windows robustness for claude/codex backends, hardened JSON fallback, Qwen timeout/thinking gating, Codex failure surfacing. Packaging: - Bump pyproject / skillopt / skillopt_sleep to 0.2.0. - Restore skillopt_webui to the packaged wheel. See CHANGELOG.md for the full changelog and contributor acknowledgements. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 100 +++++++++++++++++++++++++++++++++++++ README.md | 1 + docs/sleep/README.md | 14 ++++++ pyproject.toml | 9 ++-- skillopt/__init__.py | 2 +- skillopt_sleep/__init__.py | 2 +- 6 files changed, 122 insertions(+), 6 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..2a07c52 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,100 @@ +# Changelog + +All notable changes to SkillOpt are documented here. This project adheres to +[Semantic Versioning](https://semver.org/) and the format is based on +[Keep a Changelog](https://keepachangelog.com/). 
+ +## [0.2.0] — 2026-07-02 + +The headline of this release is **SkillOpt-Sleep**: a nightly offline +self-evolution engine that harvests a coding agent's real session +transcripts, mines recurring tasks, replays them offline, and consolidates +short-term experience into long-term memory and skills — all behind the same +held-out validation gate that keeps SkillOpt training honest. It ships as a +decoupled top-level package (`skillopt_sleep/`, zero dependency on the +research code) and as the new `skillopt-sleep` CLI. + +### Added +- **SkillOpt-Sleep engine** — nightly offline self-evolution cycle + (harvest → mine → replay → consolidate) behind a validation gate, exposed + as the `skillopt-sleep` console script and `python -m skillopt_sleep`. + - Multi-objective reward (accuracy / tokens / latency) with user preferences. + - Multi-rollout contrastive reflection under a token/time budget. + - Experience replay + controllable dream rollouts (opt-in). + - Slow-update long-term memory field (runs even with the gate off). + - 3-way train/val/test split with `gate_mode on|off`. + - Verifier-discipline validation gate, with a stress-test suite + (thanks @Tanmay9223, #87). +- **Cross-tool backends & plugin shells** for Claude Code, Codex, Copilot, + Devin, and OpenClaw: + - Codex Desktop transcript harvesting, skill-first Codex integration, and a + reviewed task-file flow (thanks @Kirchberg, #48, #49, #60). + - GitHub Copilot backend (`CopilotCliBackend`) + research-engine MCP plugin + (thanks @Dongbumlee, #50). + - Devin plugin: MCP server + ATIF-v1.7 harvest (thanks @xerxes-y, #88). + - OpenClaw shell for SkillOpt-Sleep (thanks @Elzlxx, #59). +- **SearchQA** split materialization helper and fail-fast on systemic rollout + failures, with a `searchqa` install extra (thanks @summerview1997, + #63, #64, #65). +- WebUI environment loading and backend preflight (thanks @summerview1997, #63). 
+ +### Changed +- Decoupled the Sleep engine into a standalone top-level `skillopt_sleep/` + package with zero dependency on the research code. +- Made `EnvAdapter.reflect` a shared default so reflect kwargs are no longer + dropped (thanks @imshunsuke, #44). +- English-only pass across the engine, plugins, and docs. + +### Fixed +- Windows robustness for the Claude/Codex backends, plus a hardened JSON + fallback path (thanks @Yif-Yang, #79). +- Reject prose pseudo-JSON wrapped in single quotes/backticks (#82). +- Surface Codex auth/model/version failures instead of silently scoring 0 + (thanks @dmmdea, #92). +- Redact secrets before persisting cycle diagnostics. +- Configure the `qwen_chat`/`minimax` backends so local LLM endpoints work + (thanks @imrehg, #85). +- Forward the Qwen target timeout and gate `enable_thinking` for vLLM targets + (thanks @mvanhorn, #40). +- Make `--bare` conditional on `ANTHROPIC_API_KEY` (#68), add a + `SKILLOPT_SLEEP_PYTHON` override with a lookback-hours first-run fallback + (#74), and fix ALFWorld gamefile paths relative to `ALFWORLD_DATA`. + +### Packaging +- Bump the `pyproject.toml` project version, `skillopt.__version__`, and + `skillopt_sleep.__version__` to `0.2.0`. +- Restore `skillopt_webui` to the built wheel (it was dropped when the + `packages.find` include list was made explicit). +- Add the `searchqa` extra and include `json_repair` in the `claude`, `qwen`, + and `all` extras. + +### Acknowledgements 🙏 +v0.2.0 landed thanks to our community contributors — thank you! 
+ +- @Kirchberg — Codex Desktop harvesting, skill-first Codex integration, + reviewed task-file flow (#48, #49, #60) +- @Dongbumlee — GitHub Copilot backend + research-engine MCP plugin (#50) +- @summerview1997 — SearchQA materialization, rollout fail-fast, WebUI + preflight (#63, #64, #65) +- @xerxes-y — Devin plugin: MCP server + ATIF-v1.7 harvest (#88) +- @Elzlxx — OpenClaw shell for SkillOpt-Sleep (#59) +- @imshunsuke — shared `EnvAdapter.reflect` default + docs fixes (#43, #44) +- @mvanhorn — Qwen timeout forwarding + `enable_thinking` gating (#40) +- @dmmdea — surface Codex auth/model/version failures (#92) +- @Tanmay9223 — verifier-discipline stress test (#87) +- @imrehg — `configure_qwen_chat` for local LLM endpoints (#85) +- @samuelgoofus-boop — community contributions + +Special thanks to @Yif-Yang for driving the SkillOpt-Sleep engine. + +**Full changelog:** https://github.com/microsoft/SkillOpt/compare/v0.1.0...v0.2.0 + +## [0.1.0] — 2026-06-02 + +Initial public release: the full training loop (rollout → reflect → +aggregate → select → update → evaluate), multi-backend support +(OpenAI / Azure / Claude / Qwen / MiniMax), six built-in benchmarks, and the +WebUI dashboard. + +[0.2.0]: https://github.com/microsoft/SkillOpt/releases/tag/v0.2.0 +[0.1.0]: https://github.com/microsoft/SkillOpt/releases/tag/v0.1.0 diff --git a/README.md b/README.md index e5cfce6..6f2c6ff 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ --- ## News 🔥🔥🔥 +- **[2026-07-02]** 🚀 **SkillOpt [v0.2.0](https://github.com/microsoft/SkillOpt/releases/tag/v0.2.0) is out on [PyPI](https://pypi.org/project/skillopt/)!** Headline feature: **SkillOpt-Sleep**, a nightly offline self-evolution engine (harvest → mine → replay → consolidate, all behind a held-out validation gate) with multi-objective reward, experience replay + dream rollouts, and long-term memory — now shipped as the `skillopt-sleep` CLI. 
This release also adds cross-tool backends and plugin shells for **Claude, Codex, Copilot, Devin, and OpenClaw**, SearchQA split materialization, Windows robustness, and hardened JSON parsing. See the [release notes](https://github.com/microsoft/SkillOpt/releases/tag/v0.2.0) for the full changelog and contributor acknowledgements. - **[2026-06-15]** 😴 **SkillOpt-Sleep (preview)** — a nightly offline self-evolution companion for local coding agents (Claude Code / Codex / Copilot): review past sessions, replay recurring tasks, and consolidate validated skills behind a held-out gate. See **[`docs/sleep/README.md`](docs/sleep/README.md)** for what it is, how to use it, and results. - **[2026-06-03]** 🎉 **[gbrain](https://github.com/garrytan/gbrain), [gbrain-evals](https://github.com/garrytan/gbrain-evals/blob/main/docs/benchmarks/2026-06-03-skillopt.md), and [darwin-skill](https://github.com/alchaincyf/darwin-skill) have all integrated SkillOpt.** - **[2026-06-02]** 🎉 **SkillOpt [v0.1.0](https://github.com/microsoft/SkillOpt/releases/tag/v0.1.0) is now available on [PyPI](https://pypi.org/project/skillopt/)!** Install with `pip install skillopt`. This initial release includes the full training loop (rollout → reflect → aggregate → select → update → evaluate), multi-backend support (OpenAI / Azure / Claude / Qwen / MiniMax), six built-in benchmarks, and WebUI dashboard. diff --git a/docs/sleep/README.md b/docs/sleep/README.md index 13ba94c..b4fd45b 100644 --- a/docs/sleep/README.md +++ b/docs/sleep/README.md @@ -28,6 +28,20 @@ experience → long-term competence). 
## How to use it +### Quickest path: the `skillopt-sleep` CLI (pip) + +```bash +pip install skillopt # installs the engine + the `skillopt-sleep` command +skillopt-sleep dry-run # harvest + mine + replay, report only (changes nothing) +skillopt-sleep run # a full nightly cycle; the proposal is staged for review +skillopt-sleep status # show state + the latest staged proposal +skillopt-sleep adopt # apply the latest staged proposal +skillopt-sleep schedule # install a nightly cron entry for this project +``` + +The per-agent plugin shells below (Claude Code / Codex / Copilot) still come from the +repo; the CLI above is the standalone, pip-only way to run a cycle. + One engine, thin per-agent shells (see [`plugins/`](../../plugins)): | Platform | Folder | Install | diff --git a/pyproject.toml b/pyproject.toml index e9dfa71..69abfbf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "skillopt" -version = "0.1.0" +version = "0.2.0" description = "SkillOpt: Agentic Skill Optimization via Reflective Training Loops" readme = "README.md" license = {text = "MIT"} @@ -68,9 +68,10 @@ Repository = "https://github.com/microsoft/SkillOpt" Issues = "https://github.com/microsoft/SkillOpt/issues" [tool.setuptools.packages.find] -# skillopt* = the research package; skillopt_sleep = the open-source Sleep tool -# (decoupled, zero dependency on the research code). 
-include = ["skillopt", "skillopt.*", "skillopt_sleep", "skillopt_sleep.*", "scripts*"] +# skillopt* = the research package +# skillopt_sleep = the open-source Sleep tool (decoupled, zero dependency on the research code) +# skillopt_webui = the Gradio dashboard (always packaged; the `webui` extra supplies its dependencies) +include = ["skillopt", "skillopt.*", "skillopt_sleep", "skillopt_sleep.*", "skillopt_webui", "skillopt_webui.*", "scripts*"] [tool.ruff] line-length = 120 diff --git a/skillopt/__init__.py b/skillopt/__init__.py index a41cfaa..d370c6e 100644 --- a/skillopt/__init__.py +++ b/skillopt/__init__.py @@ -12,7 +12,7 @@ Pipeline stages: 6. Evaluate — validate candidate skill, accept/reject """ -__version__ = "0.1.0" +__version__ = "0.2.0" from skillopt.types import ( # noqa: F401 BatchSpec, diff --git a/skillopt_sleep/__init__.py b/skillopt_sleep/__init__.py index 8660d06..9c7581f 100644 --- a/skillopt_sleep/__init__.py +++ b/skillopt_sleep/__init__.py @@ -17,4 +17,4 @@ Public entry points: from __future__ import annotations __all__ = ["__version__"] -__version__ = "0.1.0" +__version__ = "0.2.0"