Files
microsoft-SkillOpt/skillopt/optimizer/meta_skill.py
Cuzyoung 00602df9e9 feat(slow-update): add config-controlled gated / force-injected modes
Add optimizer.slow_update_gate_with_selection to control how epoch-boundary
slow-update guidance is applied:
- false (default): force-injected - inject guidance into current & best
  unconditionally (unchanged behavior).
- true: gated - evaluate the slow-update candidate on the selection set and
  accept/reject via the same validation gate as step-level updates
  (logic follows the SkillReflection ablation).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-31 02:02:23 +00:00

80 lines
2.9 KiB
Python

"""Optimizer-side meta skill memory for cross-epoch optimization guidance.
This module maintains a compact optimizer-facing memory distilled from
adjacent-epoch skill comparisons. Unlike ``slow_update``, it does not
modify the target skill document. Instead, it produces guidance meant to
improve future optimizer behavior when proposing, merging, and ranking edits.
"""
from __future__ import annotations
import traceback
from skillopt.model import chat_optimizer
from skillopt.optimizer.slow_update import format_comparison_text
from skillopt.prompts import load_prompt
from skillopt.utils import extract_json
def format_meta_skill_context(meta_skill_content: str) -> str:
    """Wrap optimizer meta-skill memory in a headed, prompt-ready block.

    Returns an empty string when the memory is missing or contains only
    whitespace. Otherwise returns a fixed Markdown heading and usage
    preamble followed by the memory text with surrounding whitespace
    removed.
    """
    memory = meta_skill_content.strip() if meta_skill_content else ""
    if memory == "":
        # Nothing to inject: callers get an empty context block.
        return ""

    heading = "## Optimizer Meta Skill\n"
    preamble = (
        "This is optimizer-side memory distilled from prior epoch transitions in "
        "this environment. Use it to improve how you propose, merge, and rank "
        "skill edits. Prefer it when the current evidence is ambiguous, but do "
        "not force it if the current trajectories clearly contradict it.\n\n"
    )
    return "".join((heading, preamble, memory))
def run_meta_skill(
    prev_skill: str,
    curr_skill: str,
    comparison_pairs: list[dict],
    *,
    prev_meta_skill_content: str = "",
    system_prompt: str | None = None,
) -> dict | None:
    """Produce updated optimizer-side meta skill from adjacent epochs.

    Builds a user prompt from the two last-step skills, the previous
    optimizer meta skill (or a first-update placeholder when it is empty),
    and the text produced by ``format_comparison_text``. The prompt is sent
    through ``chat_optimizer`` and the reply is parsed with ``extract_json``.

    Args:
        prev_skill: Last-step skill text of the previous epoch.
        curr_skill: Last-step skill text of the current epoch.
        comparison_pairs: Comparison records, passed unchanged to
            ``format_comparison_text``.
        prev_meta_skill_content: Optimizer meta skill that was available
            during the current epoch; blank or missing values are replaced
            by a placeholder sentence in the prompt.
        system_prompt: System prompt override. When ``None``, the
            ``"meta_skill"`` prompt is loaded via ``load_prompt``.

    Returns:
        A dict with stripped string values under ``"reasoning"`` and
        ``"meta_skill_content"``, or ``None`` when the model call or JSON
        extraction raises, the parsed reply is not a JSON object, or the
        meta skill content is missing or blank after stripping.
    """
    actual_system = (
        system_prompt if system_prompt is not None else load_prompt("meta_skill")
    )
    prev_meta_section = (
        (prev_meta_skill_content or "").strip()
        or "(No previous optimizer meta skill — this is the first update.)"
    )
    comparison_text = format_comparison_text(comparison_pairs)
    user = (
        f"## Previous Epoch Last-Step Skill\n{prev_skill}\n\n"
        f"## Current Epoch Last-Step Skill\n{curr_skill}\n\n"
        f"## Previous Optimizer Meta Skill\n"
        f"The following optimizer memory was available during the current epoch. "
        f"Reflect on whether it improved or harmed the quality of edits.\n\n"
        f"{prev_meta_section}\n\n"
        f"## Longitudinal Comparison (same tasks, two last-step skills)\n"
        f"{comparison_text}"
    )

    # Best-effort step: a failure of the model call or of JSON extraction is
    # reported on stderr and turned into "no update" rather than propagated.
    try:
        response, _ = chat_optimizer(
            system=actual_system,
            user=user,
            max_completion_tokens=16384,
            retries=3,
            stage="meta_skill",
        )
        result = extract_json(response)
    except Exception:  # noqa: BLE001
        traceback.print_exc()
        return None

    # A reply that parsed to something other than a JSON object (e.g. a list
    # or a bare string) carries no usable fields.
    if not isinstance(result, dict):
        return None

    raw_content = result.get("meta_skill_content")
    if not raw_content:
        return None
    # Validate AFTER stripping so a whitespace-only value is not reported as
    # a successful update with empty content.
    meta_skill_content = str(raw_content).strip()
    if not meta_skill_content:
        return None

    # ``or ""`` keeps a JSON null from being rendered as the string "None".
    reasoning = str(result.get("reasoning") or "").strip()
    return {
        "reasoning": reasoning,
        "meta_skill_content": meta_skill_content,
    }