mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 14:02:58 +08:00
Add optimizer.slow_update_gate_with_selection to control how epoch-boundary slow-update guidance is applied: - false (default): force-injected - inject guidance into current & best unconditionally (unchanged behavior). - true: gated - evaluate the slow-update candidate on the selection set and accept/reject via the same validation gate as step-level updates (logic follows the SkillReflection ablation). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
80 lines
2.9 KiB
Python
80 lines
2.9 KiB
Python
"""Optimizer-side meta skill memory for cross-epoch optimization guidance.
|
|
|
|
This module maintains a compact optimizer-facing memory distilled from
|
|
adjacent-epoch skill comparisons. Unlike ``slow_update``, it does not
|
|
modify the target skill document. Instead, it produces guidance meant to
|
|
improve future optimizer behavior when proposing, merging, and ranking edits.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import traceback
|
|
|
|
from skillopt.model import chat_optimizer
|
|
from skillopt.optimizer.slow_update import format_comparison_text
|
|
from skillopt.prompts import load_prompt
|
|
from skillopt.utils import extract_json
|
|
|
|
|
|
def format_meta_skill_context(meta_skill_content: str) -> str:
    """Wrap the optimizer meta skill in a headed, prompt-ready section.

    Surrounding whitespace is stripped from the input. When nothing remains
    (or a falsy value such as ``None`` is passed) an empty string is returned,
    so the result can be spliced into a prompt unconditionally.
    """
    memory = meta_skill_content.strip() if meta_skill_content else ""
    if memory:
        # Fixed preamble telling the optimizer how much weight to give the memory.
        header = (
            "## Optimizer Meta Skill\n"
            "This is optimizer-side memory distilled from prior epoch transitions in "
            "this environment. Use it to improve how you propose, merge, and rank "
            "skill edits. Prefer it when the current evidence is ambiguous, but do "
            "not force it if the current trajectories clearly contradict it.\n\n"
        )
        return f"{header}{memory}"
    return ""
|
|
|
|
|
|
def run_meta_skill(
    prev_skill: str,
    curr_skill: str,
    comparison_pairs: list[dict],
    *,
    prev_meta_skill_content: str = "",
    system_prompt: str | None = None,
) -> dict | None:
    """Produce updated optimizer-side meta skill from adjacent epochs.

    Builds a user message from the two skill texts, the previous meta skill
    and the formatted comparison pairs, sends it to ``chat_optimizer`` and
    parses the reply with ``extract_json``.

    Args:
        prev_skill: Text placed under "Previous Epoch Last-Step Skill".
        curr_skill: Text placed under "Current Epoch Last-Step Skill".
        comparison_pairs: Passed unchanged to ``format_comparison_text``.
        prev_meta_skill_content: Meta skill that was available during the
            current epoch. Empty or blank values are replaced by a
            "first update" placeholder in the prompt.
        system_prompt: System prompt override. When ``None`` the prompt is
            loaded with ``load_prompt("meta_skill")``.

    Returns:
        A dict with the stripped ``"reasoning"`` and ``"meta_skill_content"``
        strings, or ``None`` when the optimizer call or parsing raises, the
        parsed reply is not a JSON object, or it carries no non-blank
        ``meta_skill_content``.
    """
    actual_system = system_prompt if system_prompt is not None else load_prompt("meta_skill")

    prev_meta_section = (
        (prev_meta_skill_content or "").strip()
        or "(No previous optimizer meta skill — this is the first update.)"
    )

    comparison_text = format_comparison_text(comparison_pairs)
    user = (
        f"## Previous Epoch Last-Step Skill\n{prev_skill}\n\n"
        f"## Current Epoch Last-Step Skill\n{curr_skill}\n\n"
        "## Previous Optimizer Meta Skill\n"
        "The following optimizer memory was available during the current epoch. "
        "Reflect on whether it improved or harmed the quality of edits.\n\n"
        f"{prev_meta_section}\n\n"
        "## Longitudinal Comparison (same tasks, two last-step skills)\n"
        f"{comparison_text}"
    )

    # Best-effort boundary: any failure in the call or in parsing is reported
    # via a traceback and turned into ``None`` instead of propagating.
    try:
        response, _ = chat_optimizer(
            system=actual_system,
            user=user,
            max_completion_tokens=16384,
            retries=3,
            stage="meta_skill",
        )
        result = extract_json(response)

        # A JSON array/scalar has no ``.get``; reject it quietly rather than
        # tripping an AttributeError and printing a misleading traceback.
        if not isinstance(result, dict):
            return None

        raw_content = result.get("meta_skill_content")
        if not raw_content:
            return None

        # A whitespace-only value is truthy but empty once stripped; treat it
        # as "no update" so callers never receive a blank meta skill.
        meta_skill_content = str(raw_content).strip()
        if not meta_skill_content:
            return None

        # JSON ``null`` must not be rendered as the literal string "None".
        raw_reasoning = result.get("reasoning")
        reasoning = "" if raw_reasoning is None else str(raw_reasoning).strip()

        return {
            "reasoning": reasoning,
            "meta_skill_content": meta_skill_content,
        }
    except Exception:  # noqa: BLE001
        traceback.print_exc()

    return None
|