mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-04 06:29:56 +08:00
Split failure reflections into SKILL_DEFECT (body edit) vs EXECUTION_LAPSE (protected appendix note that re-emphasizes an existing rule, never edited by step-level analysts). Toggle: optimizer.use_skill_aware_reflection (default false; baseline byte-identical when off). - optimizer/appendix.py: protected APPENDIX region (inject/extract/append with dedup), mirrors the slow_update protected-field pattern - optimizer/skill_aware.py: analyst prompt augmentation, appendix_notes parsing, threshold-gated LLM consolidation, and a process-wide runtime switch (configure_skill_aware_reflection) set once by the trainer - gradient/reflect.py: augment error/success analyst prompts at runtime; None-sentinel kwargs resolve from the global switch, so env adapters need no per-benchmark wiring (works for all envs, present and future) - optimizer/skill.py: generalize the protected-region check to (slow_update, appendix); edits inside any protected region are skipped - engine/trainer.py: inject appendix at init, flush per-step EXECUTION_LAPSE notes after the gate settles, optional consolidation - tests: regression suite incl. toggle-off byte-identical guarantee and env-independent global-switch resolution (6/6 passing + live smoke) Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
202 lines
7.2 KiB
Python
202 lines
7.2 KiB
Python
"""ReflACT skill operations — edit application and patch processing.
|
|
|
|
The Update stage (⑤) of the ReflACT pipeline: apply a ranked set of
|
|
edits to the current skill document, producing an updated candidate.
|
|
Analogous to optimizer.step() in neural network training.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING
|
|
|
|
if TYPE_CHECKING:
|
|
from skillopt.types import Edit as EditType, Patch as PatchType
|
|
|
|
# Marker comments delimiting the protected slow-update region of a skill
# document.
SLOW_UPDATE_START = "<!-- SLOW_UPDATE_START -->"
SLOW_UPDATE_END = "<!-- SLOW_UPDATE_END -->"

# Skill-aware reflection (EmbodiSkill S_app) appendix region. Like the slow
# update region, it is protected: step-level analyst edits must not modify it.
APPENDIX_START = "<!-- APPENDIX_START -->"
APPENDIX_END = "<!-- APPENDIX_END -->"

# All protected (start, end) marker pairs. Step-level edits cannot target text
# inside any of these regions, and `append` / `insert_after`-fallback ops are
# inserted before the earliest-occurring region so protected blocks stay at the
# document tail. With only the slow-update region present, every helper reduces
# to the original slow-update-only behavior (byte-identical skill output).
_PROTECTED_REGIONS: tuple[tuple[str, str], ...] = (
    (SLOW_UPDATE_START, SLOW_UPDATE_END),
    (APPENDIX_START, APPENDIX_END),
)
|
|
|
|
|
|
def _earliest_protected_start(skill: str) -> int:
    """Return the lowest index at which a protected start marker occurs.

    Only the first occurrence of each start marker listed in
    ``_PROTECTED_REGIONS`` is considered. Returns ``-1`` when *skill*
    contains none of them.
    """
    earliest = -1
    for start_marker, _end_marker in _PROTECTED_REGIONS:
        found_at = skill.find(start_marker)
        if found_at == -1:
            continue
        if earliest == -1 or found_at < earliest:
            earliest = found_at
    return earliest
|
|
|
|
|
|
def _is_in_protected_region(skill: str, target: str) -> bool:
    """Check if *target* text overlaps any protected region.

    The first occurrence of *target* in *skill* is compared against each
    region in ``_PROTECTED_REGIONS``. A region spans from the first
    occurrence of its start marker to the end of the first occurrence of its
    end marker; regions with a missing marker, or whose end marker precedes
    the start marker, are ignored.

    Parameters
    ----------
    skill : str
        Current skill document content.
    target : str
        Text an edit wants to operate on.

    Returns
    -------
    bool
        ``True`` when any part of the target's first occurrence lies inside
        a protected region; ``False`` when *target* is empty, not found, or
        entirely outside every region.
    """
    if not target:
        return False
    target_start = skill.find(target)
    if target_start == -1:
        return False
    target_end = target_start + len(target)

    for start_marker, end_marker in _PROTECTED_REGIONS:
        region_start = skill.find(start_marker)
        end_idx = skill.find(end_marker)
        if region_start == -1 or end_idx == -1:
            continue
        region_end = end_idx + len(end_marker)
        if region_end <= region_start:
            # End marker appears before the start marker: not a usable region.
            continue
        # Interval overlap rather than a start-index test: a target that
        # begins before the region but runs into it would otherwise let a
        # replace/delete rewrite protected text (including the markers).
        if target_start < region_end and target_end > region_start:
            return True
    return False
|
|
|
|
|
|
def _is_in_slow_update_region(skill: str, target: str) -> bool:
    """Legacy name for :func:`_is_in_protected_region`.

    Retained so existing callers and tests using the old name keep working;
    it delegates unchanged and therefore covers every protected region, not
    only the slow-update one.
    """
    is_protected = _is_in_protected_region(skill, target)
    return is_protected
|
|
|
|
|
|
def _strip_slow_update_markers(text: str) -> str:
    """Return *text* with every protected-region marker string removed.

    Applied to edit content so that an edit cannot introduce a duplicate
    slow-update or appendix marker into the skill document.
    """
    cleaned = text
    for marker in (
        SLOW_UPDATE_START,
        SLOW_UPDATE_END,
        APPENDIX_START,
        APPENDIX_END,
    ):
        cleaned = cleaned.replace(marker, "")
    return cleaned
|
|
|
|
|
|
def _edit_fields(edit: EditType | dict) -> tuple[str, str, str]:
    """Extract ``(op, content, target)`` from an edit.

    *edit* may be an object exposing ``op`` / ``content`` / ``target``
    attributes or a plain dict with those keys. Missing dict keys default to
    the empty string. A field that is present but ``None`` (for example a
    JSON ``null`` target on an ``append`` edit) is also normalised to the
    empty string, so downstream slicing and stripping do not raise.

    Returns
    -------
    tuple[str, str, str]
        ``op``, then ``content`` stripped of surrounding whitespace and of
        protected-region markers, then ``target`` as given.
    """

    def _field(name: str) -> str:
        # Attribute access wins when the attribute exists; otherwise treat
        # the edit as a mapping.
        value = getattr(edit, name) if hasattr(edit, name) else edit.get(name, "")
        return "" if value is None else value

    op = _field("op")
    content = _strip_slow_update_markers(_field("content").strip())
    target = _field("target")
    return op, content, target
|
|
|
|
|
|
def _apply_edit_with_report(skill: str, edit: EditType | dict) -> tuple[str, dict]:
    """Apply one edit to *skill* and describe the outcome.

    Returns ``(new_skill, report)``. ``report`` carries the edit's ``op``,
    the first 200 characters of its ``target`` and of its content (as
    ``content_preview``), and a ``status`` string naming what happened. When
    an edit is skipped, *skill* is returned unchanged.

    Handling per op:

    * ``append`` - content is added at the end of the document, or directly
      before the earliest protected region when one is present.
    * ``insert_after`` - content is inserted after the end of the line on
      which the first occurrence of ``target`` ends; with an empty or absent
      ``target`` it falls back to the ``append`` placement.
    * ``replace`` / ``delete`` - the first occurrence of ``target`` is
      substituted with the content or removed; skipped when ``target`` is
      empty or not found.
    * anything else - skipped as an unknown op.

    Regardless of op, an edit whose non-empty ``target`` is reported as
    protected by ``_is_in_protected_region`` is skipped.
    """
    op, content, target = _edit_fields(edit)
    report = {
        "op": op,
        "target": target[:200],
        "content_preview": content[:200],
        "status": "unknown",
    }

    def done(status: str, new_skill: str) -> tuple[str, dict]:
        # Record the outcome and hand back the (possibly unchanged) document.
        report["status"] = status
        return new_skill, report

    def add_at_tail(status_before_protected: str, status_plain: str) -> tuple[str, dict]:
        # New content goes ahead of the earliest protected region so that
        # protected blocks remain at the end of the document.
        cut = _earliest_protected_start(skill)
        if cut == -1:
            return done(status_plain, skill.rstrip() + "\n\n" + content + "\n")
        head = skill[:cut].rstrip()
        tail = skill[cut:]
        return done(status_before_protected, head + "\n\n" + content + "\n\n" + tail)

    if target and _is_in_protected_region(skill, target):
        return done("skipped_protected_region", skill)

    if op == "append":
        return add_at_tail("applied_append_before_protected_region", "applied_append")

    if op == "insert_after":
        if not target or target not in skill:
            return add_at_tail(
                "applied_insert_after_fallback_before_protected_region",
                "applied_insert_after_fallback_append",
            )
        target_end = skill.index(target) + len(target)
        line_break = skill.find("\n", target_end)
        # Insert after the newline that closes the target's line, or at the
        # very end when the target sits on the final, unterminated line.
        insert_at = len(skill) if line_break == -1 else line_break + 1
        return done(
            "applied_insert_after",
            skill[:insert_at] + "\n" + content + "\n" + skill[insert_at:],
        )

    if op == "replace":
        if not target:
            return done("skipped_replace_missing_target", skill)
        if target not in skill:
            return done("skipped_replace_target_not_found", skill)
        return done("applied_replace", skill.replace(target, content, 1))

    if op == "delete":
        if not target:
            return done("skipped_delete_missing_target", skill)
        if target not in skill:
            return done("skipped_delete_target_not_found", skill)
        return done("applied_delete", skill.replace(target, "", 1))

    return done("skipped_unknown_op", skill)
|
|
|
|
|
|
def apply_edit(skill: str, edit: EditType | dict) -> str:
    """Apply a single edit operation and return the resulting skill document.

    Parameters
    ----------
    skill : str
        Current skill document content.
    edit : Edit | dict
        An :class:`~skillopt.types.Edit` instance or a plain dict with
        keys ``op``, ``content``, ``target``.

    Returns
    -------
    str
        The updated document. Edits that are skipped (for example because
        they target a protected region, i.e. the slow-update or appendix
        block) leave the document unchanged and raise no error; the
        per-edit report is discarded here.
    """
    return _apply_edit_with_report(skill, edit)[0]
|
|
|
|
|
|
def apply_patch_with_report(
    skill: str,
    patch: PatchType | dict,
) -> tuple[str, list[dict]]:
    """Apply a patch and return a per-edit report for observability.

    Parameters
    ----------
    skill : str
        Current skill document content.
    patch : Patch | dict
        An object exposing an ``edits`` attribute, or a plain dict with key
        ``edits``. A missing key or a ``None`` value is treated as an empty
        patch.

    Returns
    -------
    tuple[str, list[dict]]
        The document after all edits were attempted in order, and one report
        dict per edit. Each report has a 1-based ``index``. An edit that
        raises does not abort the patch: it is recorded with status
        ``"error"`` and the exception text under ``error``, and the document
        is carried forward unchanged to the next edit.
    """
    edits = patch.edits if hasattr(patch, "edits") else patch.get("edits", [])
    reports: list[dict] = []
    # ``edits`` may be None (e.g. a JSON null); iterate nothing in that case
    # instead of letting enumerate() raise outside the per-edit guard.
    for idx, edit in enumerate(edits or (), 1):
        try:
            skill, report = _apply_edit_with_report(skill, edit)
            report["index"] = idx
        except Exception as exc:  # noqa: BLE001
            # Deliberately broad: one malformed edit must not discard the
            # rest of the patch, so the failure is reported rather than raised.
            report = {
                "index": idx,
                "op": "",
                "target": "",
                "content_preview": "",
                "status": "error",
                "error": str(exc),
            }
        reports.append(report)
    return skill, reports
|
|
|
|
|
|
def apply_patch(skill: str, patch: PatchType | dict) -> str:
    """Apply every edit of a patch in order and return the resulting document.

    Parameters
    ----------
    skill : str
        Current skill document content.
    patch : Patch | dict
        A :class:`~skillopt.types.Patch` instance or a plain dict with
        key ``edits`` containing a list of edit operations.

    Returns
    -------
    str
        The skill document after the patch; the per-edit reports produced
        along the way are discarded.
    """
    return apply_patch_with_report(skill, patch)[0]
|