Files
microsoft-SkillOpt/skillopt/optimizer/update_modes.py
Cuzyoung 372fd56c1e fix(spreadsheetbench)+optimizer: fix verify-feedback bloat, drop optimizer-side truncation, soft-disable gate
A. SpreadsheetBench verification-feedback bloat
   - rollout.py _auto_verify_output: use official _compare_cell_value (was
     repr() equality, which falsely flagged 5 vs 5.0 / None vs ""); collapse
     correct-and-empty cells into a count so large sparse answer ranges no
     longer flood feedback with MBs of None=None noise.
   - codegen_agent.py _build_eval_feedback: only list WRONG cells, collapse
     correct ones into a count.
   Scoring is unaffected (evaluate() is independent); this only fixes the
   target model's multi-turn solving feedback.

B. Remove optimizer-side truncation (bloat source now fixed)
   - reflect.py: drop _MAX_TRAJ_CHARS cap and all per-field clips.
   - update_modes.py / clip.py / lr_autonomous.py: describe_item /
     short_item_summary no longer truncate; raise ranking/lr token budget.
   - trainer.py _format_step_buffer: full task_ids / target.
   - slow_update.py: full comparison samples.

C. Soft-disable gate
   - config.py / trainer.py: use_gate=false no longer raises; validation still
     runs but candidates are force-accepted (new force_accept branch + log).

Misc: aggregate.py merge token budget 4096 -> 16384.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-10 13:03:17 +00:00

136 lines
4.8 KiB
Python

"""Helpers for switching between patch edits and rewrite-from-suggestions."""
from __future__ import annotations
from typing import Any
# Canonical update-mode identifiers. normalize_update_mode() maps every
# accepted alias onto one of these three strings.
PATCH_MODE = "patch"  # payload is stored under the "edits" key
REWRITE_MODE = "rewrite_from_suggestions"  # payload key: "revise_suggestions"
FULL_REWRITE_MINIBATCH_MODE = "full_rewrite_minibatch"  # payload key: "skill_candidates"
def normalize_update_mode(mode: str | None) -> str:
    """Map a user-supplied mode name onto one of the canonical mode constants.

    The input is stringified, stripped of surrounding whitespace and
    lower-cased before the alias lookup. A falsy ``mode`` (``None``, ``""``)
    and any unrecognised spelling both resolve to ``PATCH_MODE``.
    """
    cleaned = str(mode if mode else PATCH_MODE).strip().lower()
    # Each canonical mode paired with every spelling accepted for it.
    alias_groups = (
        (PATCH_MODE, ("patch", "edits")),
        (
            REWRITE_MODE,
            (
                "rewrite",
                "rewrite_from_suggestions",
                "suggestions",
                "rewrite_suggestions",
            ),
        ),
        (
            FULL_REWRITE_MINIBATCH_MODE,
            (
                "full_rewrite",
                "full_rewrite_minibatch",
                "minibatch_full_rewrite",
                "skill_rewrite_minibatch",
            ),
        ),
    )
    for canonical, spellings in alias_groups:
        if cleaned in spellings:
            return canonical
    # Unknown names fall back to patch mode rather than raising.
    return PATCH_MODE
def is_rewrite_mode(mode: str | None) -> bool:
    """Return True when ``mode`` normalizes to ``REWRITE_MODE``."""
    canonical = normalize_update_mode(mode)
    return canonical == REWRITE_MODE
def is_full_rewrite_minibatch_mode(mode: str | None) -> bool:
    """Return True when ``mode`` normalizes to ``FULL_REWRITE_MINIBATCH_MODE``."""
    canonical = normalize_update_mode(mode)
    return canonical == FULL_REWRITE_MINIBATCH_MODE
def payload_key(mode: str | None) -> str:
    """Return the container key under which items for ``mode`` are stored.

    Full-rewrite-minibatch mode uses ``"skill_candidates"``, rewrite mode
    uses ``"revise_suggestions"``, and everything else uses ``"edits"``.
    """
    if is_full_rewrite_minibatch_mode(mode):
        return "skill_candidates"
    if is_rewrite_mode(mode):
        return "revise_suggestions"
    return "edits"
def payload_label(mode: str | None, *, singular: bool = False, title: bool = False) -> str:
    """Return a human-readable noun for the items handled in ``mode``.

    Args:
        mode: Update mode name or alias.
        singular: Pick the singular form instead of the plural one.
        title: Apply ``str.title()`` to the chosen word(s).
    """
    # (singular, plural) pair for the active mode.
    if is_full_rewrite_minibatch_mode(mode):
        forms = ("skill candidate", "skill candidates")
    elif is_rewrite_mode(mode):
        forms = ("suggestion", "suggestions")
    else:
        forms = ("edit", "edits")
    label = forms[0] if singular else forms[1]
    if title:
        return label.title()
    return label
def get_payload_items(container: dict | None, mode: str | None) -> list[dict]:
    """Fetch the item list stored in ``container`` for ``mode``.

    Returns an empty list when ``container`` is not a dict, when the key for
    ``mode`` is absent, or when the stored value is not a list. Otherwise the
    stored list object itself is returned (not a copy).
    """
    if isinstance(container, dict):
        stored = container.get(payload_key(mode), [])
        if isinstance(stored, list):
            return stored
    return []
def set_payload_items(container: dict, items: list[dict], mode: str | None) -> dict:
    """Store ``items`` in ``container`` under the key for ``mode``.

    The container is modified in place and the same object is returned.
    """
    key = payload_key(mode)
    container[key] = items
    return container
def truncate_payload(container: dict, max_items: int, mode: str | None) -> dict:
    """Cap the item list for ``mode`` at ``max_items`` entries.

    A negative ``max_items`` leaves the container untouched. When the list is
    longer than the cap, it is replaced in ``container`` by a slice holding
    the first ``max_items`` entries. The same container object is returned
    in every case.
    """
    if max_items >= 0:
        current = get_payload_items(container, mode)
        if len(current) > max_items:
            set_payload_items(container, current[:max_items], mode)
    return container
def describe_item(item: dict, mode: str | None, *, max_chars: int | None = None) -> str:
    """Render one payload item as a single space-separated ``key=value`` line.

    The fields emitted depend on ``mode``:

    * full-rewrite-minibatch: title, change_summary, then optional source,
      support and new_skill_preview;
    * rewrite: type, title, instruction, then optional priority and support;
    * otherwise (patch): op, then optional target, content and support.

    ``max_chars`` is accepted but not used: the description is returned in
    full, without truncation. A non-dict ``item`` yields an empty string.
    """
    if not isinstance(item, dict):
        return ""

    # "support" is emitted in every mode whenever a count is present.
    support = item.get("support_count")

    if is_full_rewrite_minibatch_mode(mode):
        fields = [
            f"title={item.get('title', '')!r}",
            f"change_summary={item.get('change_summary', [])!r}",
        ]
        source_type = item.get("source_type")
        if source_type:
            fields.append(f"source={source_type}")
        if support is not None:
            fields.append(f"support={support}")
        skill_text = str(item.get("new_skill", "")).strip()
        if skill_text:
            fields.append(f"new_skill_preview={skill_text!r}")
        return " ".join(fields)

    if is_rewrite_mode(mode):
        fields = [
            f"type={item.get('type', '?')}",
            f"title={item.get('title', '')!r}",
            f"instruction={item.get('instruction', '')!r}",
        ]
        priority = item.get("priority_hint")
        if priority:
            fields.append(f"priority={priority}")
        if support is not None:
            fields.append(f"support={support}")
        return " ".join(fields)

    # Patch mode: target and content are only shown when truthy.
    fields = [f"op={item.get('op', '?')}"]
    target = item.get("target", "")
    if target:
        fields.append(f"target={target!r}")
    content = item.get("content", "")
    if content:
        fields.append(f"content={content!r}")
    if support is not None:
        fields.append(f"support={support}")
    return " ".join(fields)
def short_item_summary(item: dict, mode: str | None, *, max_chars: int | None = None) -> dict[str, Any]:
    """Build a compact dict summary of one payload item for ``mode``.

    The keys returned depend on ``mode``:

    * full-rewrite-minibatch: ``title``, ``change_summary`` (each entry
      stringified; an empty list when the stored value is not a list) and
      ``source_type``;
    * rewrite: ``type``, ``title`` and ``instruction``;
    * otherwise (patch): ``op``, ``content`` and ``target``.

    ``max_chars`` is accepted but not used: values are not truncated.

    A non-dict ``item`` yields an empty dict instead of raising
    ``AttributeError``, matching how ``describe_item`` treats malformed
    entries.
    """
    if not isinstance(item, dict):
        return {}

    if is_full_rewrite_minibatch_mode(mode):
        raw_summary = item.get("change_summary")
        return {
            "title": str(item.get("title", "")),
            "change_summary": (
                [str(entry) for entry in raw_summary]
                if isinstance(raw_summary, list)
                else []
            ),
            "source_type": item.get("source_type", ""),
        }

    if is_rewrite_mode(mode):
        return {
            "type": item.get("type", "?"),
            "title": str(item.get("title", "")),
            "instruction": str(item.get("instruction", "")),
        }

    return {
        "op": item.get("op", "?"),
        "content": str(item.get("content", "")),
        "target": item.get("target", ""),
    }