mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 22:24:36 +08:00
A. SpreadsheetBench verification-feedback bloat
- rollout.py _auto_verify_output: use official _compare_cell_value (was
repr() equality, which falsely flagged 5 vs 5.0 / None vs ""); collapse
correct-and-empty cells into a count so large sparse answer ranges no
longer flood feedback with MBs of None=None noise.
- codegen_agent.py _build_eval_feedback: only list WRONG cells, collapse
correct ones into a count.
Scoring is unaffected (evaluate() is independent); this only fixes the
target model's multi-turn solving feedback.
B. Remove optimizer-side truncation (bloat source now fixed)
- reflect.py: drop _MAX_TRAJ_CHARS cap and all per-field clips.
- update_modes.py / clip.py / lr_autonomous.py: describe_item /
short_item_summary no longer truncate; raise ranking/lr token budget.
- trainer.py _format_step_buffer: full task_ids / target.
- slow_update.py: full comparison samples.
C. Soft-disable gate
- config.py / trainer.py: use_gate=false no longer raises; validation still
runs but candidates are force-accepted (new force_accept branch + log).
Misc: aggregate.py merge token budget 4096 -> 16384.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
136 lines
4.8 KiB
Python
136 lines
4.8 KiB
Python
"""Helpers for switching between patch edits and rewrite-from-suggestions."""
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
PATCH_MODE = "patch"
|
|
REWRITE_MODE = "rewrite_from_suggestions"
|
|
FULL_REWRITE_MINIBATCH_MODE = "full_rewrite_minibatch"
|
|
|
|
|
|
def normalize_update_mode(mode: str | None) -> str:
|
|
raw = str(mode or PATCH_MODE).strip().lower()
|
|
aliases = {
|
|
"patch": PATCH_MODE,
|
|
"edits": PATCH_MODE,
|
|
"rewrite": REWRITE_MODE,
|
|
"rewrite_from_suggestions": REWRITE_MODE,
|
|
"suggestions": REWRITE_MODE,
|
|
"rewrite_suggestions": REWRITE_MODE,
|
|
"full_rewrite": FULL_REWRITE_MINIBATCH_MODE,
|
|
"full_rewrite_minibatch": FULL_REWRITE_MINIBATCH_MODE,
|
|
"minibatch_full_rewrite": FULL_REWRITE_MINIBATCH_MODE,
|
|
"skill_rewrite_minibatch": FULL_REWRITE_MINIBATCH_MODE,
|
|
}
|
|
return aliases.get(raw, PATCH_MODE)
|
|
|
|
|
|
def is_rewrite_mode(mode: str | None) -> bool:
|
|
return normalize_update_mode(mode) == REWRITE_MODE
|
|
|
|
|
|
def is_full_rewrite_minibatch_mode(mode: str | None) -> bool:
|
|
return normalize_update_mode(mode) == FULL_REWRITE_MINIBATCH_MODE
|
|
|
|
|
|
def payload_key(mode: str | None) -> str:
|
|
if is_full_rewrite_minibatch_mode(mode):
|
|
return "skill_candidates"
|
|
return "revise_suggestions" if is_rewrite_mode(mode) else "edits"
|
|
|
|
|
|
def payload_label(mode: str | None, *, singular: bool = False, title: bool = False) -> str:
|
|
if is_full_rewrite_minibatch_mode(mode):
|
|
word = "skill candidate" if singular else "skill candidates"
|
|
elif is_rewrite_mode(mode):
|
|
word = "suggestion" if singular else "suggestions"
|
|
else:
|
|
word = "edit" if singular else "edits"
|
|
return word.title() if title else word
|
|
|
|
|
|
def get_payload_items(container: dict | None, mode: str | None) -> list[dict]:
|
|
if not isinstance(container, dict):
|
|
return []
|
|
items = container.get(payload_key(mode), [])
|
|
return items if isinstance(items, list) else []
|
|
|
|
|
|
def set_payload_items(container: dict, items: list[dict], mode: str | None) -> dict:
|
|
container[payload_key(mode)] = items
|
|
return container
|
|
|
|
|
|
def truncate_payload(container: dict, max_items: int, mode: str | None) -> dict:
|
|
if max_items < 0:
|
|
return container
|
|
items = get_payload_items(container, mode)
|
|
if len(items) > max_items:
|
|
set_payload_items(container, items[:max_items], mode)
|
|
return container
|
|
|
|
|
|
def describe_item(item: dict, mode: str | None, *, max_chars: int | None = None) -> str:
|
|
if not isinstance(item, dict):
|
|
return ""
|
|
if is_full_rewrite_minibatch_mode(mode):
|
|
parts = [
|
|
f"title={item.get('title', '')!r}",
|
|
f"change_summary={item.get('change_summary', [])!r}",
|
|
]
|
|
if item.get("source_type"):
|
|
parts.append(f"source={item.get('source_type')}")
|
|
if item.get("support_count") is not None:
|
|
parts.append(f"support={item.get('support_count')}")
|
|
new_skill = str(item.get("new_skill", "")).strip()
|
|
if new_skill:
|
|
parts.append(f"new_skill_preview={new_skill!r}")
|
|
text = " ".join(parts)
|
|
elif is_rewrite_mode(mode):
|
|
parts = [
|
|
f"type={item.get('type', '?')}",
|
|
f"title={item.get('title', '')!r}",
|
|
f"instruction={item.get('instruction', '')!r}",
|
|
]
|
|
if item.get("priority_hint"):
|
|
parts.append(f"priority={item.get('priority_hint')}")
|
|
if item.get("support_count") is not None:
|
|
parts.append(f"support={item.get('support_count')}")
|
|
text = " ".join(parts)
|
|
else:
|
|
op = item.get("op", "?")
|
|
target = item.get("target", "")
|
|
content = item.get("content", "")
|
|
parts = [f"op={op}"]
|
|
if target:
|
|
parts.append(f"target={target!r}")
|
|
if content:
|
|
parts.append(f"content={content!r}")
|
|
if item.get("support_count") is not None:
|
|
parts.append(f"support={item.get('support_count')}")
|
|
text = " ".join(parts)
|
|
# Truncation disabled: the optimizer is given the full item description.
|
|
return text
|
|
|
|
|
|
def short_item_summary(item: dict, mode: str | None, *, max_chars: int | None = None) -> dict[str, Any]:
|
|
if is_full_rewrite_minibatch_mode(mode):
|
|
return {
|
|
"title": str(item.get("title", "")),
|
|
"change_summary": [
|
|
str(x) for x in item.get("change_summary", [])
|
|
] if isinstance(item.get("change_summary"), list) else [],
|
|
"source_type": item.get("source_type", ""),
|
|
}
|
|
if is_rewrite_mode(mode):
|
|
return {
|
|
"type": item.get("type", "?"),
|
|
"title": str(item.get("title", "")),
|
|
"instruction": str(item.get("instruction", "")),
|
|
}
|
|
return {
|
|
"op": item.get("op", "?"),
|
|
"content": str(item.get("content", "")),
|
|
"target": item.get("target", ""),
|
|
}
|