Files
Cuzyoung 372fd56c1e fix(spreadsheetbench)+optimizer: fix verify-feedback bloat, drop optimizer-side truncation, soft-disable gate
A. SpreadsheetBench verification-feedback bloat
   - rollout.py _auto_verify_output: use official _compare_cell_value (was
     repr() equality, which falsely flagged 5 vs 5.0 / None vs ""); collapse
     correct-and-empty cells into a count so large sparse answer ranges no
     longer flood feedback with MBs of None=None noise.
   - codegen_agent.py _build_eval_feedback: only list WRONG cells, collapse
     correct ones into a count.
   Scoring is unaffected (evaluate() is independent); this only fixes the
   target model's multi-turn solving feedback.

B. Remove optimizer-side truncation (bloat source now fixed)
   - reflect.py: drop _MAX_TRAJ_CHARS cap and all per-field clips.
   - update_modes.py / clip.py / lr_autonomous.py: describe_item /
     short_item_summary no longer truncate; raise ranking/lr token budget.
   - trainer.py _format_step_buffer: full task_ids / target.
   - slow_update.py: full comparison samples.

C. Soft-disable gate
   - config.py / trainer.py: use_gate=false no longer raises; validation still
     runs but candidates are force-accepted (new force_accept branch + log).

Misc: aggregate.py merge token budget 4096 -> 16384.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-10 13:03:17 +00:00

110 lines
3.9 KiB
Python

"""ReflACT gradient clipping — LLM-driven edit ranking and selection.
Analogous to gradient clipping in neural network training: ranks candidate
edits by importance and selects the top-L to apply, controlling the
effective step size. Previously core/select.py.
"""
from __future__ import annotations
from skillopt.model import chat_optimizer
from skillopt.optimizer.meta_skill import format_meta_skill_context
from skillopt.optimizer.update_modes import (
describe_item,
get_payload_items,
is_rewrite_mode,
normalize_update_mode,
payload_key,
payload_label,
)
from skillopt.prompts import load_prompt
from skillopt.utils import extract_json
# ── Public API ────────────────────────────────────────────────────────────────
def rank_and_select(
skill_content: str,
patch: dict,
max_edits: int,
meta_skill_context: str = "",
update_mode: str = "patch",
) -> dict:
"""Use a optimizer LLM to rank edits by importance, then keep top-L.
If the edit pool is within budget, returns the patch unchanged.
Otherwise, calls the optimizer to rank and select the most impactful edits.
Parameters
----------
skill_content : str
Current skill document.
patch : dict
Merged :class:`~skillopt.types.Patch` dict with ``edits`` list.
max_edits : int
Maximum number of edits to keep (the "edit budget").
Returns
-------
dict
:class:`~skillopt.types.Patch` dict with selected edits and
optional ``ranking_details``.
"""
update_mode = normalize_update_mode(update_mode)
edits = get_payload_items(patch, update_mode)
if len(edits) <= max_edits:
return patch
# Build the edit pool description for the optimizer
edits_desc = []
for i, edit in enumerate(edits):
edits_desc.append(f"[{i}] {describe_item(edit, update_mode)}")
user = (
f"## Current Skill\n{skill_content}\n\n"
f"## {payload_label(update_mode, title=True)} Pool ({len(edits)} {payload_label(update_mode)}, budget={max_edits})\n"
+ "\n".join(edits_desc)
+ f"\n\nSelect the {max_edits} most important {payload_label(update_mode)}. "
f"Return their 0-based indices in priority order."
)
optimizer_ctx = format_meta_skill_context(meta_skill_context)
if optimizer_ctx:
user = f"{optimizer_ctx}\n\n{user}"
prompt_name = "ranking_rewrite" if is_rewrite_mode(update_mode) else "ranking"
try:
response, _ = chat_optimizer(
system=load_prompt(prompt_name), user=user,
max_completion_tokens=16384, retries=3, stage="ranking",
)
result = extract_json(response)
if result and "selected_indices" in result:
indices = result["selected_indices"]
selected = []
seen: set[int] = set()
for idx in indices:
if (
isinstance(idx, int)
and 0 <= idx < len(edits)
and idx not in seen
):
selected.append(edits[idx])
seen.add(idx)
if len(selected) >= max_edits:
break
if selected:
return {
"reasoning": patch.get("reasoning", "")
+ f" [optimizer-ranked: selected {len(selected)}/{len(edits)} {payload_label(update_mode)}]",
payload_key(update_mode): selected,
"ranking_details": result,
}
except Exception: # noqa: BLE001
pass
# Fallback: simple truncation
return {
"reasoning": patch.get("reasoning", "")
+ f" [fallback truncated {len(edits)}->{max_edits} {payload_label(update_mode)}]",
payload_key(update_mode): edits[:max_edits],
}