fix(trainer): flush appendix notes on skip branches — lapse-only steps no longer drop them

A step whose minibatches yield ONLY execution-lapse notes produces no body patches (analysts return empty-edits carriers, dropped by _normalise_patches), so skip_no_patches / skip_no_rewrite would `continue` before the appendix flush and silently discard every note of the step. This hit exactly the feature's target regime (mature skill body, failures classified as lapses): in c1_searchqa_def_g55_sar, 10/40 steps skipped this way and lost 95 notes total. Extract the flush block into _flush_skill_aware_appendix() and call it on the normal update path (unchanged behavior) AND on both skip branches before `continue`, so notes persist and appendix_notes.json / step_rec counters are recorded for skipped steps too. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-05 23:30:35 +08:00 · 2026-06-10 11:31:03 +00:00
parent 0dc84162dc
commit 7dcd612361
1 changed files with 84 additions and 47 deletions
--- a/skillopt/engine/trainer.py
+++ b/skillopt/engine/trainer.py
@@ -76,6 +76,74 @@ from skillopt.model import (
 from skillopt.utils import compute_score, skill_hash


+# ── Skill-aware reflection: appendix flush ───────────────────────────────────
+
+def _flush_skill_aware_appendix(
+    current_skill: str,
+    all_raw_patches: list,
+    step_rec: dict,
+    step_dir: str,
+    cfg: dict,
+) -> str:
+    """Append this step's EXECUTION_LAPSE notes into the protected appendix.
+
+    Returns the (possibly) updated skill. Must be called on BOTH the normal
+    update path and the skip branches: a lapse-only step yields no body
+    patches by design (analysts return ``edits: []`` carriers), so the skip
+    paths would otherwise silently drop every note of the step.
+    """
+    step_appendix_notes: list[str] = []
+    for rp in all_raw_patches:
+        if isinstance(rp, dict):
+            step_appendix_notes.extend(extract_appendix_notes_from_result(rp))
+    if not step_appendix_notes:
+        return current_skill
+
+    before_notes = extract_appendix_notes_from_skill(current_skill)
+    current_skill = append_to_appendix_field(
+        current_skill, step_appendix_notes,
+    )
+    after_notes = extract_appendix_notes_from_skill(current_skill)
+    n_added = len(after_notes) - len(before_notes)
+    step_rec["n_execution_lapse_notes"] = len(step_appendix_notes)
+    step_rec["n_appendix_notes_added"] = n_added
+    step_rec["n_appendix_notes_total"] = len(after_notes)
+    with open(os.path.join(step_dir, "appendix_notes.json"), "w") as f:
+        json.dump(
+            {
+                "step_notes": step_appendix_notes,
+                "appendix_after": after_notes,
+            },
+            f, indent=2, ensure_ascii=False,
+        )
+    print(
+        f"    [skill-aware] +{n_added} appendix note(s) "
+        f"(total {len(after_notes)}) from {len(step_appendix_notes)} lapse signal(s)"
+    )
+    # Threshold-gated LLM consolidation (paper Eq.11): when the
+    # appendix grows past N notes, compact it with one optimizer
+    # call (dedupe / merge / shorten). 0 disables it. Any failure
+    # leaves the appendix unchanged.
+    consolidate_threshold = int(
+        cfg.get("skill_aware_consolidate_threshold", 0) or 0
+    )
+    if consolidate_threshold > 0 and len(after_notes) > consolidate_threshold:
+        compacted = consolidate_appendix_notes(
+            after_notes, chat_fn=chat_optimizer,
+        )
+        if compacted and len(compacted) < len(after_notes):
+            current_skill = append_to_appendix_field(
+                _strip_all_appendix_fields(current_skill), compacted,
+            )
+            step_rec["n_appendix_notes_consolidated"] = len(compacted)
+            step_rec["n_appendix_notes_total"] = len(compacted)
+            print(
+                f"    [skill-aware] consolidated appendix "
+                f"{len(after_notes)} -> {len(compacted)} notes"
+            )
+    return current_skill
+
+
 # ── Patch normalization ───────────────────────────────────────────────────────

 def _normalise_patches(
@@ -1131,6 +1199,13 @@ class ReflACTTrainer:

                # ── No patches? Skip ─────────────────────────────────────
                if not all_failure_patches and not all_success_patches:
+                    # Skill-aware: a lapse-only step has no body patches but
+                    # may still carry appendix notes — flush them BEFORE
+                    # skipping, or they would be silently dropped.
+                    if use_skill_aware:
+                        current_skill = _flush_skill_aware_appendix(
+                            current_skill, all_raw_patches, step_rec, step_dir, cfg,
+                        )
                    step_rec["action"] = "skip_no_patches"
                    step_rec["current_score"] = current_score
                    step_rec["best_score"] = best_score
@@ -1319,6 +1394,12 @@ class ReflACTTrainer:
                    is_full_rewrite_minibatch_mode(update_mode)
                    and rewrite_result is None
                ):
+                    # Skill-aware: flush appendix notes before skipping (see
+                    # the skip_no_patches branch above).
+                    if use_skill_aware:
+                        current_skill = _flush_skill_aware_appendix(
+                            current_skill, all_raw_patches, step_rec, step_dir, cfg,
+                        )
                    step_rec["action"] = "skip_no_rewrite"
                    step_rec["current_score"] = current_score
                    step_rec["best_score"] = best_score
@@ -1423,53 +1504,9 @@ class ReflACTTrainer:
                # best_skill. Body candidate evaluation already happened above
                # and is unaffected.
                if use_skill_aware:
-                    step_appendix_notes: list[str] = []
-                    for rp in all_raw_patches:
-                        if isinstance(rp, dict):
-                            step_appendix_notes.extend(extract_appendix_notes_from_result(rp))
-                    if step_appendix_notes:
-                        before_notes = extract_appendix_notes_from_skill(current_skill)
-                        current_skill = append_to_appendix_field(
-                            current_skill, step_appendix_notes,
-                        )
-                        after_notes = extract_appendix_notes_from_skill(current_skill)
-                        n_added = len(after_notes) - len(before_notes)
-                        step_rec["n_execution_lapse_notes"] = len(step_appendix_notes)
-                        step_rec["n_appendix_notes_added"] = n_added
-                        step_rec["n_appendix_notes_total"] = len(after_notes)
-                        with open(os.path.join(step_dir, "appendix_notes.json"), "w") as f:
-                            json.dump(
-                                {
-                                    "step_notes": step_appendix_notes,
-                                    "appendix_after": after_notes,
-                                },
-                                f, indent=2, ensure_ascii=False,
-                            )
-                        print(
-                            f"    [skill-aware] +{n_added} appendix note(s) "
-                            f"(total {len(after_notes)}) from {len(step_appendix_notes)} lapse signal(s)"
-                        )
-                        # Threshold-gated LLM consolidation (paper Eq.11): when the
-                        # appendix grows past N notes, compact it with one optimizer
-                        # call (dedupe / merge / shorten). 0 disables it. Any failure
-                        # leaves the appendix unchanged.
-                        consolidate_threshold = int(
-                            cfg.get("skill_aware_consolidate_threshold", 0) or 0
-                        )
-                        if consolidate_threshold > 0 and len(after_notes) > consolidate_threshold:
-                            compacted = consolidate_appendix_notes(
-                                after_notes, chat_fn=chat_optimizer,
-                            )
-                            if compacted and len(compacted) < len(after_notes):
-                                current_skill = append_to_appendix_field(
-                                    _strip_all_appendix_fields(current_skill), compacted,
-                                )
-                                step_rec["n_appendix_notes_consolidated"] = len(compacted)
-                                step_rec["n_appendix_notes_total"] = len(compacted)
-                                print(
-                                    f"    [skill-aware] consolidated appendix "
-                                    f"{len(after_notes)} -> {len(compacted)} notes"
-                                )
+                    current_skill = _flush_skill_aware_appendix(
+                        current_skill, all_raw_patches, step_rec, step_dir, cfg,
+                    )

                if gate_metric == "hard":
                    score_label = f"hard={cand_hard:.4f}"