mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 14:02:58 +08:00
Split failure reflections into SKILL_DEFECT (body edit) vs EXECUTION_LAPSE (protected appendix note that re-emphasizes an existing rule, never edited by step-level analysts). Toggle: optimizer.use_skill_aware_reflection (default false; baseline byte-identical when off). - optimizer/appendix.py: protected APPENDIX region (inject/extract/append with dedup), mirrors the slow_update protected-field pattern - optimizer/skill_aware.py: analyst prompt augmentation, appendix_notes parsing, threshold-gated LLM consolidation, and a process-wide runtime switch (configure_skill_aware_reflection) set once by the trainer - gradient/reflect.py: augment error/success analyst prompts at runtime; None-sentinel kwargs resolve from the global switch, so env adapters need no per-benchmark wiring (works for all envs, present and future) - optimizer/skill.py: generalize the protected-region check to (slow_update, appendix); edits inside any protected region are skipped - engine/trainer.py: inject appendix at init, flush per-step EXECUTION_LAPSE notes after the gate settles, optional consolidation - tests: regression suite incl. toggle-off byte-identical guarantee and env-independent global-switch resolution (6/6 passing + live smoke) Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
104 lines
4.1 KiB
YAML
104 lines
4.1 KiB
YAML
# SkillOpt default configuration — base for all environments.
# Environment configs should inherit via: _base_: default.yaml
|
|
# Model/backend defaults: shared settings plus per-role (optimizer_* / target_*)
# overrides for the Azure OpenAI and MiniMax backends.
model:
  backend: azure_openai
  optimizer: gpt-5.5
  target: gpt-5.5
  optimizer_backend: openai_chat
  target_backend: openai_chat
  reasoning_effort: medium
  rewrite_reasoning_effort: ""
  rewrite_max_completion_tokens: 64000
  codex_exec_path: codex
  codex_exec_sandbox: workspace-write
  codex_exec_profile: ""
  codex_exec_full_auto: false
  codex_exec_reasoning_effort: none
  codex_exec_use_sdk: auto
  codex_exec_network_access: false
  codex_exec_web_search: false
  codex_exec_approval_policy: never
  claude_code_exec_path: claude
  claude_code_exec_profile: ""
  claude_code_exec_use_sdk: auto
  claude_code_exec_effort: medium
  claude_code_exec_max_thinking_tokens: 16384
  codex_trace_to_optimizer: true
  azure_openai_endpoint: ""  # e.g. "https://your-resource.openai.azure.com/"
  azure_openai_api_version: "2024-12-01-preview"
  azure_openai_api_key: ""  # Fill locally if you do not export AZURE_OPENAI_API_KEY
  # empty → fall back to AZURE_OPENAI_AUTH_MODE env (default "azure_cli")
  azure_openai_auth_mode: ""
  azure_openai_ad_scope: "https://cognitiveservices.azure.com/.default"
  azure_openai_managed_identity_client_id: ""
  optimizer_azure_openai_endpoint: ""  # e.g. "https://your-resource.openai.azure.com/"
  optimizer_azure_openai_api_version: "2024-12-01-preview"
  optimizer_azure_openai_api_key: ""
  # empty → fall back to OPTIMIZER_AZURE_OPENAI_AUTH_MODE env, then shared
  optimizer_azure_openai_auth_mode: ""
  optimizer_azure_openai_ad_scope: "https://cognitiveservices.azure.com/.default"
  optimizer_azure_openai_managed_identity_client_id: ""
  target_azure_openai_endpoint: ""  # e.g. "https://your-resource.openai.azure.com/"
  target_azure_openai_api_version: "2024-12-01-preview"
  target_azure_openai_api_key: ""
  # empty → fall back to TARGET_AZURE_OPENAI_AUTH_MODE env, then shared
  target_azure_openai_auth_mode: ""
  target_azure_openai_ad_scope: "https://cognitiveservices.azure.com/.default"
  target_azure_openai_managed_identity_client_id: ""

  # MiniMax backend settings (minimax_chat target)
  minimax_base_url: ""  # https://api.minimax.io/v1 if blank
  minimax_api_key: ""
  minimax_model: "MiniMax-M2.7"
  minimax_temperature: "0.7"
  minimax_max_tokens: "8000"
  minimax_enable_thinking: "false"
  optimizer_minimax_base_url: ""  # per-role override
  target_minimax_base_url: ""  # per-role override
  optimizer_minimax_api_key: ""
  target_minimax_api_key: ""
|
|
# Training defaults: epochs, dataset size, batching, accumulation, and seed.
train:
  num_epochs: 4
  train_size: 0  # 0 = derive from dataset split when available
  batch_size: 40
  accumulation: 1
  seed: 42
|
|
# Gradient-stage defaults: minibatch/merge batch sizes and analyst
# worker/round limits.
gradient:
  minibatch_size: 8
  merge_batch_size: 8
  analyst_workers: 16
  max_analyst_rounds: 3
  failure_only: false
|
|
# Optimizer defaults: edit budget and its schedule, skill update mode,
# slow update, meta skill, and the skill-aware reflection toggles.
optimizer:
  learning_rate: 4  # max edits per step (edit_budget)
  min_learning_rate: 2  # min edits for decay schedulers
  lr_scheduler: cosine  # constant / linear / cosine / autonomous
  lr_control_mode: fixed  # fixed / autonomous / none
  skill_update_mode: patch  # patch / rewrite_from_suggestions / full_rewrite_minibatch
  use_slow_update: true
  slow_update_samples: 20
  slow_update_gate_with_selection: false
  longitudinal_pair_policy: mixed  # mixed / changed / unchanged
  use_meta_skill: true
  # EmbodiSkill: split failures into SKILL_DEFECT (edit body) vs
  # EXECUTION_LAPSE (protected appendix).
  use_skill_aware_reflection: false
  # both = success+failure emit appendix notes;
  # failure_only = only EXECUTION_LAPSE (paper-faithful).
  skill_aware_appendix_source: both
  # 0 = off; >0 = LLM-consolidate the appendix when its note count exceeds N.
  skill_aware_consolidate_threshold: 0
|
|
# Evaluation defaults: gate toggle, selection/test environment counts, and
# whether to evaluate on the test split.
evaluation:
  use_gate: true
  sel_env_num: 0
  test_env_num: 0
  eval_test: true
|
|
# Environment defaults: left mostly blank here so that environment configs
# inheriting from this file can fill them in.
env:
  name: ""
  skill_init: ""
  # ratio = build deterministic split from data_path;
  # split_dir = use pre-split train/val/test
  split_mode: ratio
  split_seed: 42
  split_dir: ""
  data_path: ""
  split_output_dir: ""
  exec_timeout: 120  # per target model/code-agent call timeout in seconds
  out_root: ""