From b4850ce418cab3295f61bbd96142ab054e902cf1 Mon Sep 17 00:00:00 2001 From: Yif Yang Date: Sun, 31 May 2026 08:22:20 +0000 Subject: [PATCH] fix(minimax): wire YAML / CLI config through to backend PR #26 added a MiniMax chat backend but left three loose ends that silently dropped any YAML / CLI configuration of minimax_* keys: only the environment-variable path worked. - skillopt/config.py: add 6 model.minimax_* entries to _FLATTEN_MAP so the keys declared in configs/_base_/default.yaml actually survive flatten_config() (mirroring the existing model.qwen_chat_* block). - skillopt/engine/trainer.py: import configure_minimax_chat and call it alongside configure_qwen_chat, so cfg-supplied credentials, temperature, max_tokens, and enable_thinking reach the backend. Also apply cfg["minimax_model"] via set_target_deployment when the active target backend is minimax_chat. - scripts/train.py: add 6 --minimax_* CLI flags and the corresponding _LEGACY_TO_STRUCTURED entries, add 'minimax' / 'minimax_chat' to the --backend choices, auto-route to target_backend=minimax_chat, and, when no explicit target model is given, default target_model to minimax_model (falling back to the minimax_chat backend default). Default behavior on existing backends (openai, claude, qwen, codex, claude_code_exec) is unchanged; all 8 shipped configs continue to load with gate_metric falling back to 'hard' for paper reproduction. 
--- scripts/train.py | 26 +++++++++++++++++++++++++- skillopt/config.py | 6 ++++++ skillopt/engine/trainer.py | 11 +++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/scripts/train.py b/scripts/train.py index 4d9473f..d4acce6 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -137,7 +137,7 @@ def parse_args() -> argparse.Namespace: # Legacy flat CLI overrides (still work, prefer --cfg-options for new usage) p.add_argument("--env", type=str) p.add_argument("--backend", type=str, - choices=["azure_openai", "codex", "codex_exec", "claude", "claude_chat", "claude_code_exec", "qwen", "qwen_chat"]) + choices=["azure_openai", "codex", "codex_exec", "claude", "claude_chat", "claude_code_exec", "qwen", "qwen_chat", "minimax", "minimax_chat"]) p.add_argument("--optimizer_model", type=str) p.add_argument("--target_model", type=str) p.add_argument("--optimizer_backend", type=str) @@ -173,6 +173,12 @@ def parse_args() -> argparse.Namespace: p.add_argument("--qwen_chat_timeout_seconds", type=float) p.add_argument("--qwen_chat_max_tokens", type=int) p.add_argument("--qwen_chat_enable_thinking", type=_BOOL) + p.add_argument("--minimax_base_url", type=str) + p.add_argument("--minimax_api_key", type=str) + p.add_argument("--minimax_model", type=str) + p.add_argument("--minimax_temperature", type=float) + p.add_argument("--minimax_max_tokens", type=int) + p.add_argument("--minimax_enable_thinking", type=_BOOL) p.add_argument("--codex_exec_path", type=str) p.add_argument("--codex_exec_sandbox", type=str) p.add_argument("--codex_exec_profile", type=str) @@ -289,6 +295,12 @@ _LEGACY_TO_STRUCTURED: dict[str, str] = { "qwen_chat_timeout_seconds": "model.qwen_chat_timeout_seconds", "qwen_chat_max_tokens": "model.qwen_chat_max_tokens", "qwen_chat_enable_thinking": "model.qwen_chat_enable_thinking", + "minimax_base_url": "model.minimax_base_url", + "minimax_api_key": "model.minimax_api_key", + "minimax_model": "model.minimax_model", + "minimax_temperature": 
"model.minimax_temperature", + "minimax_max_tokens": "model.minimax_max_tokens", + "minimax_enable_thinking": "model.minimax_enable_thinking", "codex_exec_path": "model.codex_exec_path", "codex_exec_sandbox": "model.codex_exec_sandbox", "codex_exec_profile": "model.codex_exec_profile", @@ -403,6 +415,9 @@ def load_config(args: argparse.Namespace) -> dict: elif backend in {"qwen", "qwen_chat"}: flat.setdefault("optimizer_backend", "openai_chat") flat.setdefault("target_backend", "qwen_chat") + elif backend in {"minimax", "minimax_chat"}: + flat.setdefault("optimizer_backend", "openai_chat") + flat.setdefault("target_backend", "minimax_chat") else: flat.setdefault("optimizer_backend", "openai_chat") flat.setdefault("target_backend", "openai_chat") @@ -434,6 +449,15 @@ def load_config(args: argparse.Namespace) -> dict: and not _has_model_override("model.target", "target_model") ): flat["target_model"] = default_model_for_backend("qwen_chat") + if flat.get("target_backend") == "minimax_chat": + if ( + str(flat.get("target_model", "") or "").strip() in _OPENAI_DEFAULT_MODEL_SENTINELS + and not _has_model_override("model.target", "target_model") + ): + flat["target_model"] = ( + flat.get("minimax_model") + or default_model_for_backend("minimax_chat") + ) # Auto-generate output root if not flat.get("out_root"): diff --git a/skillopt/config.py b/skillopt/config.py index c8328ac..211d020 100644 --- a/skillopt/config.py +++ b/skillopt/config.py @@ -79,6 +79,12 @@ _FLATTEN_MAP: dict[str, str] = { "model.qwen_chat_timeout_seconds": "qwen_chat_timeout_seconds", "model.qwen_chat_max_tokens": "qwen_chat_max_tokens", "model.qwen_chat_enable_thinking": "qwen_chat_enable_thinking", + "model.minimax_base_url": "minimax_base_url", + "model.minimax_api_key": "minimax_api_key", + "model.minimax_model": "minimax_model", + "model.minimax_temperature": "minimax_temperature", + "model.minimax_max_tokens": "minimax_max_tokens", + "model.minimax_enable_thinking": "minimax_enable_thinking", 
"train.num_epochs": "num_epochs", "train.train_size": "train_size", "train.steps_per_epoch": "steps_per_epoch", diff --git a/skillopt/engine/trainer.py b/skillopt/engine/trainer.py index 25058c0..8c887e6 100644 --- a/skillopt/engine/trainer.py +++ b/skillopt/engine/trainer.py @@ -51,6 +51,7 @@ from skillopt.model import ( configure_azure_openai, configure_claude_code_exec, configure_codex_exec, + configure_minimax_chat, configure_qwen_chat, get_token_summary, reset_token_tracker, @@ -636,6 +637,16 @@ class ReflACTTrainer: max_tokens=cfg.get("qwen_chat_max_tokens"), enable_thinking=cfg.get("qwen_chat_enable_thinking"), ) + configure_minimax_chat( + base_url=cfg.get("minimax_base_url") or None, + api_key=cfg.get("minimax_api_key") or None, + temperature=cfg.get("minimax_temperature"), + max_tokens=cfg.get("minimax_max_tokens"), + enable_thinking=cfg.get("minimax_enable_thinking"), + ) + minimax_model_cfg = cfg.get("minimax_model") + if minimax_model_cfg and cfg.get("target_backend") == "minimax_chat": + set_target_deployment(str(minimax_model_cfg)) os.environ["REFLACT_CODEX_TRACE_TO_OPTIMIZER"] = ( "1" if target_backend == "codex_exec" and cfg.get("codex_trace_to_optimizer", False)