From b4850ce418cab3295f61bbd96142ab054e902cf1 Mon Sep 17 00:00:00 2001
From: Yif Yang <yif_yang@qq.com>
Date: Sun, 31 May 2026 08:22:20 +0000
Subject: [PATCH] fix(minimax): wire YAML / CLI config through to backend

PR #26 added a MiniMax chat backend but left three loose ends that
silently dropped any YAML / CLI configuration of minimax_* keys: only
the environment-variable path worked.

- skillopt/config.py: add 6 model.minimax_* entries to _FLATTEN_MAP so
  the keys declared in configs/_base_/default.yaml actually survive
  flatten_config() (mirroring the existing model.qwen_chat_* block).
- skillopt/engine/trainer.py: import configure_minimax_chat and call
  it alongside configure_qwen_chat, so cfg-supplied credentials,
  temperature, max_tokens, and enable_thinking reach the backend. Also
  apply cfg["minimax_model"] via set_target_deployment when the active
  target backend is minimax_chat.
- scripts/train.py: add 6 --minimax_* CLI flags + the corresponding
  _LEGACY_TO_STRUCTURED entries, add 'minimax' / 'minimax_chat' to the
  --backend choices, auto-route to target_backend=minimax_chat, and pick
  the right default target_model for the new backend (minimax_model if
  set, otherwise the backend's built-in default).

Default behavior on existing backends (openai, claude, qwen, codex,
claude_code_exec) is unchanged; all 8 shipped configs continue to load
with gate_metric falling back to 'hard' for paper reproduction.
---
 scripts/train.py           | 26 +++++++++++++++++++++++++-
 skillopt/config.py         |  6 ++++++
 skillopt/engine/trainer.py | 11 +++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/scripts/train.py b/scripts/train.py
index 4d9473f..d4acce6 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -137,7 +137,7 @@ def parse_args() -> argparse.Namespace:
     # Legacy flat CLI overrides (still work, prefer --cfg-options for new usage)
     p.add_argument("--env", type=str)
     p.add_argument("--backend", type=str,
-                   choices=["azure_openai", "codex", "codex_exec", "claude", "claude_chat", "claude_code_exec", "qwen", "qwen_chat"])
+                   choices=["azure_openai", "codex", "codex_exec", "claude", "claude_chat", "claude_code_exec", "qwen", "qwen_chat", "minimax", "minimax_chat"])
     p.add_argument("--optimizer_model", type=str)
     p.add_argument("--target_model", type=str)
     p.add_argument("--optimizer_backend", type=str)
@@ -173,6 +173,12 @@ def parse_args() -> argparse.Namespace:
     p.add_argument("--qwen_chat_timeout_seconds", type=float)
     p.add_argument("--qwen_chat_max_tokens", type=int)
     p.add_argument("--qwen_chat_enable_thinking", type=_BOOL)
+    p.add_argument("--minimax_base_url", type=str)
+    p.add_argument("--minimax_api_key", type=str)
+    p.add_argument("--minimax_model", type=str)
+    p.add_argument("--minimax_temperature", type=float)
+    p.add_argument("--minimax_max_tokens", type=int)
+    p.add_argument("--minimax_enable_thinking", type=_BOOL)
     p.add_argument("--codex_exec_path", type=str)
     p.add_argument("--codex_exec_sandbox", type=str)
     p.add_argument("--codex_exec_profile", type=str)
@@ -289,6 +295,12 @@ _LEGACY_TO_STRUCTURED: dict[str, str] = {
     "qwen_chat_timeout_seconds": "model.qwen_chat_timeout_seconds",
     "qwen_chat_max_tokens": "model.qwen_chat_max_tokens",
     "qwen_chat_enable_thinking": "model.qwen_chat_enable_thinking",
+    "minimax_base_url": "model.minimax_base_url",
+    "minimax_api_key": "model.minimax_api_key",
+    "minimax_model": "model.minimax_model",
+    "minimax_temperature": "model.minimax_temperature",
+    "minimax_max_tokens": "model.minimax_max_tokens",
+    "minimax_enable_thinking": "model.minimax_enable_thinking",
     "codex_exec_path": "model.codex_exec_path",
     "codex_exec_sandbox": "model.codex_exec_sandbox",
     "codex_exec_profile": "model.codex_exec_profile",
@@ -403,6 +415,9 @@ def load_config(args: argparse.Namespace) -> dict:
         elif backend in {"qwen", "qwen_chat"}:
             flat.setdefault("optimizer_backend", "openai_chat")
             flat.setdefault("target_backend", "qwen_chat")
+        elif backend in {"minimax", "minimax_chat"}:
+            flat.setdefault("optimizer_backend", "openai_chat")
+            flat.setdefault("target_backend", "minimax_chat")
         else:
             flat.setdefault("optimizer_backend", "openai_chat")
             flat.setdefault("target_backend", "openai_chat")
@@ -434,6 +449,15 @@ def load_config(args: argparse.Namespace) -> dict:
             and not _has_model_override("model.target", "target_model")
         ):
             flat["target_model"] = default_model_for_backend("qwen_chat")
+    if flat.get("target_backend") == "minimax_chat":
+        if (
+            str(flat.get("target_model", "") or "").strip() in _OPENAI_DEFAULT_MODEL_SENTINELS
+            and not _has_model_override("model.target", "target_model")
+        ):
+            flat["target_model"] = (
+                flat.get("minimax_model")
+                or default_model_for_backend("minimax_chat")
+            )
 
     # Auto-generate output root
     if not flat.get("out_root"):
diff --git a/skillopt/config.py b/skillopt/config.py
index c8328ac..211d020 100644
--- a/skillopt/config.py
+++ b/skillopt/config.py
@@ -79,6 +79,12 @@ _FLATTEN_MAP: dict[str, str] = {
     "model.qwen_chat_timeout_seconds": "qwen_chat_timeout_seconds",
     "model.qwen_chat_max_tokens": "qwen_chat_max_tokens",
     "model.qwen_chat_enable_thinking": "qwen_chat_enable_thinking",
+    "model.minimax_base_url": "minimax_base_url",
+    "model.minimax_api_key": "minimax_api_key",
+    "model.minimax_model": "minimax_model",
+    "model.minimax_temperature": "minimax_temperature",
+    "model.minimax_max_tokens": "minimax_max_tokens",
+    "model.minimax_enable_thinking": "minimax_enable_thinking",
     "train.num_epochs": "num_epochs",
     "train.train_size": "train_size",
     "train.steps_per_epoch": "steps_per_epoch",
diff --git a/skillopt/engine/trainer.py b/skillopt/engine/trainer.py
index 25058c0..8c887e6 100644
--- a/skillopt/engine/trainer.py
+++ b/skillopt/engine/trainer.py
@@ -51,6 +51,7 @@ from skillopt.model import (
     configure_azure_openai,
     configure_claude_code_exec,
     configure_codex_exec,
+    configure_minimax_chat,
     configure_qwen_chat,
     get_token_summary,
     reset_token_tracker,
@@ -636,6 +637,16 @@ class ReflACTTrainer:
             max_tokens=cfg.get("qwen_chat_max_tokens"),
             enable_thinking=cfg.get("qwen_chat_enable_thinking"),
         )
+        configure_minimax_chat(
+            base_url=cfg.get("minimax_base_url") or None,
+            api_key=cfg.get("minimax_api_key") or None,
+            temperature=cfg.get("minimax_temperature"),
+            max_tokens=cfg.get("minimax_max_tokens"),
+            enable_thinking=cfg.get("minimax_enable_thinking"),
+        )
+        minimax_model_cfg = cfg.get("minimax_model")
+        if minimax_model_cfg and cfg.get("target_backend") == "minimax_chat":
+            set_target_deployment(str(minimax_model_cfg))
         os.environ["REFLACT_CODEX_TRACE_TO_OPTIMIZER"] = (
             "1"
             if target_backend == "codex_exec" and cfg.get("codex_trace_to_optimizer", False)