diff --git a/scripts/train.py b/scripts/train.py index 4d9473f..d4acce6 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -137,7 +137,7 @@ def parse_args() -> argparse.Namespace: # Legacy flat CLI overrides (still work, prefer --cfg-options for new usage) p.add_argument("--env", type=str) p.add_argument("--backend", type=str, - choices=["azure_openai", "codex", "codex_exec", "claude", "claude_chat", "claude_code_exec", "qwen", "qwen_chat"]) + choices=["azure_openai", "codex", "codex_exec", "claude", "claude_chat", "claude_code_exec", "qwen", "qwen_chat", "minimax", "minimax_chat"]) p.add_argument("--optimizer_model", type=str) p.add_argument("--target_model", type=str) p.add_argument("--optimizer_backend", type=str) @@ -173,6 +173,12 @@ def parse_args() -> argparse.Namespace: p.add_argument("--qwen_chat_timeout_seconds", type=float) p.add_argument("--qwen_chat_max_tokens", type=int) p.add_argument("--qwen_chat_enable_thinking", type=_BOOL) + p.add_argument("--minimax_base_url", type=str) + p.add_argument("--minimax_api_key", type=str) + p.add_argument("--minimax_model", type=str) + p.add_argument("--minimax_temperature", type=float) + p.add_argument("--minimax_max_tokens", type=int) + p.add_argument("--minimax_enable_thinking", type=_BOOL) p.add_argument("--codex_exec_path", type=str) p.add_argument("--codex_exec_sandbox", type=str) p.add_argument("--codex_exec_profile", type=str) @@ -289,6 +295,12 @@ _LEGACY_TO_STRUCTURED: dict[str, str] = { "qwen_chat_timeout_seconds": "model.qwen_chat_timeout_seconds", "qwen_chat_max_tokens": "model.qwen_chat_max_tokens", "qwen_chat_enable_thinking": "model.qwen_chat_enable_thinking", + "minimax_base_url": "model.minimax_base_url", + "minimax_api_key": "model.minimax_api_key", + "minimax_model": "model.minimax_model", + "minimax_temperature": "model.minimax_temperature", + "minimax_max_tokens": "model.minimax_max_tokens", + "minimax_enable_thinking": "model.minimax_enable_thinking", "codex_exec_path": "model.codex_exec_path", "codex_exec_sandbox": "model.codex_exec_sandbox", "codex_exec_profile": "model.codex_exec_profile", @@ -403,6 +415,9 @@ def load_config(args: argparse.Namespace) -> dict: elif backend in {"qwen", "qwen_chat"}: flat.setdefault("optimizer_backend", "openai_chat") flat.setdefault("target_backend", "qwen_chat") + elif backend in {"minimax", "minimax_chat"}: + flat.setdefault("optimizer_backend", "openai_chat") + flat.setdefault("target_backend", "minimax_chat") else: flat.setdefault("optimizer_backend", "openai_chat") flat.setdefault("target_backend", "openai_chat") @@ -434,6 +449,15 @@ def load_config(args: argparse.Namespace) -> dict: and not _has_model_override("model.target", "target_model") ): flat["target_model"] = default_model_for_backend("qwen_chat") + if flat.get("target_backend") == "minimax_chat": + if ( + str(flat.get("target_model", "") or "").strip() in _OPENAI_DEFAULT_MODEL_SENTINELS + and not _has_model_override("model.target", "target_model") + ): + flat["target_model"] = ( + flat.get("minimax_model") + or default_model_for_backend("minimax_chat") + ) # Auto-generate output root if not flat.get("out_root"): diff --git a/skillopt/config.py b/skillopt/config.py index c8328ac..211d020 100644 --- a/skillopt/config.py +++ b/skillopt/config.py @@ -79,6 +79,12 @@ _FLATTEN_MAP: dict[str, str] = { "model.qwen_chat_timeout_seconds": "qwen_chat_timeout_seconds", "model.qwen_chat_max_tokens": "qwen_chat_max_tokens", "model.qwen_chat_enable_thinking": "qwen_chat_enable_thinking", + "model.minimax_base_url": "minimax_base_url", + "model.minimax_api_key": "minimax_api_key", + "model.minimax_model": "minimax_model", + "model.minimax_temperature": "minimax_temperature", + "model.minimax_max_tokens": "minimax_max_tokens", + "model.minimax_enable_thinking": "minimax_enable_thinking", "train.num_epochs": "num_epochs", "train.train_size": "train_size", "train.steps_per_epoch": "steps_per_epoch", diff --git a/skillopt/engine/trainer.py b/skillopt/engine/trainer.py index 25058c0..8c887e6 100644 --- a/skillopt/engine/trainer.py +++ b/skillopt/engine/trainer.py @@ -51,6 +51,7 @@ from skillopt.model import ( configure_azure_openai, configure_claude_code_exec, configure_codex_exec, + configure_minimax_chat, configure_qwen_chat, get_token_summary, reset_token_tracker, @@ -636,6 +637,16 @@ class ReflACTTrainer: max_tokens=cfg.get("qwen_chat_max_tokens"), enable_thinking=cfg.get("qwen_chat_enable_thinking"), ) + configure_minimax_chat( + base_url=cfg.get("minimax_base_url") or None, + api_key=cfg.get("minimax_api_key") or None, + temperature=cfg.get("minimax_temperature"), + max_tokens=cfg.get("minimax_max_tokens"), + enable_thinking=cfg.get("minimax_enable_thinking"), + ) + minimax_model_cfg = cfg.get("minimax_model") + if minimax_model_cfg and cfg.get("target_backend") == "minimax_chat": + set_target_deployment(str(minimax_model_cfg)) os.environ["REFLACT_CODEX_TRACE_TO_OPTIMIZER"] = ( "1" if target_backend == "codex_exec" and cfg.get("codex_trace_to_optimizer", False)