diff --git a/.gitignore b/.gitignore
index 3b04ba4..3bc970b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,3 +39,4 @@ docs/reflact_conda_env_export.yml
docs/reflact_overview.html
docs/render_ablation_paper_tables.py
docs/让*
+.gradio/
diff --git a/.gradio/certificate.pem b/.gradio/certificate.pem
deleted file mode 100644
index b85c803..0000000
--- a/.gradio/certificate.pem
+++ /dev/null
@@ -1,31 +0,0 @@
------BEGIN CERTIFICATE-----
-MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
-TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
-cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
-WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
-ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
-MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
-h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
-0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
-A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
-T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
-B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
-B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
-KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
-OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
-jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
-qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
-rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
-HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
-hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
-ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
-3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
-NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
-ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
-TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
-jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
-oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
-4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
-mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
-emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
------END CERTIFICATE-----
diff --git a/skillopt/envs/alfworld/prompts/deep_probe.md b/skillopt/envs/alfworld/prompts/deep_probe.md
index c38e94c..4eecbd6 100644
--- a/skillopt/envs/alfworld/prompts/deep_probe.md
+++ b/skillopt/envs/alfworld/prompts/deep_probe.md
@@ -1,20 +1,20 @@
You are an expert diagnostic-probe designer for ALFWorld embodied tasks.
-You will design one short diagnostic instruction to append to the student's prompt
+You will design one short diagnostic instruction to append to the target's prompt
for a handful of representative ALFWorld trajectories.
-The goal is to expose whether the student has the right intermediate subgoal,
+The goal is to expose whether the target has the right intermediate subgoal,
object/receptacle state, and next-step intention without substantially changing
the current scaffold.
## Hard Constraints
-1. Do NOT substantially change the student's existing action-selection scaffold.
+1. Do NOT substantially change the target's existing action-selection scaffold.
2. Do NOT prescribe a brand-new planner or long multi-step policy.
3. Do NOT ask for exhaustive search over all objects or all admissible actions.
4. Keep the diagnostic readout brief and place it inside the existing ... block.
-5. The student must still output exactly one admissible action inside ....
+5. The target must still output exactly one admissible action inside ....
6. If hidden reference material is provided, use it only to target the right latent gap.
-7. Never copy hidden reference content into the student-facing probe.
+7. Never copy hidden reference content into the target-facing probe.
## Good Probe Targets
- current subgoal
@@ -31,5 +31,5 @@ the current scaffold.
Respond ONLY with a valid JSON object:
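{
"reasoning": "",
- "probe_instruction": "<short diagnostic instruction to append to the student's prompt>"
+ "probe_instruction": "<short diagnostic instruction to append to the target's prompt>"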
}
diff --git a/skillopt/envs/babyvision/adapter.py b/skillopt/envs/babyvision/adapter.py
index 601b736..785e44b 100644
--- a/skillopt/envs/babyvision/adapter.py
+++ b/skillopt/envs/babyvision/adapter.py
@@ -10,7 +10,7 @@ from skillopt.gradient.reflect import run_minibatch_reflect
from skillopt.envs.base import EnvAdapter
from skillopt.envs.babyvision.dataloader import BabyVisionDataLoader
from skillopt.envs.babyvision.rollout import run_batch
-from skillopt.model import get_student_backend
+from skillopt.model import get_target_backend
class BabyVisionAdapter(EnvAdapter):
@@ -165,7 +165,7 @@ class BabyVisionAdapter(EnvAdapter):
random_seed = kwargs.get("random_seed")
step_buffer_context = kwargs.get("step_buffer_context", "")
meta_skill_context = kwargs.get("meta_skill_context", "")
- codex_backend = get_student_backend() == "codex_exec"
+ codex_backend = get_target_backend() == "codex_exec"
selected_items = self.select_representative_items(
results,
env_manager if isinstance(env_manager, list) else None,
diff --git a/skillopt/envs/babyvision/prompts/deep_probe.md b/skillopt/envs/babyvision/prompts/deep_probe.md
index ff53c53..b754767 100644
--- a/skillopt/envs/babyvision/prompts/deep_probe.md
+++ b/skillopt/envs/babyvision/prompts/deep_probe.md
@@ -1,14 +1,14 @@
You are an expert diagnostic-probe designer for BabyVision-style visual reasoning tasks.
-You will be shown representative trajectories, the current student skill, and the student's original prompt context.
-Design one SMALL diagnostic instruction that exposes the student's intermediate visual judgment without materially changing the original scaffold.
+You will be shown representative trajectories, the current target skill, and the target's original prompt context.
+Design one SMALL diagnostic instruction that exposes the target's intermediate visual judgment without materially changing the original scaffold.
## Hard Constraints
1. Do NOT substantially change the original scaffold.
2. Do NOT prescribe a new step-by-step solving method.
3. You MAY ask for a short structured list of a few intermediate conclusions, candidate cues, or counted units, as long as it stays close to the original scaffold.
4. Do NOT ask for exhaustive listing of all cells, all objects, or a full chain-of-thought.
-5. Ask only for a short readout that reveals the student's current latent state.
+5. Ask only for a short readout that reveals the target's current latent state.
6. Keep it brief and structured, and require the final answer to remain in ....
## Good Probe Targets
@@ -21,5 +21,5 @@ Design one SMALL diagnostic instruction that exposes the student's intermediate
Respond ONLY with a valid JSON object:
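{
"reasoning": "",
- "probe_instruction": "<short diagnostic instruction to append to the student's prompt>"
+ "probe_instruction": "<short diagnostic instruction to append to the target's prompt>"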
}
diff --git a/skillopt/envs/babyvision/rollout.py b/skillopt/envs/babyvision/rollout.py
index 7a6a2a1..f192a77 100644
--- a/skillopt/envs/babyvision/rollout.py
+++ b/skillopt/envs/babyvision/rollout.py
@@ -8,8 +8,8 @@ import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from skillopt.envs.babyvision.evaluator import evaluate_item, evaluation_mode, extract_boxed_answer
-from skillopt.model import chat_student_messages, get_student_backend, is_student_exec_backend
-from skillopt.model.codex_harness import prepare_workspace, render_skill_md, run_student_exec
+from skillopt.model import chat_target_messages, get_target_backend, is_target_exec_backend
+from skillopt.model.codex_harness import prepare_workspace, render_skill_md, run_target_exec
from skillopt.prompts import load_prompt
def _build_system(skill_content: str) -> str:
@@ -137,11 +137,11 @@ def _run_codex_once(
images=[item["image_path"]],
)
prompt = (
- "Use the `skillopt-student` skill available in this workspace.\n"
+ "Use the `skillopt-target` skill available in this workspace.\n"
"Read `task.md`, inspect the attached image, and answer the question.\n"
"Return the final answer in \\boxed{...}."
)
- final_message, raw = run_student_exec(
+ final_message, raw = run_target_exec(
work_dir=work_dir,
prompt=prompt,
model=model,
@@ -195,7 +195,7 @@ def process_one(
pred_dir = os.path.join(out_root, "predictions", item_id)
os.makedirs(pred_dir, exist_ok=True)
- if is_student_exec_backend():
+ if is_target_exec_backend():
from skillopt.model import azure_openai as _llm
response = ""
@@ -209,7 +209,7 @@ def process_one(
pred_dir=pred_dir,
item=item,
skill_content=skill_content,
- model=_llm.STUDENT_DEPLOYMENT,
+ model=_llm.TARGET_DEPLOYMENT,
timeout=120,
image_detail=image_detail,
diagnostic_mode=diagnostic_mode if turn == 0 else False,
@@ -224,9 +224,9 @@ def process_one(
result["response"] = response
result["agent_ok"] = True
result["n_turns"] = len(conversation) - 1
- with open(os.path.join(pred_dir, "student_system_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_system_prompt.txt"), "w", encoding="utf-8") as f:
f.write(system_prompt)
- with open(os.path.join(pred_dir, "student_user_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_user_prompt.txt"), "w", encoding="utf-8") as f:
f.write(user_text)
eval_result = evaluate_item(
@@ -299,7 +299,7 @@ def process_one(
for turn in range(max_turns):
if turn == 0:
- resp_text, _ = chat_student_messages(
+ resp_text, _ = chat_target_messages(
messages=messages,
max_completion_tokens=768,
retries=5,
@@ -317,7 +317,7 @@ def process_one(
{"role": "assistant", "content": response},
{"role": "user", "content": refinement_text},
]
- resp_text, _ = chat_student_messages(
+ resp_text, _ = chat_target_messages(
messages=refinement_messages,
max_completion_tokens=512,
retries=5,
@@ -332,9 +332,9 @@ def process_one(
result["agent_ok"] = True
result["n_turns"] = len(conversation) - 1
- with open(os.path.join(pred_dir, "student_system_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_system_prompt.txt"), "w", encoding="utf-8") as f:
f.write(system_prompt)
- with open(os.path.join(pred_dir, "student_user_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_user_prompt.txt"), "w", encoding="utf-8") as f:
f.write(user_text)
eval_result = evaluate_item(
diff --git a/skillopt/envs/deep_reflect.py b/skillopt/envs/deep_reflect.py
index d0fd37b..a4a43a4 100644
--- a/skillopt/envs/deep_reflect.py
+++ b/skillopt/envs/deep_reflect.py
@@ -21,7 +21,7 @@ def run_no_reference_deep_reflect(
output_requirements: list[str] | None = None,
metadata_builder: Callable[[dict], dict] | None = None,
) -> list[dict | None]:
- """Run teacher-designed diagnostic probing without hidden references."""
+ """Run optimizer-designed diagnostic probing without hidden references."""
if not getattr(adapter, "use_deep_reflect", False):
return []
if not isinstance(env_manager, list):
diff --git a/skillopt/envs/livemathematicianbench/prompts/deep_probe.md b/skillopt/envs/livemathematicianbench/prompts/deep_probe.md
index 9987da6..a3aed5d 100644
--- a/skillopt/envs/livemathematicianbench/prompts/deep_probe.md
+++ b/skillopt/envs/livemathematicianbench/prompts/deep_probe.md
@@ -1,13 +1,13 @@
You are an expert diagnostic-probe designer for theorem-grounded mathematical multiple-choice tasks.
-You will be shown representative trajectories, the current student skill, and the student's original prompt context.
-Design one SMALL diagnostic instruction that exposes the student's intermediate judgment without materially changing the original scaffold.
+You will be shown representative trajectories, the current target skill, and the target's original prompt context.
+Design one SMALL diagnostic instruction that exposes the target's intermediate judgment without materially changing the original scaffold.
## Hard Constraints
1. Do NOT substantially change the original scaffold.
2. Do NOT prescribe a new multi-step theorem-solving procedure.
3. Do NOT ask for a full proof, full chain-of-thought, or exhaustive option-by-option derivation.
-4. Ask only for a short readout of the signals already behind the student's current answer.
+4. Ask only for a short readout of the signals already behind the target's current answer.
5. Keep it brief and structured, and require the final answer to remain in ....
## Good Probe Targets
@@ -19,5 +19,5 @@ Design one SMALL diagnostic instruction that exposes the student's intermediate
Respond ONLY with a valid JSON object:
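{
"reasoning": "",
- "probe_instruction": "<short diagnostic instruction to append to the student's prompt>"
+ "probe_instruction": "<short diagnostic instruction to append to the target's prompt>"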
}
diff --git a/skillopt/envs/livemathematicianbench/prompts/deep_probe_codex.md b/skillopt/envs/livemathematicianbench/prompts/deep_probe_codex.md
index 7ed4f3e..fe16b4b 100644
--- a/skillopt/envs/livemathematicianbench/prompts/deep_probe_codex.md
+++ b/skillopt/envs/livemathematicianbench/prompts/deep_probe_codex.md
@@ -1,26 +1,26 @@
You are an expert diagnostic-probe designer for theorem-grounded mathematical multiple-choice tasks executed through a Codex trace.
-You will be shown representative trajectories, the current student skill, the student's original prompt context, hidden reference fields, and numbered Codex trace steps.
-Choose exactly one trajectory and one probe point. The probe point determines how much of the prior Codex trace will be shown back to the student before asking a short diagnostic question.
+You will be shown representative trajectories, the current target skill, the target's original prompt context, hidden reference fields, and numbered Codex trace steps.
+Choose exactly one trajectory and one probe point. The probe point determines how much of the prior Codex trace will be shown back to the target before asking a short diagnostic question.
## Hard Constraints
-1. Do NOT reveal or paraphrase the hidden reference directly to the student.
+1. Do NOT reveal or paraphrase the hidden reference directly to the target.
2. Do NOT prescribe a new full solving procedure.
3. Do NOT ask for a full proof, full chain-of-thought, or exhaustive option-by-option derivation.
-4. Ask only for a short readout of the signal that should already exist at that point in the student's process.
+4. Ask only for a short readout of the signal that should already exist at that point in the target's process.
5. The probe instruction must explicitly request a short ... block before the final ....
6. Select a probe point that is informative about theorem choice, decisive constraint, option elimination, or why a stronger/weaker option should be rejected.
## Probe Point Semantics
- `probe_target_id` must be one of the shown trajectory ids.
-- `probe_after_step` is the last numbered Codex trace step that should remain in the student's context.
-- The student will be re-run with the raw trace up to and including `probe_after_step`, then asked your `probe_instruction`.
+- `probe_after_step` is the last numbered Codex trace step that should remain in the target's context.
+- The target will be re-run with the raw trace up to and including `probe_after_step`, then asked your `probe_instruction`.
- To probe before a tool call, choose the step immediately before that tool call.
Respond ONLY with a valid JSON object:
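{
- "reasoning": "<why this trajectory and probe point expose the student's latent gap>",
+ "reasoning": "<why this trajectory and probe point expose the target's latent gap>",
"probe_target_id": "<one of the shown trajectory ids>",
"probe_after_step": <number of the last Codex trace step to keep in context>,
- "probe_instruction": "<short diagnostic instruction to ask the student after the kept trace>"
+ "probe_instruction": "<short diagnostic instruction to ask the target after the kept trace>"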
}
diff --git a/skillopt/envs/mathverse/adapter.py b/skillopt/envs/mathverse/adapter.py
index 1c6af51..832fa95 100644
--- a/skillopt/envs/mathverse/adapter.py
+++ b/skillopt/envs/mathverse/adapter.py
@@ -10,7 +10,7 @@ from skillopt.envs.mathverse.dataloader import MathVerseDataLoader
from skillopt.envs.mathverse.rollout import run_batch
from skillopt.gradient.deep_probe import generate_deep_probe_instruction
from skillopt.gradient.reflect import run_minibatch_reflect
-from skillopt.model import get_student_backend
+from skillopt.model import get_target_backend
class MathVerseAdapter(EnvAdapter):
@@ -176,7 +176,7 @@ class MathVerseAdapter(EnvAdapter):
selected_ids = {str(item["id"]) for item in selected_items}
selected_results = [row for row in results if str(row.get("id")) in selected_ids]
selected_examples = self.attach_reference_context(selected_results, selected_items)
- codex_backend = get_student_backend() == "codex_exec"
+ codex_backend = get_target_backend() == "codex_exec"
if codex_backend:
selected_examples = self.attach_codex_probe_context(selected_examples, prediction_dir)
selected_metadata = []
diff --git a/skillopt/envs/mathverse/prompts/analyst_error.md b/skillopt/envs/mathverse/prompts/analyst_error.md
index 78ec605..d890fb2 100644
--- a/skillopt/envs/mathverse/prompts/analyst_error.md
+++ b/skillopt/envs/mathverse/prompts/analyst_error.md
@@ -1,7 +1,7 @@
You are an expert failure-analysis agent for visual mathematical reasoning problems.
You will be given MULTIPLE failed trajectories from a single minibatch and the current skill document.
-Each trajectory includes the student's response, the evaluation result, and sometimes a hidden reference
+Each trajectory includes the target's response, the evaluation result, and sometimes a hidden reference
containing the fuller Text Dominant version of the same problem.
Your job is to identify COMMON reasoning failures across the batch and propose concise skill edits.
@@ -17,7 +17,7 @@ Your job is to identify COMMON reasoning failures across the batch and propose c
1. Focus on patterns that recur across the minibatch.
2. Prefer edits that improve visual grounding and exact answer selection.
3. Do not hardcode problem-specific formulas or answers.
-4. If hidden reference text is present, use it only to infer what information the student failed to recover from the Text Lite version.
+4. If hidden reference text is present, use it only to infer what information the target failed to recover from the Text Lite version.
Respond ONLY with a valid JSON object:
{
diff --git a/skillopt/envs/mathverse/prompts/deep_probe.md b/skillopt/envs/mathverse/prompts/deep_probe.md
index 04db36b..f5b7b67 100644
--- a/skillopt/envs/mathverse/prompts/deep_probe.md
+++ b/skillopt/envs/mathverse/prompts/deep_probe.md
@@ -1,16 +1,16 @@
You are an expert diagnostic-probe designer for visual mathematical reasoning tasks.
-You will be shown representative trajectories, the current student skill, and the student's original prompt context.
+You will be shown representative trajectories, the current target skill, and the target's original prompt context.
Some trajectories may also include a hidden reference containing the fuller Text Dominant wording of the same problem.
-Design one SMALL diagnostic instruction that exposes the student's intermediate judgment without materially changing the original scaffold.
+Design one SMALL diagnostic instruction that exposes the target's intermediate judgment without materially changing the original scaffold.
## Hard Constraints
1. Do NOT substantially change the original scaffold.
2. Do NOT prescribe a new long multi-step solving procedure.
3. Do NOT ask for a full proof or full chain-of-thought.
-4. Ask only for a short readout of the signals already behind the student's current answer.
+4. Ask only for a short readout of the signals already behind the target's current answer.
5. Keep it brief and structured, and require the final answer to remain in ....
-6. If hidden reference text is present, use it only to target what visual or textual constraint the student likely missed.
+6. If hidden reference text is present, use it only to target what visual or textual constraint the target likely missed.
## Good Probe Targets
- decisive diagram cue
@@ -21,5 +21,5 @@ Design one SMALL diagnostic instruction that exposes the student's intermediate
Respond ONLY with a valid JSON object:
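{
"reasoning": "",
- "probe_instruction": "<short diagnostic instruction to append to the student's prompt>"
+ "probe_instruction": "<short diagnostic instruction to append to the target's prompt>"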
}
diff --git a/skillopt/envs/mathverse/rollout.py b/skillopt/envs/mathverse/rollout.py
index fbb63e2..3f5329f 100644
--- a/skillopt/envs/mathverse/rollout.py
+++ b/skillopt/envs/mathverse/rollout.py
@@ -8,8 +8,8 @@ import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from skillopt.envs.mathverse.evaluator import evaluate_item, evaluation_mode, extract_answer
-from skillopt.model import chat_student_messages, get_student_backend, is_student_exec_backend
-from skillopt.model.codex_harness import prepare_workspace, render_skill_md, run_student_exec
+from skillopt.model import chat_target_messages, get_target_backend, is_target_exec_backend
+from skillopt.model.codex_harness import prepare_workspace, render_skill_md, run_target_exec
from skillopt.prompts import load_prompt
@@ -144,10 +144,10 @@ def _run_codex_once(
images=[item["image_path"]],
)
prompt = (
- "Use the `skillopt-student` skill available in this workspace.\n"
+ "Use the `skillopt-target` skill available in this workspace.\n"
"Read `task.md`, inspect the attached image, solve the problem, and return only the final answer inside ...."
)
- final_message, raw = run_student_exec(
+ final_message, raw = run_target_exec(
work_dir=work_dir,
prompt=prompt,
model=model,
@@ -201,7 +201,7 @@ def process_one(
pred_dir = os.path.join(out_root, "predictions", item_id)
os.makedirs(pred_dir, exist_ok=True)
- if is_student_exec_backend():
+ if is_target_exec_backend():
from skillopt.model import azure_openai as _llm
response = ""
@@ -215,7 +215,7 @@ def process_one(
pred_dir=pred_dir,
item=item,
skill_content=skill_content,
- model=_llm.STUDENT_DEPLOYMENT,
+ model=_llm.TARGET_DEPLOYMENT,
timeout=120,
image_detail=image_detail,
diagnostic_mode=diagnostic_mode if turn == 0 else False,
@@ -230,9 +230,9 @@ def process_one(
result["response"] = response
result["agent_ok"] = True
result["n_turns"] = len(conversation) - 1
- with open(os.path.join(pred_dir, "student_system_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_system_prompt.txt"), "w", encoding="utf-8") as f:
f.write(system_prompt)
- with open(os.path.join(pred_dir, "student_user_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_user_prompt.txt"), "w", encoding="utf-8") as f:
f.write(user_text)
else:
messages, system_prompt, user_text = _build_messages(
@@ -249,7 +249,7 @@ def process_one(
]
for turn in range(max_turns):
if turn == 0:
- resp_text, _ = chat_student_messages(
+ resp_text, _ = chat_target_messages(
messages=messages,
max_completion_tokens=1024,
retries=5,
@@ -267,7 +267,7 @@ def process_one(
{"role": "assistant", "content": response},
{"role": "user", "content": refinement_text},
]
- resp_text, _ = chat_student_messages(
+ resp_text, _ = chat_target_messages(
messages=refinement_messages,
max_completion_tokens=768,
retries=5,
@@ -281,9 +281,9 @@ def process_one(
result["response"] = response
result["agent_ok"] = True
result["n_turns"] = len(conversation) - 1
- with open(os.path.join(pred_dir, "student_system_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_system_prompt.txt"), "w", encoding="utf-8") as f:
f.write(system_prompt)
- with open(os.path.join(pred_dir, "student_user_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_user_prompt.txt"), "w", encoding="utf-8") as f:
f.write(user_text)
eval_result = evaluate_item(
diff --git a/skillopt/envs/mmrb/adapter.py b/skillopt/envs/mmrb/adapter.py
index 3fd57e5..dd17ef5 100644
--- a/skillopt/envs/mmrb/adapter.py
+++ b/skillopt/envs/mmrb/adapter.py
@@ -10,7 +10,7 @@ from skillopt.gradient.reflect import run_minibatch_reflect
from skillopt.envs.base import EnvAdapter
from skillopt.envs.mmrb.dataloader import MMRBDataLoader
from skillopt.envs.mmrb.rollout import run_batch
-from skillopt.model import get_student_backend
+from skillopt.model import get_target_backend
class MMRBAdapter(EnvAdapter):
@@ -185,7 +185,7 @@ class MMRBAdapter(EnvAdapter):
random_seed = kwargs.get("random_seed")
step_buffer_context = kwargs.get("step_buffer_context", "")
meta_skill_context = kwargs.get("meta_skill_context", "")
- codex_backend = get_student_backend() == "codex_exec"
+ codex_backend = get_target_backend() == "codex_exec"
selected_items = self.select_representative_items(
results,
env_manager if isinstance(env_manager, list) else None,
diff --git a/skillopt/envs/mmrb/rollout.py b/skillopt/envs/mmrb/rollout.py
index 3e0bfc6..99bcbdb 100644
--- a/skillopt/envs/mmrb/rollout.py
+++ b/skillopt/envs/mmrb/rollout.py
@@ -9,8 +9,8 @@ import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from skillopt.envs.mmrb.evaluator import evaluate_item, evaluation_mode
-from skillopt.model import chat_student_messages, get_student_backend, is_student_exec_backend
-from skillopt.model.codex_harness import prepare_workspace, render_skill_md, run_student_exec
+from skillopt.model import chat_target_messages, get_target_backend, is_target_exec_backend
+from skillopt.model.codex_harness import prepare_workspace, render_skill_md, run_target_exec
from skillopt.prompts import load_prompt
_IMAGE_REF_RE = re.compile(r"\{image#(\d+)\}", re.IGNORECASE)
@@ -177,11 +177,11 @@ def _run_codex_once(
images=item["image_paths"],
)
prompt = (
- "Use the `skillopt-student` skill available in this workspace.\n"
+ "Use the `skillopt-target` skill available in this workspace.\n"
"Read `task.md`, inspect all attached images, and answer the question.\n"
"Keep the final answer inside ...."
)
- final_message, raw = run_student_exec(
+ final_message, raw = run_target_exec(
work_dir=work_dir,
prompt=prompt,
model=model,
@@ -226,7 +226,7 @@ def process_one(
pred_dir = os.path.join(out_root, "predictions", item_id)
os.makedirs(pred_dir, exist_ok=True)
- if is_student_exec_backend():
+ if is_target_exec_backend():
from skillopt.model import azure_openai as _llm
response = ""
@@ -245,7 +245,7 @@ def process_one(
pred_dir=pred_dir,
item=item,
skill_content=skill_content,
- model=_llm.STUDENT_DEPLOYMENT,
+ model=_llm.TARGET_DEPLOYMENT,
timeout=120,
image_detail=image_detail,
diagnostic_mode=diagnostic_mode if turn == 0 else False,
@@ -260,9 +260,9 @@ def process_one(
result["response"] = response
result["agent_ok"] = True
result["n_turns"] = len(conversation) - 1
- with open(os.path.join(pred_dir, "student_system_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_system_prompt.txt"), "w", encoding="utf-8") as f:
f.write(system_prompt)
- with open(os.path.join(pred_dir, "student_user_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_user_prompt.txt"), "w", encoding="utf-8") as f:
f.write(user_text)
eval_result = evaluate_item(item=item, prediction_text=response)
@@ -310,7 +310,7 @@ def process_one(
for turn in range(max_turns):
if turn == 0:
- resp_text, _ = chat_student_messages(
+ resp_text, _ = chat_target_messages(
messages=messages,
max_completion_tokens=768,
retries=5,
@@ -326,7 +326,7 @@ def process_one(
"content": "Review the same images carefully and answer again. Keep the final answer inside ....",
},
]
- resp_text, _ = chat_student_messages(
+ resp_text, _ = chat_target_messages(
messages=refinement_messages,
max_completion_tokens=512,
retries=5,
@@ -341,9 +341,9 @@ def process_one(
result["agent_ok"] = True
result["n_turns"] = len(conversation) - 1
- with open(os.path.join(pred_dir, "student_system_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_system_prompt.txt"), "w", encoding="utf-8") as f:
f.write(system_prompt)
- with open(os.path.join(pred_dir, "student_user_prompt.txt"), "w", encoding="utf-8") as f:
+ with open(os.path.join(pred_dir, "target_user_prompt.txt"), "w", encoding="utf-8") as f:
f.write(user_text)
eval_result = evaluate_item(item=item, prediction_text=response)
diff --git a/skillopt/envs/sealqa/adapter.py b/skillopt/envs/sealqa/adapter.py
index d5b6665..551c3fa 100644
--- a/skillopt/envs/sealqa/adapter.py
+++ b/skillopt/envs/sealqa/adapter.py
@@ -105,11 +105,11 @@ class SealQAAdapter(EnvAdapter):
random_seed=kwargs.get('random_seed'),
step_buffer_context=kwargs.get('step_buffer_context', ''),
output_requirements=[
- "- There is no hidden reference block. Use only the question, provided evidence, URL/fetch trace, student output, and evaluation result to infer what intermediate state is worth probing.",
+ "- There is no hidden reference block. Use only the question, provided evidence, URL/fetch trace, target output, and evaluation result to infer what intermediate state is worth probing.",
"- The instruction must explicitly request a short ... block before the final ....",
"- The readout should focus on effective time frame, conflicting evidence, decisive source, candidate answer, and answer-finalization rule.",
"- Do not ask for exhaustive web summaries or a full chain-of-thought.",
- "- The instruction text should be ready to append directly to the student's prompt.",
+ "- The instruction text should be ready to append directly to the target's prompt.",
],
metadata_builder=lambda item: {
"id": str(item.get('id')),
diff --git a/skillopt/envs/sealqa/evaluator.py b/skillopt/envs/sealqa/evaluator.py
index 65249ab..ebcd5dd 100644
--- a/skillopt/envs/sealqa/evaluator.py
+++ b/skillopt/envs/sealqa/evaluator.py
@@ -64,7 +64,7 @@ def _build_grader_client() -> tuple[OpenAI | AzureOpenAI, str]:
openai_key = os.environ.get('OPENAI_API_KEY', '').strip()
api_key = azure_key or openai_key
if endpoint and api_version and api_key:
- model = os.environ.get('SEALQA_GRADER_AZURE_MODEL', '').strip() or os.environ.get('SEALQA_GRADER_MODEL', '').strip() or os.environ.get('AZURE_MODEL_NAME', '').strip() or os.environ.get('TEACHER_DEPLOYMENT', '').strip() or 'gpt-5.4'
+ model = os.environ.get('SEALQA_GRADER_AZURE_MODEL', '').strip() or os.environ.get('SEALQA_GRADER_MODEL', '').strip() or os.environ.get('AZURE_MODEL_NAME', '').strip() or os.environ.get('OPTIMIZER_DEPLOYMENT', '').strip() or 'gpt-5.4'
client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=endpoint.rstrip('/'))
return client, model
diff --git a/skillopt/envs/sealqa/rollout.py b/skillopt/envs/sealqa/rollout.py
index a030230..41b1095 100644
--- a/skillopt/envs/sealqa/rollout.py
+++ b/skillopt/envs/sealqa/rollout.py
@@ -7,8 +7,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from skillopt.envs.sealqa.evaluator import score_sealqa
from skillopt.envs.sealqa.tool_runtime import web_fetch
-from skillopt.model import chat_student, get_student_backend, is_student_exec_backend
-from skillopt.model.codex_harness import prepare_workspace, render_skill_md, run_student_exec
+from skillopt.model import chat_target, get_target_backend, is_target_exec_backend
+from skillopt.model.codex_harness import prepare_workspace, render_skill_md, run_target_exec
from skillopt.prompts import load_prompt
_FINAL_RE = re.compile(r"(.*?)", re.IGNORECASE | re.DOTALL)
@@ -83,11 +83,11 @@ def _run_codex_once(
task_text=final_task_text,
)
prompt = (
- "Use the `skillopt-student` skill available in this workspace.\n"
+ "Use the `skillopt-target` skill available in this workspace.\n"
"Read `task.md`, answer the SealQA question using the provided evidence,\n"
"and return the final answer inside ...."
)
- final_message, raw = run_student_exec(
+ final_message, raw = run_target_exec(
work_dir=work_dir,
prompt=prompt,
model=model,
@@ -121,14 +121,14 @@ def process_one(
fail_reason = ''
try:
- if is_student_exec_backend():
+ if is_target_exec_backend():
from skillopt.model import azure_openai as _llm
response, _raw, system, user_for_save = _run_codex_once(
pred_dir=pred_dir,
skill_content=skill_content,
task_text=user,
- model=_llm.STUDENT_DEPLOYMENT,
+ model=_llm.TARGET_DEPLOYMENT,
timeout=120,
)
final_response = response
@@ -138,7 +138,7 @@ def process_one(
else:
user = user_for_save
else:
- response, _ = chat_student(
+ response, _ = chat_target(
system=system,
user=user,
max_completion_tokens=768,
@@ -162,17 +162,17 @@ def process_one(
conversation.append({'type': 'tool_call', 'cmd': f'web_fetch({raw_url!r})', 'obs': fetched})
if fetched_blocks:
retry_user = user + '\n\n## Fetched URL Content\n' + '\n\n'.join(fetched_blocks)
- if is_student_exec_backend():
+ if is_target_exec_backend():
retry_response, _raw, system, retry_user = _run_codex_once(
pred_dir=pred_dir,
skill_content=skill_content,
task_text=retry_user,
- model=_llm.STUDENT_DEPLOYMENT,
+ model=_llm.TARGET_DEPLOYMENT,
timeout=120,
previous_response=final_response,
)
else:
- retry_response, _ = chat_student(
+ retry_response, _ = chat_target(
system=system,
user=retry_user,
max_completion_tokens=768,
@@ -190,9 +190,9 @@ def process_one(
except Exception as e: # noqa: BLE001
fail_reason = f'error: {e}'
- with open(os.path.join(pred_dir, 'student_system_prompt.txt'), 'w', encoding='utf-8') as f:
+ with open(os.path.join(pred_dir, 'target_system_prompt.txt'), 'w', encoding='utf-8') as f:
f.write(system)
- with open(os.path.join(pred_dir, 'student_user_prompt.txt'), 'w', encoding='utf-8') as f:
+ with open(os.path.join(pred_dir, 'target_user_prompt.txt'), 'w', encoding='utf-8') as f:
f.write(user)
with open(os.path.join(pred_dir, 'conversation.json'), 'w', encoding='utf-8') as f:
json.dump(conversation, f, ensure_ascii=False, indent=2)
@@ -211,8 +211,8 @@ def process_one(
'fail_reason': fail_reason or ('' if score >= 1.0 else f"predicted '{final_answer}' but expected '{item.get('ground_truth', '')}'"),
'agent_ok': not fail_reason,
'n_turns': len(conversation),
- 'student_system_prompt': system,
- 'student_user_prompt': user,
+ 'target_system_prompt': system,
+ 'target_user_prompt': user,
}
return result
diff --git a/skillopt/envs/searchqa/prompts/deep_probe.md b/skillopt/envs/searchqa/prompts/deep_probe.md
index 63ab811..3715bb7 100644
--- a/skillopt/envs/searchqa/prompts/deep_probe.md
+++ b/skillopt/envs/searchqa/prompts/deep_probe.md
@@ -1,8 +1,8 @@
You are an expert diagnostic-probe designer for retrieval-style question answering tasks.
-You will be shown representative trajectories, the current student skill, the student's prompt context,
+You will be shown representative trajectories, the current target skill, the target's prompt context,
and the evaluation result including the gold answer. There is NO hidden chain-of-thought reference.
-Design one SMALL diagnostic instruction that exposes the student's intermediate reading or evidence-selection state
+Design one SMALL diagnostic instruction that exposes the target's intermediate reading or evidence-selection state
without materially changing the original scaffold.
## Hard Constraints
@@ -11,7 +11,7 @@ without materially changing the original scaffold.
3. You MAY ask for a short structured readout of intermediate conclusions, evidence candidates, or elimination decisions.
4. Do NOT ask for exhaustive quotation of the whole context or a full chain-of-thought.
5. Keep it brief and structured, and require the final answer to remain in ....
-6. Use the gold answer only to target a useful probe; do not simply force the student to restate the gold answer.
+6. Use the gold answer only to target a useful probe; do not simply force the target model to restate the gold answer.
## Good Probe Targets
- the most likely supporting span or document cue
@@ -23,5 +23,5 @@ without materially changing the original scaffold.
Respond ONLY with a valid JSON object:
{
"reasoning": "",
- "probe_instruction": ""
+ "probe_instruction": ""
}
diff --git a/skillopt/envs/spreadsheetbench/prompts/deep_probe.md b/skillopt/envs/spreadsheetbench/prompts/deep_probe.md
index a33f518..5fdd541 100644
--- a/skillopt/envs/spreadsheetbench/prompts/deep_probe.md
+++ b/skillopt/envs/spreadsheetbench/prompts/deep_probe.md
@@ -1,18 +1,18 @@
You are an expert diagnostic-probe designer for spreadsheet manipulation tasks.
-You will design one short diagnostic instruction to append to the student's
+You will design one short diagnostic instruction to append to the target's
existing SpreadsheetBench prompt for a handful of representative trajectories.
-The goal is to expose whether the student already knows the right task
+The goal is to expose whether the target model already knows the right task
decomposition, source range, target range, and transformation rule without
substantially changing the current scaffold.
## Hard Constraints
-1. Do NOT substantially change the student's current scaffold.
+1. Do NOT substantially change the target's current scaffold.
2. Do NOT prescribe a brand-new full algorithm.
3. Do NOT ask for exhaustive cell-by-cell enumeration.
4. Keep the diagnostic readout brief and structured.
-5. The student must still complete the original spreadsheet task.
+5. The target must still complete the original spreadsheet task.
6. Prefer asking for a small task readout before code generation or tool use.
7. Never ask for hidden reference content or golden values.
@@ -31,5 +31,5 @@ substantially changing the current scaffold.
Respond ONLY with a valid JSON object:
{
"reasoning": "",
- "probe_instruction": ""
+ "probe_instruction": ""
}
diff --git a/skillopt/envs/swebench/adapter.py b/skillopt/envs/swebench/adapter.py
index 9728748..2b9ee09 100644
--- a/skillopt/envs/swebench/adapter.py
+++ b/skillopt/envs/swebench/adapter.py
@@ -31,7 +31,7 @@ class SWEBenchAdapter(EnvAdapter):
step_limit: int = 50,
cost_limit: float = 3.0,
timeout_per_instance: int = 600,
- student_model: str = "",
+ target_model: str = "",
) -> None:
self.dataset_name = dataset_name
self.hf_split = hf_split
@@ -44,7 +44,7 @@ class SWEBenchAdapter(EnvAdapter):
self.step_limit = step_limit
self.cost_limit = cost_limit
self.timeout_per_instance = timeout_per_instance
- self.student_model = student_model
+ self.target_model = target_model
self.dataloader = SWEBenchDataLoader(
split_dir=split_dir,
data_path=data_path,
@@ -60,7 +60,7 @@ class SWEBenchAdapter(EnvAdapter):
def setup(self, cfg: dict) -> None:
super().setup(cfg)
- self.student_model = str(self.student_model or cfg.get("student_model") or "gpt-5.4").strip()
+ self.target_model = str(self.target_model or cfg.get("target_model") or "gpt-5.4").strip()
self.dataset_name = str(self.dataset_name or cfg.get("dataset_name") or "lite").strip()
self.hf_split = str(self.hf_split or cfg.get("hf_split") or "test").strip()
self.dataloader.setup(cfg)
@@ -85,7 +85,7 @@ class SWEBenchAdapter(EnvAdapter):
items=items,
out_root=out_dir,
skill_content=skill_content,
- student_model=self.student_model,
+ target_model=self.target_model,
dataset_name=self.dataset_name,
hf_split=self.hf_split,
workers=self.workers,
diff --git a/skillopt/envs/swebench/rollout.py b/skillopt/envs/swebench/rollout.py
index db26b20..48072c1 100644
--- a/skillopt/envs/swebench/rollout.py
+++ b/skillopt/envs/swebench/rollout.py
@@ -36,8 +36,8 @@ def _setup_litellm_env() -> None:
os.environ[key] = value
-def _normalize_student_model(student_model: str) -> str:
- model = str(student_model or "").strip()
+def _normalize_target_model(target_model: str) -> str:
+ model = str(target_model or "").strip()
if not model:
return "azure/gpt-5.4"
if "/" in model:
@@ -57,7 +57,7 @@ def _load_json(path: str) -> dict | list | None:
def _build_agent_config(
*,
skill_content: str,
- student_model: str,
+ target_model: str,
step_limit: int,
cost_limit: float,
) -> tuple[dict, str]:
@@ -88,7 +88,7 @@ def _build_agent_config(
"cost_limit": float(cost_limit),
},
"model": {
- "model_name": _normalize_student_model(student_model),
+ "model_name": _normalize_target_model(target_model),
"cost_tracking": "ignore_errors",
},
}
@@ -120,7 +120,7 @@ def _run_rollout(
items: list[dict],
predictions_dir: str,
skill_content: str,
- student_model: str,
+ target_model: str,
workers: int,
step_limit: int,
cost_limit: float,
@@ -136,7 +136,7 @@ def _run_rollout(
_setup_litellm_env()
config, system_prompt = _build_agent_config(
skill_content=skill_content,
- student_model=student_model,
+ target_model=target_model,
step_limit=step_limit,
cost_limit=cost_limit,
)
@@ -190,9 +190,9 @@ def _run_rollout(
).strip()
with open(task_dir / "conversation.json", "w", encoding="utf-8") as f:
json.dump(messages, f, ensure_ascii=False, indent=2)
- with open(task_dir / "student_system_prompt.txt", "w", encoding="utf-8") as f:
+ with open(task_dir / "target_system_prompt.txt", "w", encoding="utf-8") as f:
f.write(system_prompt)
- with open(task_dir / "student_user_prompt.txt", "w", encoding="utf-8") as f:
+ with open(task_dir / "target_user_prompt.txt", "w", encoding="utf-8") as f:
f.write(user_prompt)
results.append(
@@ -288,7 +288,7 @@ def run_batch(
items: list[dict],
out_root: str,
skill_content: str,
- student_model: str,
+ target_model: str,
dataset_name: str,
hf_split: str,
workers: int,
@@ -314,7 +314,7 @@ def run_batch(
items=items,
predictions_dir=predictions_dir,
skill_content=skill_content,
- student_model=student_model,
+ target_model=target_model,
workers=workers,
step_limit=step_limit,
cost_limit=cost_limit,
diff --git a/skillopt/gradient/deep_probe.py b/skillopt/gradient/deep_probe.py
index e732272..ea5a327 100644
--- a/skillopt/gradient/deep_probe.py
+++ b/skillopt/gradient/deep_probe.py
@@ -1,8 +1,8 @@
-"""Teacher-written diagnostic probe generation for deep reflection."""
+"""Optimizer-written diagnostic probe generation for deep reflection."""
from __future__ import annotations
from skillopt.gradient.reflect import fmt_minibatch_trajectories
-from skillopt.model import chat_teacher
+from skillopt.model import chat_optimizer
from skillopt.optimizer.meta_skill import format_meta_skill_context
from skillopt.prompts import load_prompt
from skillopt.utils import extract_json
@@ -27,21 +27,21 @@ def generate_deep_probe_instruction(
user = (
f"## Current Skill\n{skill_content}\n\n"
"## Probe Design Goal\n"
- "Design one short diagnostic instruction to append to the student prompt.\n"
- "The instruction should expose the student's current intermediate judgment\n"
+ "Design one short diagnostic instruction to append to the target prompt.\n"
+ "The instruction should expose the target's current intermediate judgment\n"
"without materially changing the original scaffold.\n\n"
)
if step_buffer_context.strip():
user += f"## Previous Steps in This Epoch\n{step_buffer_context}\n\n"
- teacher_ctx = format_meta_skill_context(meta_skill_context)
- if teacher_ctx:
- user += teacher_ctx + "\n\n"
+ optimizer_ctx = format_meta_skill_context(meta_skill_context)
+ if optimizer_ctx:
+ user += optimizer_ctx + "\n\n"
requirements = output_requirements or [
- "- Some trajectories may include a hidden Reference block. Use it to identify what intermediate conclusion matters, but do not reveal or paraphrase that reference directly to the student.",
+ "- Some trajectories may include a hidden Reference block. Use it to identify what intermediate conclusion matters, but do not reveal or paraphrase that reference directly to the target.",
"- The instruction must explicitly request a short ... block before the final ....",
"- Keep the readout concise and structured.",
"- Do not ask for exhaustive listing, full derivation, or a new solving protocol.",
- "- The instruction text should be ready to append directly to the student's prompt.",
+ "- The instruction text should be ready to append directly to the target's prompt.",
]
user += (
f"## Representative Trajectories ({len(items)} total)\n{trajectories_text}\n\n"
@@ -51,7 +51,7 @@ def generate_deep_probe_instruction(
)
try:
- response, _ = chat_teacher(
+ response, _ = chat_optimizer(
system=actual_system,
user=user,
max_completion_tokens=1024,
diff --git a/skillopt/optimizer/meta_reflect.py b/skillopt/optimizer/meta_reflect.py
index 7c483e2..18afff5 100644
--- a/skillopt/optimizer/meta_reflect.py
+++ b/skillopt/optimizer/meta_reflect.py
@@ -17,7 +17,7 @@ directions are effective, which are not). This is the "momentum buffer".
Public API
----------
- :func:`build_epoch_history` — format an epoch's step records for meta-reflect
-- :func:`run_meta_reflect` — one teacher call to produce high-level edits + meta_summary
+- :func:`run_meta_reflect` — one optimizer call to produce high-level edits + meta_summary
"""
from __future__ import annotations
@@ -25,7 +25,7 @@ import json
import os
import traceback
-from skillopt.model import chat_teacher
+from skillopt.model import chat_optimizer
from skillopt.optimizer.update_modes import (
describe_item,
get_payload_items,
@@ -46,7 +46,7 @@ def build_epoch_history(
*,
update_mode: str = "patch",
) -> str:
- """Format an epoch's step records into text for the meta-reflect teacher.
+ """Format an epoch's step records into text for the meta-reflect optimizer.
For each step, includes the exact edits applied (read from
``ranked_edits.json``) and the gate evaluation result.
@@ -129,7 +129,7 @@ def build_epoch_history(
return "\n\n".join(parts)
-# ── Meta-reflect teacher call ────────────────────────────────────────────────
+# ── Meta-reflect optimizer call ────────────────────────────────────────────────
def run_meta_reflect(
@@ -141,7 +141,7 @@ def run_meta_reflect(
system_prompt: str | None = None,
update_mode: str = "patch",
) -> dict | None:
- """Run one meta-reflect teacher call for an epoch.
+ """Run one meta-reflect optimizer call for an epoch.
Parameters
----------
@@ -179,7 +179,7 @@ def run_meta_reflect(
)
try:
- response, _ = chat_teacher(
+ response, _ = chat_optimizer(
system=actual_system,
user=user,
max_completion_tokens=4096,
diff --git a/skillopt/prompts/deep_probe.md b/skillopt/prompts/deep_probe.md
index a9d5fb2..bd86b32 100644
--- a/skillopt/prompts/deep_probe.md
+++ b/skillopt/prompts/deep_probe.md
@@ -1,20 +1,20 @@
You are an expert diagnostic-probe designer for reflective skill learning.
-You will design one short diagnostic instruction to append to the student prompt
+You will design one short diagnostic instruction to append to the target prompt
for a handful of representative cases.
-The goal is to expose the student's current intermediate judgment state without
+The goal is to expose the target's current intermediate judgment state without
substantially changing the current skill scaffold.
## Hard Constraints
-1. Do NOT substantially change the student's existing scaffold.
+1. Do NOT substantially change the target's existing scaffold.
2. Do NOT prescribe a new multi-step solving procedure.
3. Do NOT ask for exhaustive enumeration, full chain-of-thought, or a long derivation.
-4. Ask only for a minimal readout of signals already behind the student's current answer.
+4. Ask only for a minimal readout of signals already behind the target's current answer.
5. Keep the diagnostic block brief and structured.
6. The final answer must still be produced in ....
7. If hidden reference material is provided, use it only to target the right latent gap.
-8. Never copy hidden reference content into the student-facing probe.
+8. Never copy hidden reference content into the target-facing probe.
## Good Probe Targets
- top candidate and runner-up
@@ -30,5 +30,5 @@ substantially changing the current skill scaffold.
Respond ONLY with a valid JSON object:
{
"reasoning": "",
- "probe_instruction": ""
+ "probe_instruction": ""
}
diff --git a/skillopt/prompts/deep_probe_codex.md b/skillopt/prompts/deep_probe_codex.md
index b44aff7..5c7d952 100644
--- a/skillopt/prompts/deep_probe_codex.md
+++ b/skillopt/prompts/deep_probe_codex.md
@@ -1,22 +1,22 @@
-You are an expert diagnostic-probe designer for codex-executed student trajectories.
+You are an expert diagnostic-probe designer for Codex-executed target-model trajectories.
-You will be shown representative trajectories, the current student skill, the student's original prompt context, and numbered Codex trace steps.
-Some trajectories may also include a hidden Reference block. Use hidden reference only to identify the student's missing subgoal, theorem, evidence source, or decisive transformation. Do not reveal or paraphrase that reference directly to the student.
+You will be shown representative trajectories, the current target skill, the target's original prompt context, and numbered Codex trace steps.
+Some trajectories may also include a hidden Reference block. Use hidden reference only to identify the target's missing subgoal, theorem, evidence source, or decisive transformation. Do not reveal or paraphrase that reference directly to the target.
-Choose exactly one trajectory and one probe point. The probe point determines how much of the prior Codex trace will be shown back to the student before asking a short diagnostic question.
+Choose exactly one trajectory and one probe point. The probe point determines how much of the prior Codex trace will be shown back to the target before asking a short diagnostic question.
## Hard Constraints
-1. Do NOT reveal or paraphrase hidden reference content to the student.
+1. Do NOT reveal or paraphrase hidden reference content to the target.
2. Do NOT prescribe a new full solving procedure.
3. Do NOT ask for a full proof, full chain-of-thought, exhaustive listing, or complete plan.
-4. Ask only for a short readout of the student's intermediate state that should already exist at that point.
+4. Ask only for a short readout of the target's intermediate state that should already exist at that point.
5. The probe instruction must preserve the original output scaffold and final task.
-6. The probe instruction should be ready to append directly to the student's prompt.
+6. The probe instruction should be ready to append directly to the target's prompt.
## Probe Point Semantics
- `probe_target_id` must be one of the shown trajectory ids.
-- `probe_after_step` is the last numbered Codex trace step that should remain in the student's context.
-- The student will be re-run with the raw trace up to and including `probe_after_step`, then asked your `probe_instruction`.
+- `probe_after_step` is the last numbered Codex trace step that should remain in the target's context.
+- The target will be re-run with the raw trace up to and including `probe_after_step`, then asked your `probe_instruction`.
- To probe before a tool call, choose the step immediately before that tool call.
## Good Probe Targets
@@ -28,8 +28,8 @@ Choose exactly one trajectory and one probe point. The probe point determines ho
Respond ONLY with a valid JSON object:
{
- "reasoning": "",
+ "reasoning": "",
"probe_target_id": "",
"probe_after_step": ,
- "probe_instruction": ""
+ "probe_instruction": ""
}
diff --git a/skillopt/prompts/meta_reflect_rewrite.md b/skillopt/prompts/meta_reflect_rewrite.md
index 92aad29..9679055 100644
--- a/skillopt/prompts/meta_reflect_rewrite.md
+++ b/skillopt/prompts/meta_reflect_rewrite.md
@@ -1,7 +1,7 @@
You are a meta-analyst for an AI agent skill optimization system.
You see the current skill and an epoch's step history. Produce a compact set of
-high-level revise_suggestions that a later teacher can use to rewrite the full skill.
+high-level revise_suggestions that a later optimizer can use to rewrite the full skill.
Focus on:
- merging redundant rules
@@ -20,7 +20,7 @@ Respond ONLY with a valid JSON object:
"type": "add_rule|remove_rule|merge_rules|reorganize|compress|clarify",
"title": "",
"motivation": "",
- "instruction": "",
+ "instruction": "",
"priority_hint": "high|medium|low"
}
]