mirror of
https://github.com/microsoft/SkillOpt.git
synced 2026-07-03 14:02:58 +08:00
176 lines
6.5 KiB
Bash
Executable File
176 lines
6.5 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
set -euo pipefail
|
|
|
|
REPO="/home/azureuser/workspace-gzy/SkillReflection"
|
|
PYTHON="/home/azureuser/workspace-gzy/miniconda3/envs/skillopt/bin/python"
|
|
|
|
cd "$REPO"
|
|
|
|
export ANTHROPIC_BASE_URL="${ANTHROPIC_BASE_URL:-http://127.0.0.1:4343}"
|
|
export ANTHROPIC_AUTH_TOKEN="${ANTHROPIC_AUTH_TOKEN:-dummy}"
|
|
export ANTHROPIC_MODEL="${ANTHROPIC_MODEL:-claude-sonnet-4-6}"
|
|
export ANTHROPIC_SMALL_FAST_MODEL="${ANTHROPIC_SMALL_FAST_MODEL:-claude-sonnet-4-6}"
|
|
export DISABLE_NON_ESSENTIAL_MODEL_CALLS="${DISABLE_NON_ESSENTIAL_MODEL_CALLS:-1}"
|
|
export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC="${CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC:-1}"
|
|
|
|
if [[ -f ".secrets/teacher_oaidr9.env" ]]; then
|
|
# shellcheck disable=SC1091
|
|
source ".secrets/teacher_oaidr9.env"
|
|
else
|
|
echo "missing .secrets/teacher_oaidr9.env" >&2
|
|
exit 1
|
|
fi
|
|
|
|
stamp="$(date -u +%Y%m%d_%H%M%S)"
|
|
RUN_ROOT="${1:-outputs/spreadsheet_full_replacements_workers2_timeout1020_${stamp}_run}"
|
|
SESSION="${2:-spreadsheet_full_replacements_${stamp}}"
|
|
|
|
mkdir -p "$RUN_ROOT/logs" "$RUN_ROOT/commands"
|
|
|
|
tmux_started=0
|
|
|
|
launch_run() {
|
|
local run_id="$1"
|
|
shift
|
|
|
|
local cmd_file="$RUN_ROOT/commands/${run_id}.sh"
|
|
local log_file="$RUN_ROOT/logs/${run_id}.log"
|
|
|
|
{
|
|
echo "#!/usr/bin/env bash"
|
|
echo "set -euo pipefail"
|
|
echo "cd '$REPO'"
|
|
printf 'export ANTHROPIC_BASE_URL=%q\n' "$ANTHROPIC_BASE_URL"
|
|
printf 'export ANTHROPIC_AUTH_TOKEN=%q\n' "$ANTHROPIC_AUTH_TOKEN"
|
|
printf 'export ANTHROPIC_MODEL=%q\n' "$ANTHROPIC_MODEL"
|
|
printf 'export ANTHROPIC_SMALL_FAST_MODEL=%q\n' "$ANTHROPIC_SMALL_FAST_MODEL"
|
|
printf 'export DISABLE_NON_ESSENTIAL_MODEL_CALLS=%q\n' "$DISABLE_NON_ESSENTIAL_MODEL_CALLS"
|
|
printf 'export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=%q\n' "$CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC"
|
|
printf '%q ' "$@"
|
|
printf ' >%q 2>&1 < /dev/null\n' "$log_file"
|
|
} > "$cmd_file"
|
|
chmod +x "$cmd_file"
|
|
|
|
if [[ "$tmux_started" -eq 0 ]]; then
|
|
tmux new-session -d -s "$SESSION" -n "$run_id" "bash '$cmd_file'; code=\$?; echo EXIT:\$code; sleep 3600"
|
|
tmux_started=1
|
|
else
|
|
tmux new-window -t "$SESSION" -n "$run_id" "bash '$cmd_file'; code=\$?; echo EXIT:\$code; sleep 3600"
|
|
fi
|
|
echo "$run_id"
|
|
}
|
|
|
|
OPENAI_COMMON=(
|
|
"$PYTHON" -u scripts/train.py
|
|
--config configs/spreadsheetbench/default.yaml
|
|
--cfg-options
|
|
model.teacher_backend=openai_chat
|
|
model.student_backend=openai_chat
|
|
model.teacher=gpt-5.5
|
|
model.student=gpt-5.5
|
|
model.teacher_azure_openai_endpoint="${TEACHER_AZURE_OPENAI_ENDPOINT}"
|
|
model.teacher_azure_openai_api_version="${TEACHER_AZURE_OPENAI_API_VERSION}"
|
|
model.teacher_azure_openai_auth_mode="${TEACHER_AZURE_OPENAI_AUTH_MODE}"
|
|
model.teacher_azure_openai_managed_identity_client_id="${TEACHER_AZURE_OPENAI_MANAGED_IDENTITY_CLIENT_ID}"
|
|
model.teacher_azure_openai_ad_scope="${TEACHER_AZURE_OPENAI_AD_SCOPE}"
|
|
model.student_azure_openai_endpoint="${STUDENT_AZURE_OPENAI_ENDPOINT:-$TEACHER_AZURE_OPENAI_ENDPOINT}"
|
|
model.student_azure_openai_api_version="${STUDENT_AZURE_OPENAI_API_VERSION:-$TEACHER_AZURE_OPENAI_API_VERSION}"
|
|
model.student_azure_openai_auth_mode="${STUDENT_AZURE_OPENAI_AUTH_MODE:-$TEACHER_AZURE_OPENAI_AUTH_MODE}"
|
|
model.student_azure_openai_managed_identity_client_id="${STUDENT_AZURE_OPENAI_MANAGED_IDENTITY_CLIENT_ID:-$TEACHER_AZURE_OPENAI_MANAGED_IDENTITY_CLIENT_ID}"
|
|
model.student_azure_openai_ad_scope="${STUDENT_AZURE_OPENAI_AD_SCOPE:-$TEACHER_AZURE_OPENAI_AD_SCOPE}"
|
|
model.reasoning_effort=medium
|
|
train.num_epochs=4
|
|
train.train_size=0
|
|
train.batch_size=40
|
|
train.accumulation=1
|
|
train.seed=42
|
|
gradient.minibatch_size=8
|
|
gradient.merge_batch_size=8
|
|
gradient.analyst_workers=16
|
|
gradient.use_deep_reflect=false
|
|
optimizer.learning_rate=4
|
|
optimizer.min_learning_rate=2
|
|
optimizer.lr_scheduler=cosine
|
|
optimizer.use_slow_update=true
|
|
optimizer.slow_update_samples=20
|
|
optimizer.use_meta_reflect=false
|
|
evaluation.use_gate=true
|
|
evaluation.eval_test=true
|
|
env.split_mode=split_dir
|
|
env.workers=2
|
|
env.exec_timeout=1020
|
|
env.split_dir=data/ablation_splits/spreadsheetbench/2-1-7_seed42
|
|
env.data_root=data/spreadsheetbench_verified_400
|
|
env.mode=multi
|
|
)
|
|
|
|
HARNESS_RUN="HARNESS-ClaudeInit-Spreadsheet-lr4-multi-full"
|
|
launch_run "$HARNESS_RUN" \
|
|
"$PYTHON" -u scripts/train.py \
|
|
--config configs/spreadsheetbench/default.yaml \
|
|
--cfg-options \
|
|
model.teacher_backend=openai_chat \
|
|
model.teacher=gpt-5.5 \
|
|
model.teacher_azure_openai_endpoint="${TEACHER_AZURE_OPENAI_ENDPOINT}" \
|
|
model.teacher_azure_openai_api_version="${TEACHER_AZURE_OPENAI_API_VERSION}" \
|
|
model.teacher_azure_openai_auth_mode="${TEACHER_AZURE_OPENAI_AUTH_MODE}" \
|
|
model.teacher_azure_openai_managed_identity_client_id="${TEACHER_AZURE_OPENAI_MANAGED_IDENTITY_CLIENT_ID}" \
|
|
model.teacher_azure_openai_ad_scope="${TEACHER_AZURE_OPENAI_AD_SCOPE}" \
|
|
model.reasoning_effort=medium \
|
|
train.num_epochs=4 \
|
|
train.train_size=0 \
|
|
train.batch_size=40 \
|
|
train.accumulation=1 \
|
|
train.seed=42 \
|
|
gradient.minibatch_size=8 \
|
|
gradient.merge_batch_size=8 \
|
|
gradient.analyst_workers=16 \
|
|
gradient.use_deep_reflect=false \
|
|
optimizer.lr_control_mode=fixed \
|
|
optimizer.skill_update_mode=patch \
|
|
optimizer.use_slow_update=true \
|
|
optimizer.slow_update_samples=20 \
|
|
optimizer.use_meta_skill=true \
|
|
optimizer.use_meta_reflect=false \
|
|
evaluation.use_gate=true \
|
|
evaluation.eval_test=true \
|
|
env.split_mode=split_dir \
|
|
env.workers=2 \
|
|
env.exec_timeout=1020 \
|
|
model.student_backend=claude_code_exec \
|
|
model.student=claude-sonnet-4-6 \
|
|
model.claude_code_exec_use_sdk=sdk \
|
|
model.claude_code_exec_effort=medium \
|
|
model.claude_code_exec_max_thinking_tokens=16384 \
|
|
model.codex_trace_to_teacher=false \
|
|
env.out_root="$RUN_ROOT/$HARNESS_RUN" \
|
|
env.split_dir=data/spreadsheetbench/splits \
|
|
env.data_root=data/spreadsheetbench/files \
|
|
env.mode=multi \
|
|
optimizer.learning_rate=4 \
|
|
optimizer.min_learning_rate=1 \
|
|
optimizer.lr_scheduler=constant
|
|
|
|
for repeat in r1 r2 r3; do
|
|
run_id="LRCTRL-spreadsheetbench-full-rewrite-neutral3-full-${repeat}"
|
|
launch_run "$run_id" \
|
|
"${OPENAI_COMMON[@]}" \
|
|
optimizer.lr_control_mode=none \
|
|
optimizer.skill_update_mode=full_rewrite_minibatch \
|
|
optimizer.use_meta_skill=true \
|
|
env.out_root="$RUN_ROOT/$run_id"
|
|
done
|
|
|
|
for repeat in r1 r2 r3; do
|
|
run_id="SLOWMETA-spreadsheetbench-true-false-full-${repeat}"
|
|
launch_run "$run_id" \
|
|
"${OPENAI_COMMON[@]}" \
|
|
optimizer.lr_control_mode=fixed \
|
|
optimizer.skill_update_mode=patch \
|
|
optimizer.use_meta_skill=false \
|
|
env.out_root="$RUN_ROOT/$run_id"
|
|
done
|
|
|
|
echo "RUN_ROOT=$RUN_ROOT"
|
|
echo "SESSION=$SESSION"
|