-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathrun_swe_bench_phase1.sh
More file actions
executable file
·121 lines (111 loc) · 4.3 KB
/
run_swe_bench_phase1.sh
File metadata and controls
executable file
·121 lines (111 loc) · 4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/bin/bash
#
# Phase 1: Generate patches for SWE-bench instances.
#
# Supports two strategies:
# - naive: Single-shot vLLM inference from pre-built prompts.
# - agent: Agentic loop using mini-swe-agent (or any agent via YAML config)
# running inside K8s Jobs with SWE-bench container images.
#
# Prerequisites:
# - RayCluster deployed
# - vLLM server deployed and accessible from the cluster
# - Port-forward active: oc port-forward svc/<ray-head-svc> 8265:8265
# - For naive: prompted dataset built
# - For agent: SWE-bench images available (DockerHub or internal registry)
#
# Usage:
# # Naive strategy (default)
# bash run_swe_bench_phase1.sh
#
# # Agent strategy
# STRATEGY=agent bash run_swe_bench_phase1.sh
#
# # Quick test with 2 instances
# STRATEGY=agent INSTANCE_LIMIT=2 bash run_swe_bench_phase1.sh
# Strict mode: stop on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Every setting below keeps a non-empty value inherited from the environment;
# otherwise ":=" assigns the default shown. Order matters where one default
# is derived from another (RUN_ID and S3_BUCKET feed later defaults).

# ── Common config ───────────────────────────────────────────────
: "${STRATEGY:=naive}"
: "${RAY_ADDRESS:=http://127.0.0.1:8265}"
: "${VLLM_URL:=http://vllm-server:8000/v1}"
: "${MODEL_NAME:=Qwen/Qwen3-1.7B}"
: "${DATASET:=SWE-bench/SWE-bench_Verified}"
: "${SPLIT:=test}"
: "${NUM_WORKERS:=2}"
: "${INSTANCE_LIMIT:=0}"
: "${RUN_ID:=eval-run}"
: "${OUTPUT_DIR:=/tmp/swe-bench-results/${RUN_ID}}"
: "${S3_BUCKET:=swe-bench}"
: "${S3_OUTPUT:=s3://${S3_BUCKET}/runs/${RUN_ID}/predictions.jsonl}"
# Empty means "not configured"; the assignment only guarantees the name is
# defined so later reads are safe under nounset.
: "${MLFLOW_TRACKING_URI:=}"

# ── Naive strategy config ──────────────────────────────────────
: "${PROMPTS:=s3://${S3_BUCKET}/verified/prompts/style-3-oracle.jsonl}"
: "${MAX_TOKENS:=16000}"
: "${TEMPERATURE:=0.15}"

# ── Agent strategy config ──────────────────────────────────────
: "${AGENT_CONFIG:=evals/swe_bench/agents/mini_swe_agent.yaml}"
: "${MODEL_API_KEY:=dummy}"
: "${STEP_LIMIT:=150}"
: "${COST_LIMIT:=3.0}"
# Optional: left empty unless supplied, and only forwarded when non-empty.
: "${K8S_NAMESPACE:=}"
: "${SERVICE_ACCOUNT:=swe-bench-eval}"
: "${IMAGE_REGISTRY:=}"
: "${SWEBENCH_NAMESPACE:=swebench}"
: "${MAX_CONCURRENT_JOBS:=4}"
: "${JOB_TIMEOUT:=600}"
: "${RUN_EVAL:=1}"

# DEBUG=1 turns on command tracing for the remainder of the script.
case "${DEBUG:-0}" in
  1) set -x ;;
esac
# ── Build command ───────────────────────────────────────────────
# The command is assembled as an array so that values containing spaces or
# empty strings stay intact as single arguments when expanded later.
# "${STRATEGY:-naive}" repeats the script's documented default; it is a no-op
# when STRATEGY has already been set to a non-empty value.
CMD_ARGS=(
  python3 -m evals.swe_bench.run_patch_generation
  --strategy "${STRATEGY:-naive}"
  --vllm-url "${VLLM_URL}"
  --model-name "${MODEL_NAME}"
  --dataset "${DATASET}"
  --split "${SPLIT}"
  --num-workers "${NUM_WORKERS}"
  --output-dir "${OUTPUT_DIR}"
  --instance-limit "${INSTANCE_LIMIT}"
  --s3-output "${S3_OUTPUT}"
  --run-id "${RUN_ID}"
)

#######################################
# Append the flags that belong to one generation strategy to CMD_ARGS.
# Globals:
#   CMD_ARGS (appended to)
#   naive: PROMPTS, MAX_TOKENS, TEMPERATURE (read)
#   agent: AGENT_CONFIG, MODEL_API_KEY, STEP_LIMIT, COST_LIMIT,
#          SERVICE_ACCOUNT, SWEBENCH_NAMESPACE, MAX_CONCURRENT_JOBS,
#          JOB_TIMEOUT, K8S_NAMESPACE, IMAGE_REGISTRY, RUN_EVAL (read)
# Arguments:
#   $1 - strategy name: "naive" or "agent"
# Outputs:
#   An error message on stderr when the strategy is not recognised.
# Returns:
#   0 on success; 2 for an unknown strategy (CMD_ARGS is left untouched).
#######################################
append_strategy_args() {
  case "$1" in
    naive)
      CMD_ARGS+=(
        --prompts "${PROMPTS}"
        --max-tokens "${MAX_TOKENS}"
        --temperature "${TEMPERATURE}"
      )
      ;;
    agent)
      CMD_ARGS+=(
        --agent-config "${AGENT_CONFIG}"
        --model-api-key "${MODEL_API_KEY}"
        --step-limit "${STEP_LIMIT}"
        --cost-limit "${COST_LIMIT}"
        --service-account "${SERVICE_ACCOUNT}"
        --swebench-namespace "${SWEBENCH_NAMESPACE}"
        --max-concurrent-jobs "${MAX_CONCURRENT_JOBS}"
        --job-timeout "${JOB_TIMEOUT}"
      )
      # Optional flags are only forwarded when a value was supplied.
      if [[ -n "${K8S_NAMESPACE}" ]]; then
        CMD_ARGS+=(--k8s-namespace "${K8S_NAMESPACE}")
      fi
      if [[ -n "${IMAGE_REGISTRY}" ]]; then
        CMD_ARGS+=(--image-registry "${IMAGE_REGISTRY}")
      fi
      # Only the exact value "0" requests --skip-eval.
      if [[ "${RUN_EVAL}" == "0" ]]; then
        CMD_ARGS+=(--skip-eval)
      fi
      ;;
    *)
      # Previously an unrecognised value (e.g. a typo) fell through silently
      # and the job was submitted without any strategy-specific flags.
      printf 'error: unknown STRATEGY "%s" (expected "naive" or "agent")\n' "$1" >&2
      return 2
      ;;
  esac
}

# Fail fast, before anything is submitted, if the strategy is invalid.
append_strategy_args "${STRATEGY:-naive}" || exit 2
#######################################
# Escape a string so it can be placed between the double quotes of a JSON
# string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments:
#   $1 - raw string
# Outputs:
#   The escaped string on stdout (no trailing newline).
#######################################
json_escape_string() {
  local s="${1-}"
  s=${s//\\/\\\\} # backslashes first, so the escapes added below are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "${s}"
}

# Pass MLflow tracking URI via Ray runtime env so workers pick it up.
# The value is JSON-escaped first: interpolating it raw would yield invalid
# JSON (or inject extra keys) if it contained a double quote or a backslash.
ENV_ARGS=()
if [[ -n "${MLFLOW_TRACKING_URI:-}" ]]; then
  ENV_ARGS+=(--runtime-env-json "{\"env_vars\": {\"MLFLOW_TRACKING_URI\": \"$(json_escape_string "${MLFLOW_TRACKING_URI}")\"}}")
fi
#######################################
# Render an argument vector as one log line.
# Each argument is shell-quoted with printf %q, so spaces and empty values
# remain visible and unambiguous. The argument that follows --model-api-key
# is replaced by <redacted> so the credential is not written to the log.
# Arguments:
#   $@ - the argument vector to render
# Outputs:
#   The rendered line on stdout.
#######################################
format_cmd_for_log() {
  local arg quoted line="" hide_next=0
  for arg in "$@"; do
    if [[ "${hide_next}" == "1" ]]; then
      quoted="<redacted>"
      hide_next=0
    else
      printf -v quoted '%q' "${arg}"
      if [[ "${arg}" == "--model-api-key" ]]; then
        hide_next=1
      fi
    fi
    line+="${line:+ }${quoted}"
  done
  printf '%s\n' "${line}"
}

echo "Strategy: ${STRATEGY}"
# Only this log line is masked; the real key is still passed to the job
# below, and DEBUG=1 (set -x) tracing will still show it.
echo "Running: ray job submit -- $(format_cmd_for_log "${CMD_ARGS[@]}")"

# ${ENV_ARGS[@]+...} expands to nothing when the array is empty, which avoids
# an "unbound variable" error under `set -u` on older bash releases.
ray job submit \
  --address="${RAY_ADDRESS}" \
  ${ENV_ARGS[@]+"${ENV_ARGS[@]}"} \
  -- "${CMD_ARGS[@]}"