Skip to content

Commit 59ca7e3

Browse files
authored
Add OpenRouter Molmo throughput compare and lint fixes (#1080)
* add adaptive/static/baseline for throughput check * add adaptive/static/baseline for throughput check * Fix formatting and add molmo throughput compare script * fix(vllm): pass chat_template directly for lint stability
1 parent 88849da commit 59ca7e3

7 files changed

Lines changed: 638 additions & 158 deletions

File tree

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env bash
#
# OpenRouter Molmo Throughput adaptive concurrency benchmark
#
# Usage: openrouter_molmo_adaptive.sh [LIMIT]
#   LIMIT  value forwarded to `lmms_eval --limit` (default: 40); it is also
#          used as the request count when computing requests_per_sec.
#
# Required environment:
#   OPENROUTER_API_KEY  re-exported as OPENAI_API_KEY for the
#                       openai_compatible model backend.
#
# Output (relative to the repository root, two levels above this script):
#   ./logs/openrouter_molmo_throughput/adaptive/run.log
#   ./logs/openrouter_molmo_throughput/adaptive/summary.csv
#
# NOTE(review): the script is named after Molmo but MODEL_VERSION points at
# bytedance-seed/seed-1.6-flash — confirm this is intended.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
cd "$REPO_ROOT"

export HF_HOME="${HF_HOME:-/tmp/huggingface}"
export HF_DATASETS_CACHE="${HF_DATASETS_CACHE:-${HF_HOME}/datasets}"
export HF_HUB_CACHE="${HF_HUB_CACHE:-${HF_HOME}/hub}"
export OPENAI_API_KEY="${OPENROUTER_API_KEY:?Error: OPENROUTER_API_KEY not set}"
export OPENAI_API_BASE="https://openrouter.ai/api/v1"

MODEL_VERSION="bytedance-seed/seed-1.6-flash"
TASKS="mme"
LIMIT="${1:-40}"
BATCH_SIZE="1"
VERBOSITY="INFO"
MODEL_TIMEOUT=10
MODEL_MAX_RETRIES=1
OUTPUT_BASE="./logs/openrouter_molmo_throughput"

# Adaptive concurrency controller settings, forwarded verbatim via --model_args.
ADAPTIVE_START=16
ADAPTIVE_MIN=1
ADAPTIVE_MAX=64
TARGET_LATENCY=15.0
INCREASE_STEP=0.15
DECREASE_FACTOR=0.75
FAILURE_THRESHOLD=0.05

RUN_NAME="adaptive"
RUN_DIR="${OUTPUT_BASE}/${RUN_NAME}"
SUMMARY_FILE="${RUN_DIR}/summary.csv"
mkdir -p "$RUN_DIR"

#######################################
# Print the current time in nanoseconds since the epoch.
# Some `date` implementations do not expand %N and emit non-digit text
# instead; in that case fall back to whole seconds scaled to nanoseconds so
# the elapsed-time arithmetic stays valid (at one-second resolution).
# Outputs: integer nanosecond timestamp on stdout
#######################################
now_ns() {
  local stamp
  stamp="$(date +%s%N)"
  if [[ "$stamp" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$stamp"
  else
    printf '%s000000000\n' "$(date +%s)"
  fi
}

# Assemble the comma-separated model argument string piece by piece; it is
# passed as a single quoted word so no value can be word-split or globbed.
MODEL_ARGS="model_version=${MODEL_VERSION}"
MODEL_ARGS+=",num_concurrent=${ADAPTIVE_START}"
MODEL_ARGS+=",timeout=${MODEL_TIMEOUT}"
MODEL_ARGS+=",max_retries=${MODEL_MAX_RETRIES}"
MODEL_ARGS+=",adaptive_concurrency=true"
MODEL_ARGS+=",adaptive_min_concurrency=${ADAPTIVE_MIN}"
MODEL_ARGS+=",adaptive_max_concurrency=${ADAPTIVE_MAX}"
MODEL_ARGS+=",adaptive_target_latency_s=${TARGET_LATENCY}"
MODEL_ARGS+=",adaptive_increase_step=${INCREASE_STEP}"
MODEL_ARGS+=",adaptive_decrease_factor=${DECREASE_FACTOR}"
MODEL_ARGS+=",adaptive_failure_threshold=${FAILURE_THRESHOLD}"

# With `set -o pipefail` a failing lmms_eval aborts the script here even
# though `tee` succeeds, so no summary is written for a failed run.
START_NS="$(now_ns)"
python3 -m lmms_eval \
  --model openai_compatible \
  --model_args "$MODEL_ARGS" \
  --tasks "$TASKS" \
  --batch_size "$BATCH_SIZE" \
  --limit "$LIMIT" \
  --output_path "${RUN_DIR}/results" \
  --verbosity "$VERBOSITY" \
  --log_samples 2>&1 | tee "${RUN_DIR}/run.log"
END_NS="$(now_ns)"

# Wall-clock seconds for the whole run, and LIMIT divided by that time.
WALL_TIME_S=$(awk -v start="$START_NS" -v end="$END_NS" 'BEGIN { printf "%.6f", (end - start) / 1000000000 }')
REQ_PER_S=$(awk -v limit="$LIMIT" -v wall="$WALL_TIME_S" 'BEGIN { if (wall > 0) printf "%.6f", limit / wall; else print "0" }')

# The concurrency column records the starting concurrency of the adaptive run.
cat > "$SUMMARY_FILE" <<EOF
mode,concurrency,limit,wall_time_s,requests_per_sec,log_path
adaptive,$ADAPTIVE_START,$LIMIT,$WALL_TIME_S,$REQ_PER_S,${RUN_DIR}/run.log
EOF

printf "ADAPTIVE done: %s\n" "$SUMMARY_FILE"
printf "requests_per_sec=%s\n" "$REQ_PER_S"
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env bash
#
# OpenRouter Molmo Throughput Baseline (single concurrency)
#
# Usage: openrouter_molmo_baseline.sh [LIMIT]
#   LIMIT  value forwarded to `lmms_eval --limit` (default: 40); it is also
#          used as the request count when computing requests_per_sec.
#
# Required environment:
#   OPENROUTER_API_KEY  re-exported as OPENAI_API_KEY for the
#                       openai_compatible model backend.
#
# Output (relative to the repository root, two levels above this script):
#   ./logs/openrouter_molmo_throughput/baseline/run.log
#   ./logs/openrouter_molmo_throughput/baseline/summary.csv

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
cd "$REPO_ROOT"

export HF_HOME="${HF_HOME:-/tmp/huggingface}"
export HF_DATASETS_CACHE="${HF_DATASETS_CACHE:-${HF_HOME}/datasets}"
export HF_HUB_CACHE="${HF_HUB_CACHE:-${HF_HOME}/hub}"
export OPENAI_API_KEY="${OPENROUTER_API_KEY:?Error: OPENROUTER_API_KEY not set}"
export OPENAI_API_BASE="https://openrouter.ai/api/v1"

MODEL_VERSION="bytedance-seed/seed-1.6-flash"
MODEL_TIMEOUT=10
MODEL_MAX_RETRIES=1
TASKS="mme"
LIMIT="${1:-40}"
BATCH_SIZE="1"
VERBOSITY="INFO"
OUTPUT_BASE="./logs/openrouter_molmo_throughput"

RUN_NAME="baseline"
RUN_DIR="${OUTPUT_BASE}/${RUN_NAME}"
SUMMARY_FILE="${RUN_DIR}/summary.csv"
mkdir -p "$RUN_DIR"

#######################################
# Print the current time in nanoseconds since the epoch.
# Some `date` implementations do not expand %N and emit non-digit text
# instead; in that case fall back to whole seconds scaled to nanoseconds so
# the elapsed-time arithmetic stays valid (at one-second resolution).
# Outputs: integer nanosecond timestamp on stdout
#######################################
now_ns() {
  local stamp
  stamp="$(date +%s%N)"
  if [[ "$stamp" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$stamp"
  else
    printf '%s000000000\n' "$(date +%s)"
  fi
}

# Single in-flight request with adaptive control disabled: the reference
# point the other throughput runs are compared against. Passed as one quoted
# word so no value can be word-split or globbed.
MODEL_ARGS="model_version=${MODEL_VERSION}"
MODEL_ARGS+=",num_concurrent=1"
MODEL_ARGS+=",timeout=${MODEL_TIMEOUT}"
MODEL_ARGS+=",max_retries=${MODEL_MAX_RETRIES}"
MODEL_ARGS+=",adaptive_concurrency=false"

# With `set -o pipefail` a failing lmms_eval aborts the script here even
# though `tee` succeeds, so no summary is written for a failed run.
START_NS="$(now_ns)"
python3 -m lmms_eval \
  --model openai_compatible \
  --model_args "$MODEL_ARGS" \
  --tasks "$TASKS" \
  --batch_size "$BATCH_SIZE" \
  --limit "$LIMIT" \
  --output_path "${RUN_DIR}/results" \
  --verbosity "$VERBOSITY" \
  --log_samples 2>&1 | tee "${RUN_DIR}/run.log"
END_NS="$(now_ns)"

# Wall-clock seconds for the whole run, and LIMIT divided by that time.
WALL_TIME_S=$(awk -v start="$START_NS" -v end="$END_NS" 'BEGIN { printf "%.6f", (end - start) / 1000000000 }')
REQ_PER_S=$(awk -v limit="$LIMIT" -v wall="$WALL_TIME_S" 'BEGIN { if (wall > 0) printf "%.6f", limit / wall; else print "0" }')

cat > "$SUMMARY_FILE" <<EOF
mode,concurrency,limit,wall_time_s,requests_per_sec,log_path
baseline,1,$LIMIT,$WALL_TIME_S,$REQ_PER_S,${RUN_DIR}/run.log
EOF

printf "BASELINE done: %s\n" "$SUMMARY_FILE"
printf "requests_per_sec=%s\n" "$REQ_PER_S"
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env bash
#
# OpenRouter Molmo Throughput Baseline vs static concurrency sweep
#
# Usage: openrouter_molmo_static_concurrency.sh [LIMIT]
#   LIMIT  value forwarded to `lmms_eval --limit` (default: 40); it is also
#          used as the request count when computing requests_per_sec.
#
# Required environment:
#   OPENROUTER_API_KEY  re-exported as OPENAI_API_KEY for the
#                       openai_compatible model backend.
#
# Output (relative to the repository root, two levels above this script):
#   ./logs/openrouter_molmo_throughput/static_concurrency/c<N>/run.log
#   ./logs/openrouter_molmo_throughput/static_concurrency/summary.csv
#     (one row per concurrency level in CONCURRENCIES)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
cd "$REPO_ROOT"

export HF_HOME="${HF_HOME:-/tmp/huggingface}"
export HF_DATASETS_CACHE="${HF_DATASETS_CACHE:-${HF_HOME}/datasets}"
export HF_HUB_CACHE="${HF_HUB_CACHE:-${HF_HOME}/hub}"
export OPENAI_API_KEY="${OPENROUTER_API_KEY:?Error: OPENROUTER_API_KEY not set}"
export OPENAI_API_BASE="https://openrouter.ai/api/v1"

MODEL_VERSION="bytedance-seed/seed-1.6-flash"
TASKS="mme"
LIMIT="${1:-40}"
BATCH_SIZE="1"
VERBOSITY="INFO"
MODEL_TIMEOUT=10
MODEL_MAX_RETRIES=1
OUTPUT_BASE="./logs/openrouter_molmo_throughput"
CONCURRENCIES=(2 4 8 16 24)

RUN_NAME="static_concurrency"
RUN_DIR="${OUTPUT_BASE}/${RUN_NAME}"
SUMMARY_FILE="${RUN_DIR}/summary.csv"
mkdir -p "$RUN_DIR"

#######################################
# Print the current time in nanoseconds since the epoch.
# Some `date` implementations do not expand %N and emit non-digit text
# instead; in that case fall back to whole seconds scaled to nanoseconds so
# the elapsed-time arithmetic stays valid (at one-second resolution).
# Outputs: integer nanosecond timestamp on stdout
#######################################
now_ns() {
  local stamp
  stamp="$(date +%s%N)"
  if [[ "$stamp" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$stamp"
  else
    printf '%s000000000\n' "$(date +%s)"
  fi
}

# The summary is recreated (truncated) on every invocation; rows are appended
# as each concurrency level completes.
echo "mode,concurrency,limit,wall_time_s,requests_per_sec,log_path" > "$SUMMARY_FILE"

for CONCURRENCY in "${CONCURRENCIES[@]}"; do
  CONCURRENCY_DIR="${RUN_DIR}/c${CONCURRENCY}"
  mkdir -p "$CONCURRENCY_DIR"

  # Fixed concurrency with adaptive control disabled. Passed as one quoted
  # word so no value can be word-split or globbed.
  MODEL_ARGS="model_version=${MODEL_VERSION}"
  MODEL_ARGS+=",num_concurrent=${CONCURRENCY}"
  MODEL_ARGS+=",timeout=${MODEL_TIMEOUT}"
  MODEL_ARGS+=",max_retries=${MODEL_MAX_RETRIES}"
  MODEL_ARGS+=",adaptive_concurrency=false"

  # With `set -o pipefail` a failing lmms_eval aborts the whole sweep here
  # even though `tee` succeeds; rows already appended stay in the summary.
  START_NS="$(now_ns)"
  python3 -m lmms_eval \
    --model openai_compatible \
    --model_args "$MODEL_ARGS" \
    --tasks "$TASKS" \
    --batch_size "$BATCH_SIZE" \
    --limit "$LIMIT" \
    --output_path "${CONCURRENCY_DIR}/results" \
    --verbosity "$VERBOSITY" \
    --log_samples 2>&1 | tee "${CONCURRENCY_DIR}/run.log"
  END_NS="$(now_ns)"

  # Wall-clock seconds for this level, and LIMIT divided by that time.
  WALL_TIME_S=$(awk -v start="$START_NS" -v end="$END_NS" 'BEGIN { printf "%.6f", (end - start) / 1000000000 }')
  REQ_PER_S=$(awk -v limit="$LIMIT" -v wall="$WALL_TIME_S" 'BEGIN { if (wall > 0) printf "%.6f", limit / wall; else print "0" }')

  echo "static,$CONCURRENCY,$LIMIT,$WALL_TIME_S,$REQ_PER_S,${CONCURRENCY_DIR}/run.log" >> "$SUMMARY_FILE"
done

printf "STATIC_CONCURRENCY done: %s\n" "$SUMMARY_FILE"
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env bash
#
# Orchestrated benchmark and throughput comparison for OpenRouter Molmo.
#
# Usage: <this script> [LIMIT]
#   LIMIT  forwarded unchanged to each benchmark script (default: 40).
#
# Required environment:
#   OPENROUTER_API_KEY  must be set; the benchmark scripts read it as well.
#
# Runs the baseline, static-concurrency and adaptive benchmark scripts that
# live next to this file, in that order, then writes
#   ./logs/openrouter_molmo_throughput/throughput_comparison.csv
# relative to the repository root (two levels above this script).

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
cd "$REPO_ROOT"

export OPENAI_API_KEY="${OPENROUTER_API_KEY:?Error: OPENROUTER_API_KEY not set}"

LIMIT="${1:-40}"
OUTPUT_BASE="./logs/openrouter_molmo_throughput"
mkdir -p "$OUTPUT_BASE"

# Under `set -e` a failing benchmark script stops the orchestration here.
bash "${SCRIPT_DIR}/openrouter_molmo_baseline.sh" "$LIMIT"
bash "${SCRIPT_DIR}/openrouter_molmo_static_concurrency.sh" "$LIMIT"
bash "${SCRIPT_DIR}/openrouter_molmo_adaptive.sh" "$LIMIT"

BASELINE_SUMMARY="${OUTPUT_BASE}/baseline/summary.csv"
STATIC_SUMMARY="${OUTPUT_BASE}/static_concurrency/summary.csv"
ADAPTIVE_SUMMARY="${OUTPUT_BASE}/adaptive/summary.csv"
COMPARISON_SUMMARY="${OUTPUT_BASE}/throughput_comparison.csv"

if [[ ! -r "$BASELINE_SUMMARY" ]]; then
  echo "baseline summary not found: $BASELINE_SUMMARY" >&2
  exit 1
fi

# Row 2 of the baseline summary is its single data row:
# column 4 = wall_time_s, column 5 = requests_per_sec.
BASELINE_RPS="$(awk -F, 'NR==2 {print $5}' "$BASELINE_SUMMARY")"
BASELINE_WALL="$(awk -F, 'NR==2 {print $4}' "$BASELINE_SUMMARY")"

# Compare numerically: the baseline script can emit "0" or "0.000000", and
# either would later be used as a divisor for the improvement percentage.
if [[ -z "$BASELINE_RPS" ]] \
  || ! awk -v rps="$BASELINE_RPS" 'BEGIN { exit !(rps + 0 > 0) }'; then
  echo "baseline requests_per_sec not found or zero, aborting comparison" >&2
  exit 1
fi

echo "run_type,concurrency,requests_per_sec,wall_time_s,improvement_pct,log_path" > "$COMPARISON_SUMMARY"

# The baseline is its own reference, so its improvement is 0; the wall time
# is the measured value from the baseline summary (0 only if it is missing).
echo "baseline,1,$BASELINE_RPS,${BASELINE_WALL:-0},0,${OUTPUT_BASE}/baseline/run.log" >> "$COMPARISON_SUMMARY"

#######################################
# Print the data rows of a summary CSV with duplicates collapsed.
# Rows are keyed on columns 1 and 2 (mode, concurrency). For each key the
# row with the numerically highest column 5 (requests_per_sec) is kept, and
# keys are emitted in the order they first appear. The first line (header)
# is never printed; blank or short rows (fewer than 5 fields) are skipped
# because they carry no rate to compare.
# Arguments: $1 - path to the summary CSV
# Outputs:   surviving rows on stdout, unchanged
# Returns:   1 (with a message on stderr) if the file cannot be read,
#            otherwise awk's exit status
#######################################
dedupe_summary() {
  local summary_path="$1"

  if [[ ! -r "$summary_path" ]]; then
    printf 'dedupe_summary: cannot read %s\n' "$summary_path" >&2
    return 1
  fi

  awk -F, '
    NR > 1 && NF >= 5 {
      key = $1 SUBSEP $2
      if (!(key in best_rate)) {
        # First sighting of this key: remember its position and its row.
        order[++n] = key
        best_row[key] = $0
        best_rate[key] = $5
      } else if ($5 + 0 > best_rate[key] + 0) {
        # "+ 0" forces a numeric comparison of the rate fields.
        best_row[key] = $0
        best_rate[key] = $5
      }
    }
    END {
      for (i = 1; i <= n; i++) {
        print best_row[order[i]]
      }
    }
  ' "$summary_path"
}

# Append one comparison row per deduplicated static/adaptive run, with the
# throughput change relative to the baseline expressed as a percentage.
for SUMMARY_PATH in "$STATIC_SUMMARY" "$ADAPTIVE_SUMMARY"; do
  if [[ ! -r "$SUMMARY_PATH" ]]; then
    # Failures inside the process substitution below are not propagated,
    # so report a missing summary explicitly rather than skipping silently.
    printf 'summary not found, skipping: %s\n' "$SUMMARY_PATH" >&2
    continue
  fi

  # Columns: mode,concurrency,limit,wall_time_s,requests_per_sec,log_path
  # (the limit column is not needed here and is read into `_`).
  while IFS=, read -r MODE CONCURRENCY _ WALL REQUESTS_PER_SEC LOG_PATH; do
    # skip malformed lines
    if [[ "$MODE" == "mode" || "$MODE" == "" || -z "$REQUESTS_PER_SEC" ]]; then
      continue
    fi
    IMPROVEMENT_PCT="$(awk -v base="$BASELINE_RPS" -v current="$REQUESTS_PER_SEC" 'BEGIN { printf "%.2f", (current / base - 1) * 100 }')"
    echo "$MODE,$CONCURRENCY,$REQUESTS_PER_SEC,$WALL,$IMPROVEMENT_PCT,$LOG_PATH" >> "$COMPARISON_SUMMARY"
  done < <(dedupe_summary "$SUMMARY_PATH")
done

echo "Comparison saved to: $COMPARISON_SUMMARY"
cat "$COMPARISON_SUMMARY"

0 commit comments

Comments
 (0)