Skip to content

Commit b34b068

Browse files
committed
fix(benchmark): capture curl HTTP failures and Python JSON-decode failures in the request pipeline so that fatal Local Runner connection errors are propagated instead of failing tests being silently marked as passed; restore the audio modality in the CI matrix, substituting native Gemma 4 configurations as specified
1 parent 6bfd83b commit b34b068

3 files changed

Lines changed: 39 additions & 20 deletions

File tree

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ jobs:
7676
strategy:
7777
fail-fast: false
7878
matrix:
79-
modality: [server, vision]
79+
modality: [server, vision, audio]
8080
steps:
8181
- uses: actions/checkout@v4
8282
with:

run_benchmark.sh

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,10 @@ echo ""
107107
PS3="Select a model to use: "
108108
if [ "$suite_opt" == "4" ]; then
109109
options=(
110-
"gemma-4-26b-a4b-it-8bit"
111-
"gemma-4-31b-it-8bit"
112-
"gemma-4-e4b-it-8bit"
113-
"gemma-4-2b-a4b-it-4bit"
110+
"mlx-community/gemma-4-26b-a4b-it-8bit"
111+
"mlx-community/gemma-4-31b-it-8bit"
112+
"mlx-community/gemma-4-e4b-it-8bit"
113+
"mlx-community/gemma-4-2b-a4b-it-4bit"
114114
"mlx-community/Qwen3.5-9B-MLX-4bit"
115115
"mlx-community/Qwen3.5-27B-4bit"
116116
"mlx-community/LFM2-VL-1.6B-4bit"
@@ -122,24 +122,24 @@ if [ "$suite_opt" == "4" ]; then
122122
)
123123
elif [ "$suite_opt" == "5" ]; then
124124
options=(
125-
"gemma-4-e4b-it-8bit"
126-
"gemma-4-2b-a4b-it-4bit"
127-
"mlx-community/Qwen2-Audio-7B-Instruct"
125+
"mlx-community/gemma-4-e4b-it-8bit"
126+
"mlx-community/gemma-4-2b-a4b-it-4bit"
127+
"mlx-community/Qwen2-Audio-7B-Instruct-4bit"
128128
"Custom (Enter your own Hub ID)"
129129
"Quit"
130130
)
131131
else
132132
options=(
133-
"gemma-4-26b-a4b-it-8bit"
134-
"gemma-4-31b-it-8bit"
135-
"gemma-4-e4b-it-8bit"
136-
"gemma-4-26b-a4b-it-4bit"
137-
"gemma-4-2b-a4b-it-4bit"
138-
"Qwen2.5-7B-Instruct-4bit"
139-
"Qwen2.5-14B-Instruct-4bit"
140-
"phi-4-mlx-4bit"
133+
"mlx-community/gemma-4-26b-a4b-it-8bit"
134+
"mlx-community/gemma-4-31b-it-8bit"
135+
"mlx-community/gemma-4-e4b-it-8bit"
136+
"mlx-community/gemma-4-26b-a4b-it-4bit"
137+
"mlx-community/gemma-4-2b-a4b-it-4bit"
138+
"mlx-community/Qwen2.5-7B-Instruct-4bit"
139+
"mlx-community/Qwen2.5-14B-Instruct-4bit"
140+
"mlx-community/phi-4-mlx-4bit"
141141
"baa-ai/GLM-5.1-RAM-270GB-MLX"
142-
"GLM-5.1-4bit"
142+
"baa-ai/GLM-5.1-4bit"
143143
"Custom (Enter your own Hub ID)"
144144
"Quit"
145145
)
@@ -322,7 +322,16 @@ EOF
322322
echo ""
323323
echo "Server is up! Sending payload..."
324324
echo "=== VLM Request ==="
325-
VLM_RES=$(curl -sS --max-time 180 http://127.0.0.1:5431/v1/chat/completions -H "Content-Type: application/json" -d @/tmp/vlm_payload.json | python3 -c "import sys,json;d=json.load(sys.stdin);print(d.get('choices',[{}])[0].get('message',{}).get('content', 'ERROR').replace('\n', '<br/>'))")
325+
RAW_OUT=$(curl -sS --max-time 180 http://127.0.0.1:5431/v1/chat/completions -H "Content-Type: application/json" -d @/tmp/vlm_payload.json)
326+
if [ -z "$RAW_OUT" ] || [[ "$RAW_OUT" == *"curl: "* ]]; then
327+
echo "❌ ERROR: Server dropped the connection or crashed!"
328+
exit 1
329+
fi
330+
VLM_RES=$(echo "$RAW_OUT" | python3 -c "import sys,json;d=json.load(sys.stdin);print(d.get('choices',[{}])[0].get('message',{}).get('content', 'ERROR').replace('\n', '<br/>'))")
331+
if [ -z "$VLM_RES" ] || [[ "$VLM_RES" == *"ERROR"* ]]; then
332+
echo "❌ ERROR: JSON Decode failed!"
333+
exit 1
334+
fi
326335

327336
echo -e "\n🤖 VLM Output: $VLM_RES"
328337

@@ -418,7 +427,17 @@ EOF
418427
echo ""
419428
echo "Server is up! Sending payload..."
420429
echo "=== ALM Request ==="
421-
curl -sS --max-time 180 http://127.0.0.1:5431/v1/chat/completions -H "Content-Type: application/json" -d @/tmp/alm_payload.json | python3 -c "import sys,json;d=json.load(sys.stdin);print('\n🎤 ALM Output:', d.get('choices',[{}])[0].get('message',{}).get('content', 'ERROR'))"
430+
RAW_ALM_OUT=$(curl -sS --max-time 180 http://127.0.0.1:5431/v1/chat/completions -H "Content-Type: application/json" -d @/tmp/alm_payload.json)
431+
if [ -z "$RAW_ALM_OUT" ] || [[ "$RAW_ALM_OUT" == *"curl: "* ]]; then
432+
echo "❌ ERROR: Server dropped the connection or crashed!"
433+
exit 1
434+
fi
435+
ALM_RES=$(echo "$RAW_ALM_OUT" | python3 -c "import sys,json;d=json.load(sys.stdin);print('\n🎤 ALM Output:', d.get('choices',[{}])[0].get('message',{}).get('content', 'ERROR'))")
436+
if [ -z "$ALM_RES" ] || [[ "$ALM_RES" == *"ERROR"* ]]; then
437+
echo "❌ ERROR: JSON Decode failed!"
438+
exit 1
439+
fi
440+
echo "$ALM_RES"
422441

423442
echo ""
424443
echo "✅ Test Complete!"

tests/test-audio.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ set -euo pipefail
99
BINARY="${1:-.build/release/SwiftLM}"
1010
PORT="${2:-15413}"
1111
HOST="127.0.0.1"
12-
MODEL="mlx-community/Qwen2-Audio-7B-Instruct-4bit" # CI Small ALM
12+
MODEL="mlx-community/gemma-4-2b-a4b-it-4bit" # CI Small ALM
1313
URL="http://${HOST}:${PORT}"
1414
PASS=0
1515
FAIL=0

0 commit comments

Comments
 (0)