Skip to content

Commit 9f7a87e

Browse files
test: align Test 13 MTP benchmark flow with Test 1
1 parent 61ab81b commit 9f7a87e

1 file changed

Lines changed: 40 additions & 37 deletions

File tree

run_benchmark.sh

Lines changed: 40 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -221,43 +221,7 @@ if [ "$suite_opt" == "12" ]; then
221221
exit $?
222222
fi
223223

224-
# Test 13 (hard-coded E2B variant): when suite_opt is "13", build the
# Gemma4MTPBench product once, run it three times with fixed prompts at
# max-kv-size 512, 4096 and 8192, then exit 0.
if [ "$suite_opt" == "13" ]; then
225-
echo ""
226-
echo "=> Starting Test 13: Gemma-4 MTP Speculative Decoding Benchmark"
227-
echo "Building benchmark binary..."
228-
# Build up front; the exit status of this command is not checked here.
swift build -c release --product Gemma4MTPBench
229-
230-
echo ""
231-
echo "--- Test 13A: Small Context (max-kv-size=512) on E2B Model ---"
232-
# grep -v drops every output line containing "ASST DEBUG".
swift run -c release Gemma4MTPBench \
233-
--main-model mlx-community/gemma-4-e2b-it-4bit \
234-
--asst-model mlx-community/gemma-4-E2B-it-assistant-bf16 \
235-
--prompt "What is the capital of France? Please tell me the history of it in 3 sentences." \
236-
--max-tokens 100 \
237-
--max-kv-size 512 | grep -v "ASST DEBUG"
238-
239-
echo ""
240-
echo "--- Test 13B: Medium Context (max-kv-size=4096) on E2B Model ---"
241-
# Same invocation as 13A with a different prompt and max-kv-size 4096.
swift run -c release Gemma4MTPBench \
242-
--main-model mlx-community/gemma-4-e2b-it-4bit \
243-
--asst-model mlx-community/gemma-4-E2B-it-assistant-bf16 \
244-
--prompt "Write a detailed 3-paragraph essay on the impact of the Industrial Revolution on modern supply chain logistics. Ensure you include dates and specific technological advancements." \
245-
--max-tokens 100 \
246-
--max-kv-size 4096 | grep -v "ASST DEBUG"
247-
248-
echo ""
249-
echo "--- Test 13C: Large Context (max-kv-size=8192) on E2B Model ---"
250-
# Same invocation as 13A with a different prompt and max-kv-size 8192.
swift run -c release Gemma4MTPBench \
251-
--main-model mlx-community/gemma-4-e2b-it-4bit \
252-
--asst-model mlx-community/gemma-4-E2B-it-assistant-bf16 \
253-
--prompt "Explain quantum computing as if I were a 10 year old. Then, explain it to a physics graduate student." \
254-
--max-tokens 100 \
255-
--max-kv-size 8192 | grep -v "ASST DEBUG"
256-
257-
echo ""
258-
echo "✅ Gemma-4 MTP Speculative Decoding Benchmarks Complete."
259-
# Unconditional success exit: none of the swift statuses above are inspected.
exit 0
260-
fi
224+
261225

262226
echo ""
263227
PS3="Select a model to use: "
@@ -1379,6 +1343,45 @@ if [ "$suite_opt" == "10" ]; then
13791343
fi
13801344
fi
13811345

1346+
# ---------------------------------------------------------------------------
# Test 13: Gemma-4 MTP speculative decoding benchmark.
#
# Reads:   suite_opt  - selected suite; this block only runs when it is "13".
#          FULL_MODEL - main model Hub ID (expected to be set earlier in the
#                       script; not visible from this block).
# Prompts: assistant model Hub ID (only when it cannot be inferred from
#          FULL_MODEL) and a comma-separated list of context lengths.
# Runs:    Gemma4MTPBench once per context length (passed as --max-kv-size),
#          hiding lines that contain "ASST DEBUG" from its output.
# Exits:   0 when every benchmark run succeeded; 1 when the assistant model
#          is empty, the build fails, no valid context length was given, or
#          any benchmark run returned a non-zero status.
# ---------------------------------------------------------------------------
if [ "$suite_opt" == "13" ]; then
    echo ""
    echo "=> Starting Test 13: Gemma-4 MTP Speculative Decoding Benchmark"

    # Infer assistant model
    if [[ "$FULL_MODEL" == *"gemma-4-26b"* ]]; then
        ASST_MODEL="mlx-community/gemma-4-26B-A4B-it-assistant-bf16"
    elif [[ "$FULL_MODEL" == *"gemma-4-e2b"* ]]; then
        ASST_MODEL="mlx-community/gemma-4-E2B-it-assistant-bf16"
    else
        # -r keeps backslashes in the typed ID literal.
        read -r -p "Enter assistant model Hub ID: " ASST_MODEL
    fi
    if [[ -z "$ASST_MODEL" ]]; then
        echo "❌ No assistant model specified; cannot run Test 13." >&2
        exit 1
    fi

    echo ""
    read -r -p "Enter context lengths to test [default: 512,40000,100000]: " CONTEXTS
    CONTEXTS=${CONTEXTS:-"512,40000,100000"}

    echo ""
    echo "Building benchmark binary..."
    # Stop here on a broken build instead of failing once per context below.
    if ! swift build -c release --product Gemma4MTPBench; then
        echo "❌ Failed to build Gemma4MTPBench." >&2
        exit 1
    fi

    mtp_runs=0
    mtp_failures=0
    IFS=',' read -ra ADDR <<< "$CONTEXTS"
    for ctx in "${ADDR[@]}"; do
        # Strip all whitespace without spawning echo/tr.
        ctx="${ctx//[[:space:]]/}"
        # Tolerate stray separators such as "512,,1024" or a trailing comma.
        if [[ -z "$ctx" ]]; then
            continue
        fi
        if [[ ! "$ctx" =~ ^[0-9]+$ ]]; then
            echo "⚠️  Skipping invalid context length: '$ctx'" >&2
            continue
        fi

        echo ""
        echo "--- Test 13: Context (max-kv-size=$ctx) on $FULL_MODEL ---"
        swift run -c release Gemma4MTPBench \
            --main-model "$FULL_MODEL" \
            --asst-model "$ASST_MODEL" \
            --prompt "Write a detailed 3-paragraph essay on the impact of the Industrial Revolution on modern supply chain logistics. Ensure you include dates and specific technological advancements." \
            --max-tokens 100 \
            --max-kv-size "$ctx" | grep -v "ASST DEBUG"
        # The pipeline's own status is grep's; PIPESTATUS[0] is the benchmark's.
        mtp_status=${PIPESTATUS[0]}
        mtp_runs=$((mtp_runs + 1))
        if [[ "$mtp_status" -ne 0 ]]; then
            echo "❌ Benchmark failed for max-kv-size=$ctx (exit $mtp_status)." >&2
            mtp_failures=$((mtp_failures + 1))
        fi
    done

    echo ""
    if [[ "$mtp_runs" -eq 0 ]]; then
        echo "❌ No valid context lengths were provided." >&2
        exit 1
    fi
    if [[ "$mtp_failures" -gt 0 ]]; then
        echo "❌ Gemma-4 MTP Speculative Decoding Benchmarks finished with $mtp_failures failed run(s)." >&2
        exit 1
    fi
    echo "✅ Gemma-4 MTP Speculative Decoding Benchmarks Complete."
    exit 0
fi
1384+
13821385
# Fallback to Test 1 for anything else
13831386
echo ""
13841387
read -p "Enter context lengths to test [default: 512,40000,100000]: " CONTEXTS

0 commit comments

Comments
 (0)