Skip to content

Commit ffe9b2d

Browse files
Fix stable stack test hang when the server crashes
1 parent 5c398f6 commit ffe9b2d

2 files changed

Lines changed: 27 additions & 6 deletions

File tree

.github/workflows/run_maxtext_jetstream_tests.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
- name: Cleanup old docker images
3636
run: docker system prune --all --force
3737
- name: Authenticate gcloud
38-
run: gcloud auth configure-docker us-docker.pkg.dev --quiet
38+
run: gcloud auth configure-docker gcr.io --quiet
3939

4040
build_stable_stack:
4141
name: Build Stable Stack

experimental/jetstream-maxtext-stable-stack/test_script/benchmark_serving_example.sh

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
# TODO: need a public path
2-
export PARAM_PATH=${PARAM_PATH}
1+
#!/bin/bash
2+
3+
SERVER_PID=""
4+
CLIENT_PID=""
35

46
python -c "import nltk; nltk.download('punkt')"
57

@@ -26,8 +28,10 @@ python -m MaxText.maxengine_server \
2628
model_call_mode=inference \
2729
sparse_matmul=False \
2830
use_chunked_prefill=true \
29-
prefill_chunk_size=64 \
30-
load_parameters_path=${PARAM_PATH} &
31+
prefill_chunk_size=256 \
32+
load_parameters_path=gs://jetstream-runner/8-7B-int8 &
33+
34+
SERVER_PID=$!
3135

3236
popd
3337

@@ -41,4 +45,21 @@ python ./JetStream/benchmarks/benchmark_serving.py \
4145
--num-prompts 100 \
4246
--max-output-length 2048 \
4347
--dataset openorca \
44-
--run-eval True
48+
--run-eval True &
49+
50+
CLIENT_PID=$!
51+
52+
while true; do
53+
# If the server process is no longer running, it has crashed; terminate the script with a failure status.
54+
if ! kill -0 "${SERVER_PID}" 2>/dev/null; then
55+
exit 1
56+
fi
57+
58+
# If the client process has finished, collect its exit status and exit with it.
59+
if ! kill -0 "${CLIENT_PID}" 2>/dev/null; then
60+
wait $CLIENT_PID
61+
exit $?
62+
fi
63+
64+
sleep 1
65+
done

0 commit comments

Comments
 (0)