@@ -187,7 +187,7 @@ function check_server_status() {
187187 echo -e " \n"
188188}
189189
190- echo " ============ Online: start to test ERNIE-4.5-21B-A3B-Paddle ==========="
190+ echo " ============ Online: start to test ERNIE-4.5-21B-A3B-Paddle (wint8, tp=1, enable_cudagraph) ==========="
191191clear_message
192192echo " Start server..."
193193python -m fastdeploy.entrypoints.openai.api_server \
233233# fi
234234echo -e " \nPASSED"
235235
# -----------------------------------------------------------------------------
# Online test: ERNIE-4.5-21B-A3B-Paddle (wint8, tp=2, enable_cudagraph).
#
# Launches the OpenAI-compatible API server in the background on port 8180,
# waits for it with check_server_status, runs the GSM8K benchmark against it,
# stops the server, and exits 1 when the benchmark fails or the reported
# accuracy is below the expected minimum.
#
# Reads:   MODEL_DIR (model root), CI_PATH (directory holding test.jsonl and
#          bench_gsm8k.py).
# Writes:  server.log, ./test.jsonl; reads result.jsonl after the benchmark.
# Helpers: clear_message, check_server_status, stop_processes and
#          print_error_message are defined elsewhere in this script.
# -----------------------------------------------------------------------------
echo " ============ Online: start to test ERNIE-4.5-21B-A3B-Paddle (wint8, tp=2, enable_cudagraph) ==========="
clear_message
echo " Start server..."
# The model path must be a single word: "${MODEL_DIR}/<model-name>".
python -m fastdeploy.entrypoints.openai.api_server \
    --model "${MODEL_DIR}/ERNIE-4.5-21B-A3B-Paddle" \
    --port 8180 \
    --tensor-parallel-size 2 \
    --quantization wint8 \
    --max-model-len 32768 \
    --max-num-seqs 8 \
    --block-size 16 \
    --graph-optimization-config ' {"use_cudagraph": true}' > server.log 2>&1 &

check_server_status

echo " Start inference..."
cp "${CI_PATH}/test.jsonl" ./
python3 -u "${CI_PATH}/bench_gsm8k.py" --port 8180 --num-questions 10 --num-shots 5 --parallel 8

# Capture the benchmark status immediately, before any other command resets $?.
exit_code=$?
echo -e " \nexit_code is ${exit_code} "

# Always stop the server before deciding pass/fail so no process is left behind.
echo -e " \nStop server..."
stop_processes
echo -e " \nStop server done."

if [ "${exit_code}" -ne 0 ]; then
    print_error_message
    exit 1
fi

# Accuracy must come from the 'accuracy' field; reading 'latency' here would
# make the threshold check below compare the wrong metric.
# NOTE(review): assumes bench_gsm8k.py writes 'accuracy' and 'latency' keys to
# result.jsonl and that the file holds a single record for this run -- confirm.
# The -c program must start at column 0, otherwise Python raises IndentationError.
acc=$(python3 -c "import json; [print(json.loads(line)['accuracy']) for line in open('result.jsonl')]")
latency=$(python3 -c "import json; [print(json.loads(line)['latency']) for line in open('result.jsonl')]")
expected_lowerest_acc=0.8
expected_largest_latency=60
# awk does the floating-point comparison; it exits 0 (true) when acc < threshold.
if awk -v a="$acc" -v b="$expected_lowerest_acc" ' BEGIN {exit !(a < b)}'; then
    echo -e " \nExit with Accucary error, current accuracy $acc less than $expected_lowerest_acc "
    exit 1
fi

# Latency gate is intentionally disabled; latency is still collected above.
# if awk -v a="$latency" -v b="$expected_largest_latency" 'BEGIN {exit !(a > b)}'; then
#     echo -e "\nExit with Latency Error, current latency $latency greater than $expected_largest_latency "
#     exit 1
# fi
echo -e " \nPASSED"
281+
236282echo -e " \n============ Online: start to test ERNIE-4.5-VL-28B-A3B-Paddle ==========="
237283clear_message
238284echo " Start server..."
0 commit comments