File tree Expand file tree Collapse file tree
docker_compose/intel/gpu/arc Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -22,10 +22,10 @@ export VLLM_SERVICE_PORT_B60=${VLLM_SERVICE_PORT_B60}
2222export VLLM_SERVICE_PORT_A770=${VLLM_SERVICE_PORT_A770}
2323export TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE}
2424
25- export SELECTED_XPU_0=${SELECTED_XPU_0}
2625export vLLM_ENDPOINT=${vLLM_ENDPOINT}
2726export MAX_NUM_SEQS=${MAX_NUM_SEQS}
2827export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS}
2928export MAX_MODEL_LEN=${MAX_MODEL_LEN}
3029export LOAD_IN_LOW_BIT=${LOAD_IN_LOW_BIT}
3130export CCL_DG2_USM=${CCL_DG2_USM}
31+ export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK}
Original file line number Diff line number Diff line change @@ -29,7 +29,6 @@ UI_UPLOAD_PATH="$WORKPATH/tests"
2929HF_ENDPOINT=https://hf-mirror.com
3030VLLM_SERVICE_PORT_A770=8086
3131TENSOR_PARALLEL_SIZE=1
32- SELECTED_XPU_0=0
3332vLLM_ENDPOINT=" http://${HOST_IP} :${VLLM_SERVICE_PORT_A770} "
3433LLM_MODEL=" Qwen/Qwen3-8B"
3534LLM_MODEL_PATH=" ${MODEL_PATH} /${LLM_MODEL} "
Original file line number Diff line number Diff line change @@ -33,6 +33,7 @@ vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT_B60}"
3333LLM_MODEL=" Qwen/Qwen3-8B"
3434VLLM_IMAGE_TAG=" 1.1-preview"
3535DP=1
36+ ZE_AFFINITY_MASK=1
3637
3738function build_docker_images() {
3839 opea_branch=${opea_branch:- " main" }
@@ -62,7 +63,7 @@ function start_services() {
6263 n=0
6364 until [[ " $n " -ge 100 ]]; do
6465 docker logs ipex-serving-xpu-container > ${LOG_PATH} /ipex-serving-xpu-container.log 2>&1
65- if grep -q " Starting vLLM API server on http://0.0.0.0: " ${LOG_PATH} /ipex-serving-xpu-container.log; then
66+ if grep -q " Starting vLLM API server" ${LOG_PATH} /ipex-serving-xpu-container.log; then
6667 break
6768 fi
6869 sleep 6s
You can’t perform that action at this time.
0 commit comments