# Force colored output in CI logs.
FORCE_COLOR: "1"
# Models served by the vLLM test servers (chat, embeddings, reranking).
VLLM_MODEL: "Qwen/Qwen3-0.6B"
VLLM_EMBEDDING_MODEL: "sentence-transformers/all-MiniLM-L6-v2"
VLLM_RANKER_MODEL: "BAAI/bge-reranker-base"
# Run vLLM on CPU with a 4 GiB KV-cache budget (CI runners have no GPU).
VLLM_TARGET_DEVICE: "cpu"
VLLM_CPU_KVCACHE_SPACE: "4"
# we only test on Ubuntu to keep vLLM server running simple
TEST_MATRIX_OS: '["ubuntu-latest"]'
# vLLM is not compatible with Python 3.14. https://github.com/vllm-project/vllm/issues/34096
9093 --torch-backend cpu
9194
9295 - name : Start vLLM chat server
93- env :
94- VLLM_TARGET_DEVICE : " cpu"
95- VLLM_CPU_KVCACHE_SPACE : " 4"
9696 run : |
9797 nohup hatch run -- vllm serve ${{ env.VLLM_MODEL }} \
9898 --port 8000 \
@@ -120,9 +120,6 @@ jobs:
120120 echo "vLLM chat server started successfully."
121121
122122 - name : Start vLLM embedding server
123- env :
124- VLLM_TARGET_DEVICE : " cpu"
125- VLLM_CPU_KVCACHE_SPACE : " 4"
126123 run : |
127124 nohup hatch run -- vllm serve ${{ env.VLLM_EMBEDDING_MODEL }} \
128125 --port 8001 \
@@ -144,6 +141,27 @@ jobs:
144141
145142 echo "vLLM embedding server started successfully."
146143
# Start the reranker model server in the background and block until its
# health endpoint responds, or fail the job after 300 seconds.
- name: Start vLLM ranker server
  run: |
    # Launch in the background; nohup keeps it alive for later steps.
    nohup hatch run -- vllm serve ${{ env.VLLM_RANKER_MODEL }} \
      --port 8002 \
      --enforce-eager \
      --max-num-seqs 1 &

    # Wait for the vLLM ranker server to be ready with a timeout of 300 seconds
    timeout=300
    while [ "$timeout" -gt 0 ] && ! curl -sSf http://localhost:8002/health > /dev/null 2>&1; do
      echo "Waiting for vLLM ranker server to start..."
      sleep 10
      # Do NOT use ((timeout-=10)): bash arithmetic commands return exit
      # status 1 when the result is 0, and GitHub Actions runs `run:` with
      # `bash -e {0}`, so the step would abort here on a real timeout
      # before ever printing the message below. $(( )) expansion is safe.
      timeout=$((timeout - 10))
    done

    # -le (not -eq) so any overshoot below zero is still treated as a timeout.
    if [ "$timeout" -le 0 ]; then
      echo "Timed out waiting for vLLM ranker server to start."
      exit 1
    fi

    echo "vLLM ranker server started successfully."
# Run formatting and type checks once per matrix (single Python/OS combo)
# rather than on every matrix entry.
- name: Lint
  if: matrix.python-version == '3.10' && runner.os == 'Linux'
  run: hatch run fmt-check && hatch run test:types
0 commit comments