Skip to content

Commit 01ed357

Browse files
committed
fix: clean lmcache agentic startup logs
1 parent ed79577 commit 01ed357

2 files changed

Lines changed: 66 additions & 30 deletions

File tree

.github/workflows/benchmark-tmpl.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,10 @@ jobs:
239239
name: agentic_${{ env.RESULT_FILENAME }}
240240
path: |
241241
results/server.log
242+
results/lmcache_server.log
242243
results/benchmark.log
243244
results/config.yaml
245+
results/lmcache_command.txt
244246
results/vllm_command.txt
245247
results/benchmark_command.txt
246248
results/workload_distribution_summary.txt
@@ -274,7 +276,9 @@ jobs:
274276
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
275277
with:
276278
name: ${{ inputs.eval-only && 'eval_server_logs_' || 'server_logs_' }}${{ env.RESULT_FILENAME }}
277-
path: ${{ inputs.scenario-type == 'agentic-coding' && 'results/server.log' || 'server.log' }}
279+
path: |
280+
${{ inputs.scenario-type == 'agentic-coding' && 'results/server.log' || 'server.log' }}
281+
${{ inputs.scenario-type == 'agentic-coding' && 'results/lmcache_server.log' || '' }}
278282
if-no-files-found: ignore
279283

280284
- name: Upload GPU metrics

benchmarks/single_node/agentic/dsv4_fp4_b200_vllm.sh

Lines changed: 61 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -75,20 +75,40 @@ cleanup_lmcache_server() {
7575
trap cleanup_lmcache_server EXIT
7676

7777
#######################################
# Stop the background `tail` that mirrors the LMCache log, if one is running.
# Arguments: $1 - PID of the tail process (may be empty)
# Returns:   always 0; a tail that already exited is not an error
#######################################
_lmcache_stop_log_tail() {
  local pid="${1:-}"
  [[ -n "$pid" ]] || return 0
  # Best effort: the process may already be gone, so ignore failures.
  kill "$pid" 2>/dev/null || true
  # Reap it so no further log lines are printed after we return.
  wait "$pid" 2>/dev/null || true
  return 0
}

#######################################
# Wait until the LMCache server answers its HTTP healthcheck, streaming the
# server log to stdout while waiting.
# Globals:
#   LMCACHE_LOG                 (read) path of the server log file
#   LMCACHE_PID                 (read) server PID; liveness is checked if set
#   LMCACHE_HTTP_PORT           (read) port of the /healthcheck endpoint
#   LMCACHE_READY_ATTEMPTS      (read, optional) polls per phase, default 120
#   LMCACHE_READY_CURL_TIMEOUT  (read, optional) seconds per probe, default 5
# Outputs:   log contents on stdout (live) and on stderr (on failure)
# Returns:   0 once the healthcheck succeeds. On server death or timeout the
#            whole script exits with status 1 (not a plain return).
#######################################
wait_for_lmcache_ready() {
  # Turn xtrace off without the `set +x` itself being traced.
  { set +x; } 2>/dev/null
  local attempts="${LMCACHE_READY_ATTEMPTS:-120}"
  local curl_timeout="${LMCACHE_READY_CURL_TIMEOUT:-5}"
  local tail_pid=""
  local i=0

  # Phase 1: wait for the log file to exist so `tail` has something to follow.
  # Bounded by the same attempt budget so a missing file cannot hang the job.
  while [[ ! -f "$LMCACHE_LOG" ]]; do
    if [[ -n "${LMCACHE_PID:-}" ]] && ! kill -0 "$LMCACHE_PID" 2>/dev/null; then
      echo "LMCache server died before creating log file. Exiting." >&2
      exit 1
    fi
    if (( ++i > attempts )); then
      echo "Timed out waiting for LMCache log file: $LMCACHE_LOG" >&2
      exit 1
    fi
    sleep 1
  done

  # Mirror the log from its first line while we poll.
  tail -f -n +1 "$LMCACHE_LOG" &
  tail_pid=$!

  # Phase 2: poll the healthcheck once per second.
  for ((i = 1; i <= attempts; i++)); do
    # --max-time keeps one stuck request from defeating the attempt budget.
    if curl --output /dev/null --silent --fail --max-time "$curl_timeout" \
      "http://127.0.0.1:${LMCACHE_HTTP_PORT}/healthcheck"; then
      _lmcache_stop_log_tail "$tail_pid"
      return 0
    fi
    if [[ -n "${LMCACHE_PID:-}" ]] && ! kill -0 "$LMCACHE_PID" 2>/dev/null; then
      echo "LMCache server died before becoming healthy. Log follows:" >&2
      _lmcache_stop_log_tail "$tail_pid"
      cat "$LMCACHE_LOG" >&2 || true
      exit 1
    fi
    sleep 1
  done

  echo "Timed out waiting for LMCache server healthcheck. Log follows:" >&2
  _lmcache_stop_log_tail "$tail_pid"
  cat "$LMCACHE_LOG" >&2 || true
  exit 1
}
@@ -131,6 +151,7 @@ case "$OFFLOADING" in
131151
)
132152
;;
133153
lmcache-mp)
154+
{ set +x; } 2>/dev/null
134155
# LMCache docs recommend MP mode for production: start an external
135156
# `lmcache server`, then point vLLM's LMCacheMPConnector at it. For
136157
# vLLM >= 0.20, prefer the LMCache-shipped connector module because it
@@ -154,16 +175,21 @@ case "$OFFLOADING" in
154175
LMCACHE_MAX_WORKERS="${LMCACHE_MAX_WORKERS:-$TP}"
155176

156177
echo "Starting LMCache MP server..."
157-
lmcache server \
158-
--host "$LMCACHE_HOST" \
159-
--port "$LMCACHE_PORT" \
160-
--http-host "$LMCACHE_HOST" \
161-
--http-port "$LMCACHE_HTTP_PORT" \
162-
--l1-size-gb "$LMCACHE_L1_SIZE_GB" \
163-
--l1-init-size-gb "$LMCACHE_L1_INIT_SIZE_GB" \
164-
--chunk-size "$LMCACHE_CHUNK_SIZE" \
165-
--max-workers "$LMCACHE_MAX_WORKERS" \
166-
--eviction-policy LRU > "$LMCACHE_LOG" 2>&1 &
178+
LMCACHE_CMD=(
179+
lmcache server
180+
--host "$LMCACHE_HOST"
181+
--port "$LMCACHE_PORT"
182+
--http-host "$LMCACHE_HOST"
183+
--http-port "$LMCACHE_HTTP_PORT"
184+
--l1-size-gb "$LMCACHE_L1_SIZE_GB"
185+
--l1-init-size-gb "$LMCACHE_L1_INIT_SIZE_GB"
186+
--chunk-size "$LMCACHE_CHUNK_SIZE"
187+
--max-workers "$LMCACHE_MAX_WORKERS"
188+
--eviction-policy LRU
189+
)
190+
printf '%q ' "${LMCACHE_CMD[@]}" > "$RESULT_DIR/lmcache_command.txt"
191+
printf '\n' >> "$RESULT_DIR/lmcache_command.txt"
192+
"${LMCACHE_CMD[@]}" > "$LMCACHE_LOG" 2>&1 &
167193
LMCACHE_PID=$!
168194
echo "LMCache server PID: $LMCACHE_PID"
169195
wait_for_lmcache_ready
@@ -206,25 +232,31 @@ export TORCH_CUDA_ARCH_LIST="10.0"
206232
export PYTHONNOUSERSITE=1
207233
export VLLM_FLOAT32_MATMUL_PRECISION=high
208234

209-
vllm serve "$MODEL" \
210-
--host 0.0.0.0 \
211-
--port "$PORT" \
212-
--trust-remote-code \
213-
--kv-cache-dtype fp8 \
214-
--block-size 256 \
215-
"${PARALLEL_ARGS[@]}" \
216-
"${EP_ARGS[@]}" \
217-
--compilation-config '{"cudagraph_mode":"FULL_AND_PIECEWISE","custom_ops":["all"]}' \
218-
--attention_config.use_fp4_indexer_cache=True \
219-
--tokenizer-mode deepseek_v4 \
220-
--tool-call-parser deepseek_v4 \
221-
--enable-auto-tool-choice \
222-
--reasoning-parser deepseek_v4 \
223-
--enable-prefix-caching \
224-
"${HYBRID_KV_ARGS[@]}" \
225-
--max-model-len "$MAX_MODEL_LEN" \
226-
--max-num-seqs "$PER_ENGINE_MAX_NUM_SEQS" \
227-
"${OFFLOAD_ARGS[@]}" > "$SERVER_LOG" 2>&1 &
235+
{ set +x; } 2>/dev/null
236+
VLLM_CMD=(
237+
vllm serve "$MODEL"
238+
--host 0.0.0.0
239+
--port "$PORT"
240+
--trust-remote-code
241+
--kv-cache-dtype fp8
242+
--block-size 256
243+
"${PARALLEL_ARGS[@]}"
244+
"${EP_ARGS[@]}"
245+
--compilation-config '{"cudagraph_mode":"FULL_AND_PIECEWISE","custom_ops":["all"]}'
246+
--attention_config.use_fp4_indexer_cache=True
247+
--tokenizer-mode deepseek_v4
248+
--tool-call-parser deepseek_v4
249+
--enable-auto-tool-choice
250+
--reasoning-parser deepseek_v4
251+
--enable-prefix-caching
252+
"${HYBRID_KV_ARGS[@]}"
253+
--max-model-len "$MAX_MODEL_LEN"
254+
--max-num-seqs "$PER_ENGINE_MAX_NUM_SEQS"
255+
"${OFFLOAD_ARGS[@]}"
256+
)
257+
printf '%q ' "${VLLM_CMD[@]}" | tee "$RESULT_DIR/vllm_command.txt"
258+
printf '\n' | tee -a "$RESULT_DIR/vllm_command.txt"
259+
"${VLLM_CMD[@]}" > "$SERVER_LOG" 2>&1 &
228260
SERVER_PID=$!
229261
echo "Server PID: $SERVER_PID"
230262

0 commit comments

Comments
 (0)