Skip to content

Commit d4c0c58

Browse files
seungrokj and claude committed
fix(agentx): remove LMCache ROCm patch setup from MI355X agentic benchmarks
Drop the LMCACHE_ROCM_PATCH_DIR block (write_lmcache_rocm_mp_patch, write_chunked_connector_patch, write_scheduler_assertion_patch, and the related env exports) from the dsv4, kimik2.5, and minimaxm2.5 MI355X agentic scripts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent c215222 commit d4c0c58

3 files changed

Lines changed: 0 additions & 39 deletions

File tree

benchmarks/single_node/agentic/dsv4_fp4_mi355x_vllm.sh

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -149,19 +149,6 @@ case "$OFFLOADING" in
149149
CXX=hipcc BUILD_WITH_HIP=1 pip install -e . --no-build-isolation
150150
cd ..
151151

152-
LMCACHE_ROCM_PATCH_DIR="$RESULT_DIR/lmcache_rocm_patch"
153-
write_lmcache_rocm_mp_patch "$LMCACHE_ROCM_PATCH_DIR"
154-
write_chunked_connector_patch "$LMCACHE_ROCM_PATCH_DIR"
155-
write_scheduler_assertion_patch "$LMCACHE_ROCM_PATCH_DIR"
156-
export LMCACHE_ROCM_MP_BLOCK_FALLBACK=1
157-
export LMCACHE_ROCM_MP_BLOCK_FALLBACK_DTYPE=bfloat16
158-
export LMCACHE_ROCM_DEMAND_PINNED_ALLOCATOR=1
159-
# Cap external KV tokens loaded per scheduling step to prevent GPU
160-
# block exhaustion deadlock at high concurrency (c>=32). Default
161-
# 32768 keeps peak block demand within the GPU KV pool. Set to 0 to
162-
# disable chunking (only safe at low concurrency).
163-
export CHUNKED_LMCACHE_MAX_TOKENS_PER_LOAD="${CHUNKED_LMCACHE_MAX_TOKENS_PER_LOAD:-32768}"
164-
export PYTHONPATH="$LMCACHE_ROCM_PATCH_DIR${PYTHONPATH:+:$PYTHONPATH}"
165152
python3 -c "import lmcache.integration.vllm.lmcache_mp_connector" >/dev/null
166153

167154
# Match the B200 Kimi LMCache setup: keep a 2.5 TB semantic CPU KV

benchmarks/single_node/agentic/kimik2.5_fp4_mi355x.sh

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -153,19 +153,6 @@ case "$OFFLOADING" in
153153
CXX=hipcc BUILD_WITH_HIP=1 pip install -e . --no-build-isolation
154154
cd ..
155155

156-
LMCACHE_ROCM_PATCH_DIR="$RESULT_DIR/lmcache_rocm_patch"
157-
write_lmcache_rocm_mp_patch "$LMCACHE_ROCM_PATCH_DIR"
158-
write_chunked_connector_patch "$LMCACHE_ROCM_PATCH_DIR"
159-
write_scheduler_assertion_patch "$LMCACHE_ROCM_PATCH_DIR"
160-
export LMCACHE_ROCM_MP_BLOCK_FALLBACK=1
161-
export LMCACHE_ROCM_MP_BLOCK_FALLBACK_DTYPE=bfloat16
162-
export LMCACHE_ROCM_DEMAND_PINNED_ALLOCATOR=1
163-
# Cap external KV tokens loaded per scheduling step to prevent GPU
164-
# block exhaustion deadlock at high concurrency (c>=32). Default
165-
# 32768 keeps peak block demand within the GPU KV pool. Set to 0 to
166-
# disable chunking (only safe at low concurrency).
167-
export CHUNKED_LMCACHE_MAX_TOKENS_PER_LOAD="${CHUNKED_LMCACHE_MAX_TOKENS_PER_LOAD:-32768}"
168-
export PYTHONPATH="$LMCACHE_ROCM_PATCH_DIR${PYTHONPATH:+:$PYTHONPATH}"
169156
python3 -c "import lmcache.integration.vllm.lmcache_mp_connector" >/dev/null
170157

171158
# Match the B200 Kimi LMCache setup: keep a 2.5 TB semantic CPU KV

benchmarks/single_node/agentic/minimaxm2.5_fp4_mi355x.sh

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -149,19 +149,6 @@ case "$OFFLOADING" in
149149
CXX=hipcc BUILD_WITH_HIP=1 pip install -e . --no-build-isolation
150150
cd ..
151151

152-
LMCACHE_ROCM_PATCH_DIR="$RESULT_DIR/lmcache_rocm_patch"
153-
write_lmcache_rocm_mp_patch "$LMCACHE_ROCM_PATCH_DIR"
154-
write_chunked_connector_patch "$LMCACHE_ROCM_PATCH_DIR"
155-
write_scheduler_assertion_patch "$LMCACHE_ROCM_PATCH_DIR"
156-
export LMCACHE_ROCM_MP_BLOCK_FALLBACK=1
157-
export LMCACHE_ROCM_MP_BLOCK_FALLBACK_DTYPE=bfloat16
158-
export LMCACHE_ROCM_DEMAND_PINNED_ALLOCATOR=1
159-
# Cap external KV tokens loaded per scheduling step to prevent GPU
160-
# block exhaustion deadlock at high concurrency (c>=32). Default
161-
# 32768 keeps peak block demand within the GPU KV pool. Set to 0 to
162-
# disable chunking (only safe at low concurrency).
163-
export CHUNKED_LMCACHE_MAX_TOKENS_PER_LOAD="${CHUNKED_LMCACHE_MAX_TOKENS_PER_LOAD:-32768}"
164-
export PYTHONPATH="$LMCACHE_ROCM_PATCH_DIR${PYTHONPATH:+:$PYTHONPATH}"
165152
python3 -c "import lmcache.integration.vllm.lmcache_mp_connector" >/dev/null
166153

167154
# Match the B200 Kimi LMCache setup: keep a 2.5 TB semantic CPU KV

0 commit comments

Comments
 (0)