Skip to content

Commit 00ffb64

Browse files
committed
bugfix: fix incorrect KV cache data format when the prefix cache feature is enabled (for the DeepSeek V3 model).
1 parent e828645 commit 00ffb64

2 files changed

Lines changed: 3 additions & 4 deletions

File tree

xllm/core/runtime/worker_impl.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -287,8 +287,8 @@ bool WorkerImpl::allocate_kv_cache(
287287
// Full attention layer: allocate key_cache and value_cache only
288288
#if defined(USE_NPU)
289289
aclFormat npu_format_type =
290-
context_.get_model_args().model_type().compare(
291-
0, strlen("deepseek_v3"), "deepseek_v3") == 0 &&
290+
absl::StartsWith(context_.get_model_args().model_type(),
291+
"deepseek_v3") &&
292292
FLAGS_enable_prefix_cache
293293
? ACL_FORMAT_FRACTAL_NZ
294294
: ACL_FORMAT_ND;

xllm/models/llm/npu/mtp_model_base.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,7 @@ class MtpModelImplBase : public torch::nn::Module {
138138
}
139139
attn_mask = torch::cat(req_mask_vec, 0);
140140
}
141-
} else if (model_type_.compare(0, strlen("deepseek_v3"), "deepseek_v3") ==
142-
0 &&
141+
} else if (absl::StartsWith(model_type_, "deepseek_v3") &&
143142
FLAGS_enable_prefix_cache &&
144143
!input_params.batch_forward_type.is_decode()) {
145144
attn_mask =

0 commit comments

Comments (0)