Skip to content

Commit dd03beb

Browse files
committed
only support n_seq=1 for now
1 parent a4e850c commit dd03beb

2 files changed

Lines changed: 5 additions & 9 deletions

File tree

src/llama-graph.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -824,15 +824,7 @@ bool llm_graph_input_dsv4::can_reuse(const llm_graph_params & params) {
 
     bool res = true;
 
-    if (inp_raw->self_k_idxs && inp_raw->self_k_idxs->buffer) {
-        res &= inp_raw->self_k_idxs->ne[0] == params.ubatch.n_tokens;
-        res &= can_reuse_kq_mask(inp_raw->self_kq_mask, mctx->get_raw()->get_base(), params.ubatch, params.cparams);
-    }
-
-    if (inp_raw->self_k_idxs_swa && inp_raw->self_k_idxs_swa->buffer) {
-        res &= inp_raw->self_k_idxs_swa->ne[0] == params.ubatch.n_tokens;
-        res &= can_reuse_kq_mask(inp_raw->self_kq_mask_swa, mctx->get_raw()->get_swa(), params.ubatch, params.cparams);
-    }
+    res &= inp_raw->can_reuse(params);
 
     res &= dsv4_can_reuse_comp_input(inp_csa, mctx->get_csa_plan(params.ubatch), params.ubatch.n_tokens);
     res &= dsv4_can_reuse_comp_input(inp_hca, mctx->get_hca_plan(params.ubatch), params.ubatch.n_tokens);

src/llama-kv-cache-dsv4.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,10 @@ llama_kv_cache_dsv4::llama_kv_cache_dsv4(
     hparams_hca(model.hparams),
     hparams_lid(model.hparams) {
 
+    if (n_seq_max > 1) {
+        throw std::runtime_error("DSV4 KV cache currently supports only single-sequence decoding");
+    }
+
     const layer_filter_cb filter_raw = [&](int32_t il) {
         if (filter && !filter(il)) {
             return false;

0 commit comments

Comments
 (0)