diff --git a/src/models/deepseek4.cpp b/src/models/deepseek4.cpp index 56af9707e..bf2cf4ca4 100644 --- a/src/models/deepseek4.cpp +++ b/src/models/deepseek4.cpp @@ -1009,7 +1009,7 @@ llm_build_deepseek4::llm_build_deepseek4(const llama_model & model, const llm_gr if (compress_ratio == 0) { ggml_tensor * k_cache = mctx_swa->get_k(ctx0, il); - k_cache = ggml_reshape_3d(ctx0, k_cache, n_embd_head_k, 1, k_cache->ne[2]); + k_cache = ggml_view_3d(ctx0, k_cache, n_embd_head_k, 1, k_cache->ne[2], k_cache->nb[1], k_cache->nb[2], 0); cur = build_attn_mha(q, k_cache, k_cache, nullptr, inp_attn->get_kq_mask_swa(), layer.attn_sinks, nullptr, kq_scale, il); cb(cur, "kqv_out", il); @@ -1192,7 +1192,7 @@ llm_build_deepseek4::llm_build_deepseek4(const llama_model & model, const llm_gr } ggml_tensor * k_raw = mctx_swa->get_k(ctx0, il); - k_raw = ggml_reshape_3d(ctx0, k_raw, n_embd_head_k, 1, k_raw->ne[2]); + k_raw = ggml_view_3d(ctx0, k_raw, n_embd_head_k, 1, k_raw->ne[2], k_raw->nb[1], k_raw->nb[2], 0); k_all = k_raw; v_all = k_raw; attn_mask = inp_attn->self_kq_mask_swa;