Clean up dead code: remove unused DSV4 write_*/pending_end plan inputs and the always-true `stateful` flag

am17an · am17an · commit f011c470c182 · 2026-06-04T07:44:02.000+02:00
diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
@@ -695,10 +695,6 @@ static void dsv4_set_comp_inputs(
         const char * name,
         bool debug,
         uint32_t n_tokens) {
-    dsv4_set_i64(inp.write_idxs, plan.write_idxs);
-    dsv4_set_i32(inp.write_pos,  plan.write_pos);
-    dsv4_set_i32(inp.write_end,  plan.write_end);
-    dsv4_set_i32(inp.pending_end, plan.pending_end);
     dsv4_set_i32(inp.state_idxs, plan.state_idxs);
     dsv4_set_i32(inp.state_pos, plan.state_pos);
     dsv4_set_i32(inp.state_read_idxs, plan.state_read_idxs);
@@ -709,11 +705,9 @@ static void dsv4_set_comp_inputs(
     dsv4_set_kq_mask(inp.kq_mask, plan, n_tokens);
 
     if (debug || dsv4_compress_debug()) {
-        LLAMA_LOG_INFO("%s: %s ratio=%u, n_tokens=%u, write_end=%s, state_write_end=%s, pending_end=%s\n",
+        LLAMA_LOG_INFO("%s: %s ratio=%u, n_tokens=%u, state_write_end=%s\n",
                 __func__, name, plan.ratio, n_tokens,
-                dsv4_plan_positions(plan.write_end).c_str(),
-                dsv4_plan_positions(plan.state_write_end).c_str(),
-                dsv4_plan_positions(plan.pending_end).c_str());
+                dsv4_plan_positions(plan.state_write_end).c_str());
     }
 }
 
@@ -740,13 +734,7 @@ static bool dsv4_can_reuse_comp_input(
         const llm_graph_input_dsv4::comp_input & inp,
         const llama_kv_cache_dsv4_context::comp_plan & plan,
         uint32_t n_tokens) {
-    const int64_t n_write = plan.write_idxs.size();
-
     bool res = true;
-    res &= dsv4_can_reuse_tensor_1d(inp.write_idxs, n_write);
-    res &= dsv4_can_reuse_tensor_1d(inp.write_pos,  n_write);
-    res &= dsv4_can_reuse_tensor_1d(inp.write_end,  n_write);
-    res &= dsv4_can_reuse_tensor_1d(inp.pending_end, plan.pending_end.size());
     res &= dsv4_can_reuse_tensor_1d(inp.state_idxs, plan.state_idxs.size());
     res &= dsv4_can_reuse_tensor_1d(inp.state_pos, plan.state_pos.size());
     res &= dsv4_can_reuse_tensor_1d(inp.state_read_idxs, plan.state_read_idxs.size());
@@ -780,12 +768,6 @@ static void dsv4_build_comp_inputs(
         llm_graph_input_dsv4::comp_input & inp,
         const llama_kv_cache_dsv4_context::comp_plan & plan,
         const char * name) {
-    const int64_t n_write = plan.write_idxs.size();
-
-    inp.write_idxs = dsv4_build_input_1d(ctx, GGML_TYPE_I64, n_write, std::string("dsv4_") + name + "_write_idxs");
-    inp.write_pos  = dsv4_build_input_1d(ctx, GGML_TYPE_I32, n_write, std::string("dsv4_") + name + "_write_pos");
-    inp.write_end  = dsv4_build_input_1d(ctx, GGML_TYPE_I32, n_write, std::string("dsv4_") + name + "_write_end");
-    inp.pending_end = dsv4_build_input_1d(ctx, GGML_TYPE_I32, plan.pending_end.size(), std::string("dsv4_") + name + "_pending_end");
     inp.state_idxs = dsv4_build_input_1d(ctx, GGML_TYPE_I32, plan.state_idxs.size(), std::string("dsv4_") + name + "_state_idxs");
     inp.state_pos = dsv4_build_input_1d(ctx, GGML_TYPE_I32, plan.state_pos.size(), std::string("dsv4_") + name + "_state_pos");
     inp.state_read_idxs = dsv4_build_input_1d(ctx, GGML_TYPE_I32, plan.state_read_idxs.size(), std::string("dsv4_") + name + "_state_read_idxs");
diff --git a/src/llama-graph.h b/src/llama-graph.h
@@ -463,11 +463,6 @@ class llm_graph_input_attn_kv_iswa : public llm_graph_input_i {
 class llm_graph_input_dsv4 : public llm_graph_input_i {
 public:
     struct comp_input {
-        ggml_tensor * write_idxs = nullptr; // I64 [n_write]
-        ggml_tensor * write_pos  = nullptr; // I32 [n_write]
-        ggml_tensor * write_end  = nullptr; // I32 [n_write]
-        ggml_tensor * pending_end = nullptr; // I32 [n_pending]
-
         ggml_tensor * state_idxs       = nullptr; // I32 [n_state]
         ggml_tensor * state_pos        = nullptr; // I32 [n_state]
         ggml_tensor * state_read_idxs  = nullptr; // I32 [ratio*n_state_write]
diff --git a/src/llama-kv-cache-dsv4.cpp b/src/llama-kv-cache-dsv4.cpp
@@ -210,7 +210,6 @@ static llama_kv_cache_dsv4_context::comp_plan dsv4_build_comp_plan(
         const llama_ubatch & ubatch,
         uint32_t ratio,
         bool overlap,
-        bool stateful,
         uint32_t state_size,
         uint32_t kv_size,
         uint32_t n_stream) {
@@ -256,12 +255,10 @@ static llama_kv_cache_dsv4_context::comp_plan dsv4_build_comp_plan(
 
         const llama_seq_id seq_id = ubatch.seq_id[i][0];
 
-        if (stateful) {
-            const int64_t stream_off = n_stream > 1 ? (int64_t) seq_id*state_size : 0;
+        const int64_t stream_off = n_stream > 1 ? (int64_t) seq_id*state_size : 0;
 
-            plan.state_idxs.push_back((int32_t) (stream_off + pos%state_size));
-            plan.state_pos .push_back((int32_t) (pos%ratio));
-        }
+        plan.state_idxs.push_back((int32_t) (stream_off + pos%state_size));
+        plan.state_pos .push_back((int32_t) (pos%ratio));
 
         const int64_t n_visible = (int64_t) (pos + 1)/ratio;
         plan.n_visible[i] = (int32_t) n_visible;
@@ -273,36 +270,26 @@ static llama_kv_cache_dsv4_context::comp_plan dsv4_build_comp_plan(
 
         const llama_pos source_start = pos + 1 - ratio;
 
-        if (stateful) {
-            const int64_t cache_off = n_stream > 1 ? (int64_t) seq_id*kv_size : 0;
+        const int64_t cache_off = n_stream > 1 ? (int64_t) seq_id*kv_size : 0;
 
-            plan.state_write_idxs.push_back(cache_off + pos/ratio);
-            plan.state_write_pos .push_back((int32_t) source_start);
-            plan.state_write_end .push_back((int32_t) pos);
+        plan.state_write_idxs.push_back(cache_off + pos/ratio);
+        plan.state_write_pos .push_back((int32_t) source_start);
+        plan.state_write_end .push_back((int32_t) pos);
 
-            if (overlap) {
-                const llama_pos prev_start = source_start - ratio;
+        if (overlap) {
+            const llama_pos prev_start = source_start - ratio;
 
-                for (uint32_t j = 0; j < ratio; ++j) {
-                    plan.state_read_idxs.push_back(state_source_idx(seq_id, prev_start + j));
-                }
-                for (uint32_t j = 0; j < ratio; ++j) {
-                    plan.state_read_idxs.push_back(state_source_idx(seq_id, source_start + j));
-                }
-            } else {
-                for (uint32_t j = 0; j < ratio; ++j) {
-                    plan.state_read_idxs.push_back(state_source_idx(seq_id, source_start + j));
-                }
+            for (uint32_t j = 0; j < ratio; ++j) {
+                plan.state_read_idxs.push_back(state_source_idx(seq_id, prev_start + j));
+            }
+            for (uint32_t j = 0; j < ratio; ++j) {
+                plan.state_read_idxs.push_back(state_source_idx(seq_id, source_start + j));
+            }
+        } else {
+            for (uint32_t j = 0; j < ratio; ++j) {
+                plan.state_read_idxs.push_back(state_source_idx(seq_id, source_start + j));
             }
-
-            continue;
         }
-
-        const int64_t stream_off = n_stream > 1 ? (int64_t) seq_id*kv_size : 0;
-
-        plan.write_idxs.push_back(stream_off + pos/ratio);
-        plan.write_pos .push_back((int32_t) (pos + 1 - ratio));
-        plan.write_end .push_back((int32_t) pos);
     }
 
     static const bool debug = []() {
@@ -311,11 +298,9 @@ static llama_kv_cache_dsv4_context::comp_plan dsv4_build_comp_plan(
     }();
 
     if (debug) {
-        LLAMA_LOG_INFO("%s: ratio=%u, n_tokens=%u, write_end=%s, state_write_end=%s, pending_end=%s\n",
+        LLAMA_LOG_INFO("%s: ratio=%u, n_tokens=%u, state_write_end=%s\n",
                 __func__, ratio, ubatch.n_tokens,
-                dsv4_plan_positions(plan.write_end).c_str(),
-                dsv4_plan_positions(plan.state_write_end).c_str(),
-                dsv4_plan_positions(plan.pending_end).c_str());
+                dsv4_plan_positions(plan.state_write_end).c_str());
     }
 
     return plan;
@@ -325,15 +310,14 @@ static std::vector<llama_kv_cache_dsv4_context::comp_plan> dsv4_build_comp_plans
         const std::vector<llama_ubatch> & ubatches,
         uint32_t ratio,
         bool overlap,
-        bool stateful,
         uint32_t state_size,
         uint32_t kv_size,
         uint32_t n_stream) {
     std::vector<llama_kv_cache_dsv4_context::comp_plan> plans;
     plans.reserve(ubatches.size());
 
     for (const llama_ubatch & ubatch : ubatches) {
-        plans.push_back(dsv4_build_comp_plan(ubatch, ratio, overlap, stateful, state_size, kv_size, n_stream));
+        plans.push_back(dsv4_build_comp_plan(ubatch, ratio, overlap, state_size, kv_size, n_stream));
     }
 
     return plans;
@@ -1023,9 +1007,9 @@ llama_kv_cache_dsv4_context::llama_kv_cache_dsv4_context(
         slot_info_vec_t sinfos_raw_swa,
         std::vector<llama_ubatch> ubatches) :
     ubatches(std::move(ubatches)),
-    plans_csa(dsv4_build_comp_plans(this->ubatches, DSV4_CSA_RATIO, true,  true,
+    plans_csa(dsv4_build_comp_plans(this->ubatches, DSV4_CSA_RATIO, true,
                 kv->get_csa_state()->get_state_size(), kv->get_csa()->get_size(), kv->get_csa_state()->get_n_stream())),
-    plans_hca(dsv4_build_comp_plans(this->ubatches, DSV4_HCA_RATIO, false, true,
+    plans_hca(dsv4_build_comp_plans(this->ubatches, DSV4_HCA_RATIO, false,
                 kv->get_hca_state()->get_state_size(), kv->get_hca()->get_size(), kv->get_hca_state()->get_n_stream())),
     plans_lid(plans_csa),
     ctx_raw(new llama_kv_cache_iswa_context(kv->get_raw(), std::move(sinfos_raw_base), std::move(sinfos_raw_swa), this->ubatches)),
diff --git a/src/llama-kv-cache-dsv4.h b/src/llama-kv-cache-dsv4.h
@@ -155,20 +155,6 @@ class llama_kv_cache_dsv4_context : public llama_memory_context_i {
     struct comp_plan {
         uint32_t ratio = 0;
 
-        // Logical compressed row ids written by the current graph.
-        std::vector<int64_t> write_idxs;
-
-        // Position used for compressor RoPE. For a completed block this is the
-        // first source-token position of that block.
-        std::vector<int32_t> write_pos;
-
-        // Position at which the compressed row becomes visible to attention.
-        std::vector<int32_t> write_end;
-
-        // Completed blocks that could not be planned. This should remain empty
-        // for the scratch-backed state path.
-        std::vector<int32_t> pending_end;
-
         // Compressor-state row ids updated by the current graph.
         std::vector<int32_t> state_idxs;
 
diff --git a/src/models/deepseek-v4.cpp b/src/models/deepseek-v4.cpp
diff --git a/src/models/models.h b/src/models/models.h