Skip to content

Commit f011c47

Browse files
committed
cleanup dead code
1 parent 651a4b4 commit f011c47

6 files changed

Lines changed: 25 additions & 313 deletions

File tree

src/llama-graph.cpp

Lines changed: 2 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -695,10 +695,6 @@ static void dsv4_set_comp_inputs(
695695
const char * name,
696696
bool debug,
697697
uint32_t n_tokens) {
698-
dsv4_set_i64(inp.write_idxs, plan.write_idxs);
699-
dsv4_set_i32(inp.write_pos, plan.write_pos);
700-
dsv4_set_i32(inp.write_end, plan.write_end);
701-
dsv4_set_i32(inp.pending_end, plan.pending_end);
702698
dsv4_set_i32(inp.state_idxs, plan.state_idxs);
703699
dsv4_set_i32(inp.state_pos, plan.state_pos);
704700
dsv4_set_i32(inp.state_read_idxs, plan.state_read_idxs);
@@ -709,11 +705,9 @@ static void dsv4_set_comp_inputs(
709705
dsv4_set_kq_mask(inp.kq_mask, plan, n_tokens);
710706

711707
if (debug || dsv4_compress_debug()) {
712-
LLAMA_LOG_INFO("%s: %s ratio=%u, n_tokens=%u, write_end=%s, state_write_end=%s, pending_end=%s\n",
708+
LLAMA_LOG_INFO("%s: %s ratio=%u, n_tokens=%u, state_write_end=%s\n",
713709
__func__, name, plan.ratio, n_tokens,
714-
dsv4_plan_positions(plan.write_end).c_str(),
715-
dsv4_plan_positions(plan.state_write_end).c_str(),
716-
dsv4_plan_positions(plan.pending_end).c_str());
710+
dsv4_plan_positions(plan.state_write_end).c_str());
717711
}
718712
}
719713

@@ -740,13 +734,7 @@ static bool dsv4_can_reuse_comp_input(
740734
const llm_graph_input_dsv4::comp_input & inp,
741735
const llama_kv_cache_dsv4_context::comp_plan & plan,
742736
uint32_t n_tokens) {
743-
const int64_t n_write = plan.write_idxs.size();
744-
745737
bool res = true;
746-
res &= dsv4_can_reuse_tensor_1d(inp.write_idxs, n_write);
747-
res &= dsv4_can_reuse_tensor_1d(inp.write_pos, n_write);
748-
res &= dsv4_can_reuse_tensor_1d(inp.write_end, n_write);
749-
res &= dsv4_can_reuse_tensor_1d(inp.pending_end, plan.pending_end.size());
750738
res &= dsv4_can_reuse_tensor_1d(inp.state_idxs, plan.state_idxs.size());
751739
res &= dsv4_can_reuse_tensor_1d(inp.state_pos, plan.state_pos.size());
752740
res &= dsv4_can_reuse_tensor_1d(inp.state_read_idxs, plan.state_read_idxs.size());
@@ -780,12 +768,6 @@ static void dsv4_build_comp_inputs(
780768
llm_graph_input_dsv4::comp_input & inp,
781769
const llama_kv_cache_dsv4_context::comp_plan & plan,
782770
const char * name) {
783-
const int64_t n_write = plan.write_idxs.size();
784-
785-
inp.write_idxs = dsv4_build_input_1d(ctx, GGML_TYPE_I64, n_write, std::string("dsv4_") + name + "_write_idxs");
786-
inp.write_pos = dsv4_build_input_1d(ctx, GGML_TYPE_I32, n_write, std::string("dsv4_") + name + "_write_pos");
787-
inp.write_end = dsv4_build_input_1d(ctx, GGML_TYPE_I32, n_write, std::string("dsv4_") + name + "_write_end");
788-
inp.pending_end = dsv4_build_input_1d(ctx, GGML_TYPE_I32, plan.pending_end.size(), std::string("dsv4_") + name + "_pending_end");
789771
inp.state_idxs = dsv4_build_input_1d(ctx, GGML_TYPE_I32, plan.state_idxs.size(), std::string("dsv4_") + name + "_state_idxs");
790772
inp.state_pos = dsv4_build_input_1d(ctx, GGML_TYPE_I32, plan.state_pos.size(), std::string("dsv4_") + name + "_state_pos");
791773
inp.state_read_idxs = dsv4_build_input_1d(ctx, GGML_TYPE_I32, plan.state_read_idxs.size(), std::string("dsv4_") + name + "_state_read_idxs");

src/llama-graph.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -463,11 +463,6 @@ class llm_graph_input_attn_kv_iswa : public llm_graph_input_i {
463463
class llm_graph_input_dsv4 : public llm_graph_input_i {
464464
public:
465465
struct comp_input {
466-
ggml_tensor * write_idxs = nullptr; // I64 [n_write]
467-
ggml_tensor * write_pos = nullptr; // I32 [n_write]
468-
ggml_tensor * write_end = nullptr; // I32 [n_write]
469-
ggml_tensor * pending_end = nullptr; // I32 [n_pending]
470-
471466
ggml_tensor * state_idxs = nullptr; // I32 [n_state]
472467
ggml_tensor * state_pos = nullptr; // I32 [n_state]
473468
ggml_tensor * state_read_idxs = nullptr; // I32 [ratio*n_state_write]

src/llama-kv-cache-dsv4.cpp

Lines changed: 23 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,6 @@ static llama_kv_cache_dsv4_context::comp_plan dsv4_build_comp_plan(
210210
const llama_ubatch & ubatch,
211211
uint32_t ratio,
212212
bool overlap,
213-
bool stateful,
214213
uint32_t state_size,
215214
uint32_t kv_size,
216215
uint32_t n_stream) {
@@ -256,12 +255,10 @@ static llama_kv_cache_dsv4_context::comp_plan dsv4_build_comp_plan(
256255

257256
const llama_seq_id seq_id = ubatch.seq_id[i][0];
258257

259-
if (stateful) {
260-
const int64_t stream_off = n_stream > 1 ? (int64_t) seq_id*state_size : 0;
258+
const int64_t stream_off = n_stream > 1 ? (int64_t) seq_id*state_size : 0;
261259

262-
plan.state_idxs.push_back((int32_t) (stream_off + pos%state_size));
263-
plan.state_pos .push_back((int32_t) (pos%ratio));
264-
}
260+
plan.state_idxs.push_back((int32_t) (stream_off + pos%state_size));
261+
plan.state_pos .push_back((int32_t) (pos%ratio));
265262

266263
const int64_t n_visible = (int64_t) (pos + 1)/ratio;
267264
plan.n_visible[i] = (int32_t) n_visible;
@@ -273,36 +270,26 @@ static llama_kv_cache_dsv4_context::comp_plan dsv4_build_comp_plan(
273270

274271
const llama_pos source_start = pos + 1 - ratio;
275272

276-
if (stateful) {
277-
const int64_t cache_off = n_stream > 1 ? (int64_t) seq_id*kv_size : 0;
273+
const int64_t cache_off = n_stream > 1 ? (int64_t) seq_id*kv_size : 0;
278274

279-
plan.state_write_idxs.push_back(cache_off + pos/ratio);
280-
plan.state_write_pos .push_back((int32_t) source_start);
281-
plan.state_write_end .push_back((int32_t) pos);
275+
plan.state_write_idxs.push_back(cache_off + pos/ratio);
276+
plan.state_write_pos .push_back((int32_t) source_start);
277+
plan.state_write_end .push_back((int32_t) pos);
282278

283-
if (overlap) {
284-
const llama_pos prev_start = source_start - ratio;
279+
if (overlap) {
280+
const llama_pos prev_start = source_start - ratio;
285281

286-
for (uint32_t j = 0; j < ratio; ++j) {
287-
plan.state_read_idxs.push_back(state_source_idx(seq_id, prev_start + j));
288-
}
289-
for (uint32_t j = 0; j < ratio; ++j) {
290-
plan.state_read_idxs.push_back(state_source_idx(seq_id, source_start + j));
291-
}
292-
} else {
293-
for (uint32_t j = 0; j < ratio; ++j) {
294-
plan.state_read_idxs.push_back(state_source_idx(seq_id, source_start + j));
295-
}
282+
for (uint32_t j = 0; j < ratio; ++j) {
283+
plan.state_read_idxs.push_back(state_source_idx(seq_id, prev_start + j));
284+
}
285+
for (uint32_t j = 0; j < ratio; ++j) {
286+
plan.state_read_idxs.push_back(state_source_idx(seq_id, source_start + j));
287+
}
288+
} else {
289+
for (uint32_t j = 0; j < ratio; ++j) {
290+
plan.state_read_idxs.push_back(state_source_idx(seq_id, source_start + j));
296291
}
297-
298-
continue;
299292
}
300-
301-
const int64_t stream_off = n_stream > 1 ? (int64_t) seq_id*kv_size : 0;
302-
303-
plan.write_idxs.push_back(stream_off + pos/ratio);
304-
plan.write_pos .push_back((int32_t) (pos + 1 - ratio));
305-
plan.write_end .push_back((int32_t) pos);
306293
}
307294

308295
static const bool debug = []() {
@@ -311,11 +298,9 @@ static llama_kv_cache_dsv4_context::comp_plan dsv4_build_comp_plan(
311298
}();
312299

313300
if (debug) {
314-
LLAMA_LOG_INFO("%s: ratio=%u, n_tokens=%u, write_end=%s, state_write_end=%s, pending_end=%s\n",
301+
LLAMA_LOG_INFO("%s: ratio=%u, n_tokens=%u, state_write_end=%s\n",
315302
__func__, ratio, ubatch.n_tokens,
316-
dsv4_plan_positions(plan.write_end).c_str(),
317-
dsv4_plan_positions(plan.state_write_end).c_str(),
318-
dsv4_plan_positions(plan.pending_end).c_str());
303+
dsv4_plan_positions(plan.state_write_end).c_str());
319304
}
320305

321306
return plan;
@@ -325,15 +310,14 @@ static std::vector<llama_kv_cache_dsv4_context::comp_plan> dsv4_build_comp_plans
325310
const std::vector<llama_ubatch> & ubatches,
326311
uint32_t ratio,
327312
bool overlap,
328-
bool stateful,
329313
uint32_t state_size,
330314
uint32_t kv_size,
331315
uint32_t n_stream) {
332316
std::vector<llama_kv_cache_dsv4_context::comp_plan> plans;
333317
plans.reserve(ubatches.size());
334318

335319
for (const llama_ubatch & ubatch : ubatches) {
336-
plans.push_back(dsv4_build_comp_plan(ubatch, ratio, overlap, stateful, state_size, kv_size, n_stream));
320+
plans.push_back(dsv4_build_comp_plan(ubatch, ratio, overlap, state_size, kv_size, n_stream));
337321
}
338322

339323
return plans;
@@ -1023,9 +1007,9 @@ llama_kv_cache_dsv4_context::llama_kv_cache_dsv4_context(
10231007
slot_info_vec_t sinfos_raw_swa,
10241008
std::vector<llama_ubatch> ubatches) :
10251009
ubatches(std::move(ubatches)),
1026-
plans_csa(dsv4_build_comp_plans(this->ubatches, DSV4_CSA_RATIO, true, true,
1010+
plans_csa(dsv4_build_comp_plans(this->ubatches, DSV4_CSA_RATIO, true,
10271011
kv->get_csa_state()->get_state_size(), kv->get_csa()->get_size(), kv->get_csa_state()->get_n_stream())),
1028-
plans_hca(dsv4_build_comp_plans(this->ubatches, DSV4_HCA_RATIO, false, true,
1012+
plans_hca(dsv4_build_comp_plans(this->ubatches, DSV4_HCA_RATIO, false,
10291013
kv->get_hca_state()->get_state_size(), kv->get_hca()->get_size(), kv->get_hca_state()->get_n_stream())),
10301014
plans_lid(plans_csa),
10311015
ctx_raw(new llama_kv_cache_iswa_context(kv->get_raw(), std::move(sinfos_raw_base), std::move(sinfos_raw_swa), this->ubatches)),

src/llama-kv-cache-dsv4.h

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -155,20 +155,6 @@ class llama_kv_cache_dsv4_context : public llama_memory_context_i {
155155
struct comp_plan {
156156
uint32_t ratio = 0;
157157

158-
// Logical compressed row ids written by the current graph.
159-
std::vector<int64_t> write_idxs;
160-
161-
// Position used for compressor RoPE. For a completed block this is the
162-
// first source-token position of that block.
163-
std::vector<int32_t> write_pos;
164-
165-
// Position at which the compressed row becomes visible to attention.
166-
std::vector<int32_t> write_end;
167-
168-
// Completed blocks that could not be planned. This should remain empty
169-
// for the scratch-backed state path.
170-
std::vector<int32_t> pending_end;
171-
172158
// Compressor-state row ids updated by the current graph.
173159
std::vector<int32_t> state_idxs;
174160

0 commit comments

Comments
 (0)