Skip to content

Commit 9abedf3

Browse files
authored
[INTEL_HPU] fix prefix caching context padding bug (PaddlePaddle#2116)
1 parent bf020cf commit 9abedf3

1 file changed

Lines changed: 1 addition & 6 deletions

File tree

backends/intel_hpu/custom_ops/llama_infer/prepare_block_metadata.cc

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,25 +24,20 @@
2424
std::tuple<int, int, int, std::vector<int>> get_max_and_where_nonzero(
2525
int* seq_lens_encoder, int* seq_lens_decoder, const int elem_cnt) {
2626
int max_seq_len_without_context = 0;
27-
int max_seq_len_with_context = 0;
2827
int max_context_len = 0;
2928
std::vector<int> valid_batch;
3029
for (int i = 0; i < elem_cnt; ++i) {
3130
if (seq_lens_encoder[i] > 0) {
3231
valid_batch.push_back(i);
3332
if (seq_lens_encoder[i] > max_seq_len_without_context) {
3433
max_seq_len_without_context = seq_lens_encoder[i];
35-
max_seq_len_with_context = seq_lens_encoder[i];
3634
}
3735
if (seq_lens_decoder[i] > max_context_len) {
3836
max_context_len = seq_lens_decoder[i];
3937
}
40-
if (seq_lens_decoder[i] > 0 && seq_lens_encoder[i] + seq_lens_decoder[i] >
41-
max_seq_len_with_context) {
42-
max_seq_len_with_context = seq_lens_encoder[i] + seq_lens_decoder[i];
43-
}
4438
}
4539
}
40+
int max_seq_len_with_context = max_seq_len_without_context + max_context_len;
4641
return {max_seq_len_without_context,
4742
max_seq_len_with_context,
4843
max_context_len,

0 commit comments

Comments
 (0)