Skip to content

Commit c596fff

Browse files
committed
fix attn_metadata bug
1 parent bb68ca5 commit c596fff

2 files changed

Lines changed: 12 additions & 1 deletion

File tree

csrc/engine/compiler/chunk_prefill_compiler.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "chunk_prefill_compiler.hpp"
2+
#include "../../global_state/global_state.hpp"
23
#include "infinicore/context/context.hpp"
34

45

@@ -121,6 +122,16 @@ void ChunkPrefillCompiler::compile() {
121122
{total_tokens}, infinicore::DataType::I64, infinicore::context::getDevice());
122123
set_zeros(input.slot_mapping.value());
123124

125+
// Attention reads attn_metadata from thread-local forward context.
126+
infinilm::global_state::get_forward_context().attn_metadata = {
127+
input.past_sequence_lengths,
128+
input.total_sequence_lengths,
129+
input.input_offsets,
130+
input.cu_seqlens,
131+
input.block_tables,
132+
input.slot_mapping,
133+
};
134+
124135
barrier_->wait();
125136
infinicore::context::startGraphRecording();
126137
auto output = model_->forward(input);

scripts/test_perf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
NUM_REQUESTS = 64
3131
CONCURRENCY = 20
32-
API_URL = "http://127.0.0.1:8000"
32+
API_URL = "http://127.0.0.1:3456"
3333
MODEL = "FM9G-7B"
3434

3535

0 commit comments

Comments
 (0)