Skip to content

Commit 28b57c4

Browse files
committed
refactor batch stream processor
1 parent 4fcdfe2 commit 28b57c4

12 files changed

Lines changed: 978 additions & 737 deletions

rtp_llm/cpp/engine_base/stream/StreamGroups.h

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,12 @@ struct StreamGroups {
3636
has_multimodal_input_ = true;
3737
}
3838
}
39-
total_block_update_copy_num_ += stream->streamCacheResource().getKVBlockUpdateMapping().size();
39+
auto block_update_copy_num = stream->streamCacheResource().getKVBlockUpdateMapping().size();
40+
if (stream->isContextStream()) {
41+
context_block_update_copy_num_ += block_update_copy_num;
42+
} else {
43+
decode_block_update_copy_num_ += block_update_copy_num;
44+
}
4045
model_execute_token_size_ += stream->currentExecuteTokenSize();
4146
total_sampler_batch_size_in_ += stream->needTilingForSampling() ? next_batch_size : cur_batch_size;
4247
total_sampler_batch_size_out_ += next_batch_size;
@@ -64,7 +69,13 @@ struct StreamGroups {
6469
return total_sampler_batch_size_out_;
6570
}
6671
size_t totalBlockUpdateCopyNum() const {
67-
return total_block_update_copy_num_;
72+
return decode_block_update_copy_num_ + context_block_update_copy_num_;
73+
}
74+
size_t decodeBlockUpdateCopyNum() const {
75+
return decode_block_update_copy_num_;
76+
}
77+
size_t contextBlockUpdateCopyNum() const {
78+
return context_block_update_copy_num_;
6879
}
6980
size_t curBlocksNum() const {
7081
return max_blocks_num_;
@@ -173,7 +184,7 @@ struct StreamGroups {
173184
<< ", total_model_batch_size: " << totalModelBatchSize()
174185
<< ", total_sampler_batch_size_in: " << total_sampler_batch_size_in_
175186
<< ", total_sampler_batch_size_out: " << total_sampler_batch_size_out_
176-
<< ", total_block_update_copy_num: " << total_block_update_copy_num_
187+
<< ", total_block_update_copy_num: " << totalBlockUpdateCopyNum()
177188
<< ", max_blocks_num_: " << max_blocks_num_
178189
<< ", model_execute_token_size: " << model_execute_token_size_ << ", max_seq_len: " << max_seq_len_
179190
<< ", is_fake_stream: " << is_fake_stream_ << "}";
@@ -195,22 +206,23 @@ struct StreamGroups {
195206
private:
196207
std::list<GenerateStreamPtr> context_streams_;
197208
std::list<GenerateStreamPtr> decode_streams_;
198-
size_t total_sampler_batch_size_in_ = 0;
199-
size_t total_sampler_batch_size_out_ = 0;
200-
size_t total_decode_batch_size_ = 0;
201-
size_t total_context_batch_size_ = 0;
202-
size_t total_block_update_copy_num_ = 0;
203-
size_t max_blocks_num_ = 0;
204-
size_t model_execute_token_size_ = 0;
205-
size_t max_seq_len_ = 0;
206-
size_t max_context_seq_len_ = 0;
207-
size_t max_reuse_length_ = 0;
208-
size_t cum_context_seq_len_ = 0;
209-
size_t multimodal_features_len_ = 0;
210-
size_t total_score_batch_size_ = 0;
211-
bool has_multimodal_input_ = false;
212-
bool gen_timeline_ = false;
213-
bool is_fake_stream_ = false;
209+
size_t total_sampler_batch_size_in_ = 0;
210+
size_t total_sampler_batch_size_out_ = 0;
211+
size_t total_decode_batch_size_ = 0;
212+
size_t total_context_batch_size_ = 0;
213+
size_t decode_block_update_copy_num_ = 0;
214+
size_t context_block_update_copy_num_ = 0;
215+
size_t max_blocks_num_ = 0;
216+
size_t model_execute_token_size_ = 0;
217+
size_t max_seq_len_ = 0;
218+
size_t max_context_seq_len_ = 0;
219+
size_t max_reuse_length_ = 0;
220+
size_t cum_context_seq_len_ = 0;
221+
size_t multimodal_features_len_ = 0;
222+
size_t total_score_batch_size_ = 0;
223+
bool has_multimodal_input_ = false;
224+
bool gen_timeline_ = false;
225+
bool is_fake_stream_ = false;
214226
std::list<std::string> adapter_names;
215227
};
216228

0 commit comments

Comments
 (0)