@@ -36,7 +36,12 @@ struct StreamGroups {
3636 has_multimodal_input_ = true ;
3737 }
3838 }
39- total_block_update_copy_num_ += stream->streamCacheResource ().getKVBlockUpdateMapping ().size ();
39+ auto block_update_copy_num = stream->streamCacheResource ().getKVBlockUpdateMapping ().size ();
40+ if (stream->isContextStream ()) {
41+ context_block_update_copy_num_ += block_update_copy_num;
42+ } else {
43+ decode_block_update_copy_num_ += block_update_copy_num;
44+ }
4045 model_execute_token_size_ += stream->currentExecuteTokenSize ();
4146 total_sampler_batch_size_in_ += stream->needTilingForSampling () ? next_batch_size : cur_batch_size;
4247 total_sampler_batch_size_out_ += next_batch_size;
@@ -64,7 +69,13 @@ struct StreamGroups {
6469 return total_sampler_batch_size_out_;
6570 }
6671 size_t totalBlockUpdateCopyNum () const {
67- return total_block_update_copy_num_;
72+ return decode_block_update_copy_num_ + context_block_update_copy_num_;
73+ }
74+ size_t decodeBlockUpdateCopyNum () const {
75+ return decode_block_update_copy_num_;
76+ }
77+ size_t contextBlockUpdateCopyNum () const {
78+ return context_block_update_copy_num_;
6879 }
6980 size_t curBlocksNum () const {
7081 return max_blocks_num_;
@@ -173,7 +184,7 @@ struct StreamGroups {
173184 << " , total_model_batch_size: " << totalModelBatchSize ()
174185 << " , total_sampler_batch_size_in: " << total_sampler_batch_size_in_
175186 << " , total_sampler_batch_size_out: " << total_sampler_batch_size_out_
176- << " , total_block_update_copy_num: " << total_block_update_copy_num_
187+ << " , total_block_update_copy_num: " << totalBlockUpdateCopyNum ()
177188 << " , max_blocks_num_: " << max_blocks_num_
178189 << " , model_execute_token_size: " << model_execute_token_size_ << " , max_seq_len: " << max_seq_len_
179190 << " , is_fake_stream: " << is_fake_stream_ << " }" ;
@@ -195,22 +206,23 @@ struct StreamGroups {
195206private:
196207 std::list<GenerateStreamPtr> context_streams_;
197208 std::list<GenerateStreamPtr> decode_streams_;
198- size_t total_sampler_batch_size_in_ = 0 ;
199- size_t total_sampler_batch_size_out_ = 0 ;
200- size_t total_decode_batch_size_ = 0 ;
201- size_t total_context_batch_size_ = 0 ;
202- size_t total_block_update_copy_num_ = 0 ;
203- size_t max_blocks_num_ = 0 ;
204- size_t model_execute_token_size_ = 0 ;
205- size_t max_seq_len_ = 0 ;
206- size_t max_context_seq_len_ = 0 ;
207- size_t max_reuse_length_ = 0 ;
208- size_t cum_context_seq_len_ = 0 ;
209- size_t multimodal_features_len_ = 0 ;
210- size_t total_score_batch_size_ = 0 ;
211- bool has_multimodal_input_ = false ;
212- bool gen_timeline_ = false ;
213- bool is_fake_stream_ = false ;
209+ size_t total_sampler_batch_size_in_ = 0 ;
210+ size_t total_sampler_batch_size_out_ = 0 ;
211+ size_t total_decode_batch_size_ = 0 ;
212+ size_t total_context_batch_size_ = 0 ;
213+ size_t decode_block_update_copy_num_ = 0 ;
214+ size_t context_block_update_copy_num_ = 0 ;
215+ size_t max_blocks_num_ = 0 ;
216+ size_t model_execute_token_size_ = 0 ;
217+ size_t max_seq_len_ = 0 ;
218+ size_t max_context_seq_len_ = 0 ;
219+ size_t max_reuse_length_ = 0 ;
220+ size_t cum_context_seq_len_ = 0 ;
221+ size_t multimodal_features_len_ = 0 ;
222+ size_t total_score_batch_size_ = 0 ;
223+ bool has_multimodal_input_ = false ;
224+ bool gen_timeline_ = false ;
225+ bool is_fake_stream_ = false ;
214226 std::list<std::string> adapter_names;
215227};
216228
0 commit comments