File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -98,15 +98,27 @@ def schedule(self) -> Optional[SchedulerOutput]:
9898 )
9999 )
100100
101+ num_tokens_this_step = (
102+ req .get_prompt_length () - req .num_cached_tokens
103+ )
104+ if (
105+ current_num_batched_tokens + num_tokens_this_step
106+ >= self .max_num_batched_tokens
107+ ):
108+ if req .num_cached_tokens > 0 :
109+ self .cache_manager .free_blocks (req .block_table )
110+ req .block_table = []
111+ req .slot_mapping = []
112+ req .num_cached_tokens = 0
113+
114+ self .waiting_queue .sync_q .put (req )
115+ break
116+
117+ current_num_batched_tokens += num_tokens_this_step
101118 req .num_blocks = len (req .block_table )
102119 req .status = RequestStatus .RUNNING
103120 scheduled_requests .append (req )
104121
105- # TODO
106- # num_tokens_this_step = req.get_prompt_length() - req.num_cached_tokens
107- # current_num_batched_tokens += num_tokens_this_step
108- assert False
109-
110122 # Return prefill batch if any waiting requests were scheduled
111123 if scheduled_requests :
112124 is_prefill = True
You can’t perform that action at this time.
0 commit comments