Skip to content

Commit 7daa4e9

Browse files
committed
fix: report total paused reqs instead of only last dp_index
The loop over dp_index repeatedly overwrote the same lightllm_batch_pause_size gauge, so only the value for the last dp_index was reported. Use _get_paused_req_num(), which sums across all dp indices, instead.
1 parent fbe5f21 commit 7daa4e9

File tree

1 file changed

+1
-3
lines changed

1 file changed

+1
-3
lines changed

lightllm/server/router/manager.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -249,9 +249,7 @@ async def loop_for_fwd(
249 249
# Count output tokens (each running req produces ~1 token per decode step)
250 250
self.status_reporter.count_output_tokens(len(self.running_batch.reqs))
251 251
if counter_count % 100 == 0:
252-
for dp_index in range(self.dp_size_in_node):
253-
paused_req_num = self._get_paused_req_num_in_dp_index(dp_index=dp_index)
254-
self.metric_client.gauge_set("lightllm_batch_pause_size", paused_req_num)
252+
self.metric_client.gauge_set("lightllm_batch_pause_size", self._get_paused_req_num())
255 253
# pd decode mode need to update token_load more frequently
256 254
self.req_queue.update_token_load(self.running_batch, force_update=self.is_pd_decode_mode)
257 255
self.metric_client.gauge_set("lightllm_batch_current_size", len(self.running_batch.reqs))

0 commit comments

Comments
 (0)