fix: report total paused reqs instead of only last dp_index

sufubao · sufubao · commit 7daa4e99f9bc · 2026-03-20T04:25:05.000Z
The loop over dp_index repeatedly overwrote the same
lightllm_batch_pause_size gauge, so only the value for the last
dp_index was reported. Use _get_paused_req_num(), which sums across
all dp indices, so the gauge reports the total number of paused reqs.
diff --git a/lightllm/server/router/manager.py b/lightllm/server/router/manager.py
@@ -249,9 +249,7 @@ async def loop_for_fwd(
                 # Count output tokens (each running req produces ~1 token per decode step)
                 self.status_reporter.count_output_tokens(len(self.running_batch.reqs))
                 if counter_count % 100 == 0:
-                    for dp_index in range(self.dp_size_in_node):
-                        paused_req_num = self._get_paused_req_num_in_dp_index(dp_index=dp_index)
-                        self.metric_client.gauge_set("lightllm_batch_pause_size", paused_req_num)
+                    self.metric_client.gauge_set("lightllm_batch_pause_size", self._get_paused_req_num())
                 # pd decode mode need to update token_load more frequently
                 self.req_queue.update_token_load(self.running_batch, force_update=self.is_pd_decode_mode)
                 self.metric_client.gauge_set("lightllm_batch_current_size", len(self.running_batch.reqs))