@@ -711,15 +711,23 @@ def build_connector_meta(
711711 for request_id in scheduler_output .finished_req_ids :
712712 self .requests_meta .pop (request_id , None )
713713
714- for request_id , dispatch_meta in requests_dispatch_meta .items ():
715- if len (dispatch_meta .dump_block_ids [0 ]) > 0 :
716- self ._async_dump_req_ids .add (request_id )
714+ self ._track_async_dump_requests (requests_dispatch_meta )
717715
718716 return UCMConnectorMetadata (
719717 requests_dispatch_meta ,
720718 scheduler_output .preempted_req_ids or set (),
721719 )
722720
721+ def _track_async_dump_requests (
722+ self ,
723+ requests_dispatch_meta : dict [str , RequestDispatchMeta ],
724+ ) -> None :
725+ self ._async_dump_req_ids .update (
726+ request_id
727+ for request_id , dispatch_meta in requests_dispatch_meta .items ()
728+ if len (dispatch_meta .dump_block_ids [0 ]) > 0
729+ )
730+
723731 def start_load_kv (self , forward_context : "ForwardContext" , ** kwargs ) -> None :
724732 metadata = self ._get_connector_metadata ()
725733 assert isinstance (metadata , UCMConnectorMetadata )
@@ -2660,6 +2668,8 @@ def build_connector_meta(
26602668 for request_id in scheduler_output .finished_req_ids :
26612669 self .requests_meta .pop (request_id , None )
26622670
2671+ self ._track_async_dump_requests (requests_dispatch_meta )
2672+
26632673 return UCMConnectorMetadata (
26642674 requests_dispatch_meta ,
26652675 scheduler_output .preempted_req_ids or set (),
@@ -2670,7 +2680,9 @@ def request_finished_all_groups(
26702680 request : "Request" ,
26712681 block_ids : tuple [list [int ], ...],
26722682 ) -> tuple [bool , dict [str , Any ] | None ]:
2673- return False , None
2683+ if block_ids :
2684+ return self .request_finished (request , block_ids [0 ])
2685+ return self .request_finished (request , [])
26742686
26752687
26762688def use_hybrid_linear_attention_layout (
0 commit comments