@@ -176,20 +176,10 @@ async def _add_worker_to_router(
176176 return False
177177
178178
179- async def _remove_worker_from_router (client : AsyncClient , worker_url : str ) -> bool :
179+ async def _remove_worker_from_router_by_id (
180+ client : AsyncClient , worker_id : str , * , worker_url : str
181+ ) -> bool :
180182 try :
181- current = await _get_router_workers (client )
182- worker_id = None
183- for w in current :
184- u = w .get ("url" )
185- if u and isinstance (u , str ) and u .rstrip ("/" ) == worker_url .rstrip ("/" ):
186- wid = w .get ("id" )
187- if wid and isinstance (wid , str ):
188- worker_id = wid
189- break
190- if not worker_id :
191- logger .error ("No worker id found for url %s" , worker_url )
192- return False
193183 body = await _request_json_limited (
194184 client ,
195185 "DELETE" ,
@@ -212,10 +202,16 @@ async def _update_workers_in_router_replica(
212202) -> None :
213203 current = await _get_router_workers (client )
214204 current_urls : set [str ] = set ()
205+ current_ids_by_norm_url : dict [str , str ] = {}
215206 for w in current :
216207 u = w .get ("url" )
217- if isinstance (u , str ) and u :
218- current_urls .add (_normalize_worker_url (u ))
208+ if not isinstance (u , str ) or not u :
209+ continue
210+ norm_u = _normalize_worker_url (u )
211+ current_urls .add (norm_u )
212+ wid = w .get ("id" )
213+ if isinstance (wid , str ) and wid :
214+ current_ids_by_norm_url [norm_u ] = wid
219215 target_by_norm = {_normalize_worker_url (t ["url" ]): t for t in target_workers }
220216 target_urls = set (target_by_norm .keys ())
221217 to_add = sorted (target_urls - current_urls )
@@ -231,7 +227,12 @@ async def _update_workers_in_router_replica(
231227 if not ok :
232228 logger .warning ("Failed to add worker %s, continuing with others" , tw ["url" ])
233229 for url in to_remove :
234- ok = await _remove_worker_from_router (client , url )
230+ wid = current_ids_by_norm_url .get (url )
231+ if not wid :
232+ logger .error ("No worker id found for url %s" , url )
233+ ok = False
234+ else :
235+ ok = await _remove_worker_from_router_by_id (client , wid , worker_url = url )
235236 if not ok :
236237 logger .warning ("Failed to remove worker %s, continuing with others" , url )
237238
@@ -270,9 +271,9 @@ async def _get_worker_payload(job_model: JobModel, worker_url: str) -> _WorkerPa
270271 "payload" : {"url" : worker_url , "worker_type" : "regular" },
271272 }
272273 except _ResponseTooLargeError :
273- logger .debug ("server_info response too large for worker %s" , worker_url )
274+ logger .warning ("server_info response too large for worker %s" , worker_url )
274275 except Exception as e :
275- logger .debug ("Could not fetch server_info for worker %s: %r" , worker_url , e )
276+ logger .exception ("Could not fetch server_info for worker %s: %r" , worker_url , e )
276277 return {"status" : "not_ready" , "payload" : None }
277278
278279
0 commit comments