@@ -330,7 +330,7 @@ def get_endpoint_info(self) -> List[EndpointInfo]:
330330
331331 async def initialize_client_sessions (self ) -> None :
332332 """
333- Initialize aiohttp ClientSession objects for prefill and decode endpoints.
333+ Initialize aiohttp client sessions for prefill and decode endpoints.
334334 This must be called from an async context during app startup.
335335 """
336336 if (
@@ -739,18 +739,22 @@ def _add_engine(
739739 # Store model information in the endpoint info
740740 self .available_engines [engine_name ].model_info = model_info
741741
742- if self .event_loop_ready .is_set () and self .event_loop is not None :
743- try :
742+ # Initialize client sessions only if event_loop is available
743+ try :
744+ if hasattr (self .app .state , "event_loop" ) and self .app .state .event_loop :
744745 fut = asyncio .run_coroutine_threadsafe (
745- self .initialize_client_sessions (),
746- self .event_loop ,
746+ self .initialize_client_sessions (), self .app .state .event_loop
747747 )
748748 fut .result ()
749- except Exception as e :
750- logger .error (f"Error initializing client sessions: { e } " )
751- else :
752- logger .debug (
753- "Event loop not ready; deferring client session initialization"
749+ logger .info ("Client sessions initialized successfully in _add_engine" )
750+ else :
751+ # Event loop not ready yet; client sessions will be initialized in lifespan
752+ logger .debug (
753+ "Event loop not ready in _add_engine, client sessions will be initialized later"
754+ )
755+ except Exception as e :
756+ logger .error (
757+ f"Error initializing client sessions in _add_engine: { e } " , exc_info = True
754758 )
755759
756760 # Track all models we've ever seen
@@ -833,35 +837,63 @@ def close(self):
833837
834838 async def initialize_client_sessions (self ) -> None :
835839 """
836- Initialize aiohttp ClientSession objects for prefill and decode endpoints.
840+ Initialize aiohttp client sessions for prefill and decode endpoints.
837841 This must be called from an async context during app startup.
838842 """
843+ logger .info (
844+ f"initialize_client_sessions called. prefill_model_labels={ self .prefill_model_labels } , decode_model_labels={ self .decode_model_labels } "
845+ )
839846 if (
840847 self .prefill_model_labels is not None
841848 and self .decode_model_labels is not None
842849 ):
843850 endpoint_infos = self .get_endpoint_info ()
851+ logger .info (f"Got { len (endpoint_infos )} endpoints" )
844852 for endpoint_info in endpoint_infos :
853+ logger .info (
854+ f"Checking endpoint: url={ endpoint_info .url } , model_label={ endpoint_info .model_label } "
855+ )
845856 if endpoint_info .model_label in self .prefill_model_labels :
846857 if (
847858 hasattr (self .app .state , "prefill_client" )
848859 and self .app .state .prefill_client is not None
849860 ):
850- await self .app .state .prefill_client .close ()
851- self .app .state .prefill_client = aiohttp .ClientSession (
852- base_url = endpoint_info .url ,
853- timeout = aiohttp .ClientTimeout (total = None ),
854- )
861+ # Session already initialized; skip it to avoid disrupting
862+ # in-flight requests. xPyD (multiple prefill nodes) is
863+ # not supported yet — only the first discovered
864+ # prefill endpoint is used.
865+ logger .debug (
866+ f"prefill_client already set, skipping { endpoint_info .url } "
867+ )
868+ else :
869+ self .app .state .prefill_client = aiohttp .ClientSession (
870+ base_url = endpoint_info .url ,
871+ timeout = aiohttp .ClientTimeout (total = None ),
872+ )
873+ logger .info (
874+ f"Created prefill_client for { endpoint_info .url } with timeout=None"
875+ )
876+
855877 elif endpoint_info .model_label in self .decode_model_labels :
856878 if (
857879 hasattr (self .app .state , "decode_client" )
858880 and self .app .state .decode_client is not None
859881 ):
860- await self .app .state .decode_client .close ()
861- self .app .state .decode_client = aiohttp .ClientSession (
862- base_url = endpoint_info .url ,
863- timeout = aiohttp .ClientTimeout (total = None ),
864- )
882+ logger .debug (
883+ f"decode_client already set, skipping { endpoint_info .url } "
884+ )
885+ else :
886+ self .app .state .decode_client = aiohttp .ClientSession (
887+ base_url = endpoint_info .url ,
888+ timeout = aiohttp .ClientTimeout (total = None ),
889+ )
890+ logger .info (
891+ f"Created decode_client for { endpoint_info .url } with timeout=None"
892+ )
893+ else :
894+ logger .warning (
895+ "prefill_model_labels or decode_model_labels is None, skipping client session initialization"
896+ )
865897
866898 def has_ever_seen_model (self , model_name : str ) -> bool :
867899 """Check if we've ever seen this model, even if currently scaled to zero."""
@@ -1195,6 +1227,21 @@ def _add_engine(self, engine_name: str, model_names: List[str], model_label: str
11951227 # Store model information in the endpoint info
11961228 self .available_engines [engine_name ].model_info = model_info
11971229
1230+ try :
1231+ # Only initialize client sessions if event_loop is available
1232+ if hasattr (self .app .state , "event_loop" ) and self .app .state .event_loop :
1233+ fut = asyncio .run_coroutine_threadsafe (
1234+ self .initialize_client_sessions (), self .app .state .event_loop
1235+ )
1236+ fut .result ()
1237+ else :
1238+ # Event loop not ready yet; client sessions will be initialized in lifespan
1239+ logger .debug (
1240+ "Event loop not ready, client sessions will be initialized later"
1241+ )
1242+ except Exception as e :
1243+ logger .error (f"Error initializing client sessions: { e } " )
1244+
11981245 def _delete_engine (self , engine_name : str ):
11991246 logger .info (f"Serving engine { engine_name } is deleted" )
12001247 with self .available_engines_lock :
@@ -1270,25 +1317,58 @@ def close(self):
12701317
12711318 async def initialize_client_sessions (self ) -> None :
12721319 """
1273- Initialize aiohttp ClientSession objects for prefill and decode endpoints.
1320+ Initialize aiohttp client sessions for prefill and decode endpoints.
12741321 This must be called from an async context during app startup.
12751322 """
1323+ logger .info (
1324+ f"K8sServiceNameServiceDiscovery.initialize_client_sessions called. prefill_model_labels={ self .prefill_model_labels } , decode_model_labels={ self .decode_model_labels } "
1325+ )
12761326 if (
12771327 self .prefill_model_labels is not None
12781328 and self .decode_model_labels is not None
12791329 ):
12801330 endpoint_infos = self .get_endpoint_info ()
1331+ logger .info (f"Got { len (endpoint_infos )} endpoints" )
12811332 for endpoint_info in endpoint_infos :
1333+ logger .info (
1334+ f"Checking endpoint: url={ endpoint_info .url } , model_label={ endpoint_info .model_label } "
1335+ )
12821336 if endpoint_info .model_label in self .prefill_model_labels :
1283- self .app .state .prefill_client = aiohttp .ClientSession (
1284- base_url = endpoint_info .url ,
1285- timeout = aiohttp .ClientTimeout (total = None ),
1286- )
1337+ if (
1338+ hasattr (self .app .state , "prefill_client" )
1339+ and self .app .state .prefill_client is not None
1340+ ):
1341+ logger .debug (
1342+ f"prefill_client already set, skipping { endpoint_info .url } "
1343+ )
1344+ else :
1345+ self .app .state .prefill_client = aiohttp .ClientSession (
1346+ base_url = endpoint_info .url ,
1347+ timeout = aiohttp .ClientTimeout (total = None ),
1348+ )
1349+ logger .info (
1350+ f"Created prefill_client for { endpoint_info .url } with timeout=None"
1351+ )
12871352 elif endpoint_info .model_label in self .decode_model_labels :
1288- self .app .state .decode_client = aiohttp .ClientSession (
1289- base_url = endpoint_info .url ,
1290- timeout = aiohttp .ClientTimeout (total = None ),
1291- )
1353+ if (
1354+ hasattr (self .app .state , "decode_client" )
1355+ and self .app .state .decode_client is not None
1356+ ):
1357+ logger .debug (
1358+ f"decode_client already set, skipping { endpoint_info .url } "
1359+ )
1360+ else :
1361+ self .app .state .decode_client = aiohttp .ClientSession (
1362+ base_url = endpoint_info .url ,
1363+ timeout = aiohttp .ClientTimeout (total = None ),
1364+ )
1365+ logger .info (
1366+ f"Created decode_client for { endpoint_info .url } with timeout=None"
1367+ )
1368+ else :
1369+ logger .warning (
1370+ "K8sServiceNameServiceDiscovery: prefill_model_labels or decode_model_labels is None, skipping client session initialization"
1371+ )
12921372
12931373
12941374def _create_service_discovery (
0 commit comments