@@ -209,8 +209,8 @@ def _if_session_stale(self, session: Session,
209209 epoch = session .epoch
210210 if epoch is None or epoch == self .epoch :
211211 return None
212- logger .info (
213- f'[generate] session { session . session_id } dropped (session.epoch={ epoch } , epoch={ self .epoch } )' )
212+ logger .info (f'[generate] drop stale session { session . session_id } '
213+ f' (session.epoch={ epoch } , async_engine. epoch={ self .epoch } )' )
214214 return GenOut (response = '' ,
215215 history_token_len = session .step ,
216216 input_token_len = input_token_len ,
@@ -241,15 +241,15 @@ def prepare_sleep(self):
241241 self .sleeping_tags = {'weights' , 'kv_cache' }
242242 self .is_sleeping = True
243243
244- def sleep (self , level : int = 1 ):
244+ async def sleep (self , level : int = 1 ):
245245 """Sleep the model.
246246
247247 Args:
248248 level (int): The sleep level. Level 1 sleep will offload the model
249249 weights and discard the kv cache. Level 2 sleep will
250250 discard both the model weights and the kv cache.
251251 """
252- self .engine .sleep (level )
252+ await self .engine .sleep (level )
253253 self .sleeping_tags = {'weights' , 'kv_cache' }
254254 self .is_sleeping = True
255255
@@ -460,7 +460,7 @@ def is_error(status):
460460 async with session .request_handle () as handle :
461461 if session .epoch is not None and session .epoch != self .epoch :
462462 logger .info (f'[generate] session { session_id } got aborted before starting inference, '
463- f'session.epoch={ session .epoch } , epoch={ self .epoch } ' )
463+ f'session.epoch={ session .epoch } , async_engine. epoch={ self .epoch } ' )
464464 metrics_processor .increase_failed_requests ('abort' )
465465 yield GenOut (response = '' ,
466466 history_token_len = 0 ,
0 commit comments