@@ -162,6 +162,9 @@ def _allocated_gpu_count(self, model: Model) -> int:
162162 def __enter__ (self ) -> Self :
163163 return self
164164
165+ async def __aenter__ (self ) -> Self :
166+ return self
167+
165168 def __exit__ (
166169 self ,
167170 exc_type : type [BaseException ] | None ,
@@ -170,11 +173,19 @@ def __exit__(
170173 ) -> None :
171174 self ._close ()
172175
176+ async def __aexit__ (
177+ self ,
178+ exc_type : type [BaseException ] | None ,
179+ exc : BaseException | None ,
180+ tb : TracebackType | None ,
181+ ) -> None :
182+ await self .close ()
183+
async def close(self) -> None:
    """
    If running vLLM in a separate process, this will kill that process and close the communication threads.

    The work is delegated to the asynchronous ``self._aclose()`` helper, which
    is awaited before this coroutine returns.
    """
    await self._aclose()
178189
179190 def _close (self ) -> None :
180191 for _ , service in self ._services .items ():
@@ -183,6 +194,17 @@ def _close(self) -> None:
183194 close ()
184195 close_proxy (service )
185196
async def _aclose(self) -> None:
    """Asynchronously shut down every service held in ``self._services``.

    For each service, an ``aclose`` attribute is preferred and its result is
    awaited. When the service has no ``aclose``, a synchronous ``close``
    attribute is called instead, if present. ``close_proxy`` is then invoked
    on the service.

    Services are processed sequentially; an exception raised while closing
    one service propagates immediately and later services are not reached.
    """
    # Only the service objects are needed, so iterate the values directly.
    for service in self._services.values():
        aclose = getattr(service, "aclose", None)
        if aclose is not None:
            await aclose()
        else:
            # Fall back to a blocking close() for services without aclose().
            close = getattr(service, "close", None)
            if close is not None:
                close()
        # NOTE(review): placed at loop level so every service's proxy is
        # closed, mirroring the synchronous _close(); confirm this matches
        # the intended nesting.
        close_proxy(service)
207+
186208 async def register (
187209 self ,
188210 model : Model ,
@@ -505,6 +527,7 @@ async def train( # type: ignore[override]
505527 * ,
506528 # Core training parameters
507529 learning_rate : float = 5e-6 ,
530+ loss_fn : Literal ["cispo" , "ppo" ] | None = None ,
508531 # KL-penalized advantage adjustment
509532 kl_penalty_coef : float = 0.0 ,
510533 kl_penalty_reference_step : int | None = None ,
@@ -600,6 +623,8 @@ async def train( # type: ignore[override]
600623 # await model.log(metrics=result.metrics, step=result.step)
601624 """
602625 groups_list = list (trajectory_groups )
626+ if loss_fn is not None :
627+ ppo = loss_fn == "ppo"
603628
604629 resolved_kl_ref_adapter_path = kl_ref_adapter_path
605630 if (
0 commit comments