@@ -187,6 +187,9 @@ async def stream_inference_response(
187187
188188 # ── PATH B: STREAMING ─────────────────────────────────────────────────
189189 # SSE connection — yield each chunk as it arrives.
190+ # aiter_lines() is explicitly closed in a finally block to prevent
191+ # "Task was destroyed but it is pending" warnings from dangling
192+ # aiter_raw coroutines when the generator is abandoned mid-stream.
190193 async with httpx .AsyncClient (
191194 base_url = self .base_url ,
192195 timeout = httpx .Timeout (timeout , connect = 10.0 ),
@@ -197,20 +200,23 @@ async def stream_inference_response(
197200 "POST" , "/v1/completions" , json = payload
198201 ) as response :
199202 response .raise_for_status ()
200-
201- async for line in response .aiter_lines ():
202- line = line .strip ()
203- if not line or not line .startswith ("data:" ):
204- continue
205-
206- data_str = line [len ("data:" ) :].strip ()
207- if data_str == "[DONE]" :
208- break
209-
210- try :
211- yield json .loads (data_str )
212- except json .JSONDecodeError :
213- continue
203+ aiter = response .aiter_lines ()
204+ try :
205+ async for line in aiter :
206+ line = line .strip ()
207+ if not line or not line .startswith ("data:" ):
208+ continue
209+
210+ data_str = line [len ("data:" ) :].strip ()
211+ if data_str == "[DONE]" :
212+ break
213+
214+ try :
215+ yield json .loads (data_str )
216+ except json .JSONDecodeError :
217+ continue
218+ finally :
219+ await aiter .aclose () # nosec B110 — best-effort cleanup, failure is non-fatal
214220
215221 except httpx .HTTPStatusError as e :
216222 logging_utility .error (
0 commit comments