@@ -209,21 +209,47 @@ async def _run_container(self) -> None:
209209 raise WorkerError (f"Failed to start Docker container: { stderr .strip ()} " )
210210
async def _wait_for_agent(self) -> None:
    """Block until the guest agent is actually answering on the wire.

    A bare TCP ``open_connection`` is not sufficient on Linux: when
    a container port is published, ``docker-proxy`` accepts host
    connections *before* the in-container process is listening,
    which causes the very first request to race with agent startup
    and come back as ``IncompleteReadError``. Performing an actual
    ping/pong exchange guarantees end-to-end readiness.

    Each attempt connects to ``127.0.0.1:self._host_port``, sends
    ``{"op": "ping"}`` and waits for a reply whose ``"status"`` is
    ``"pong"``. Attempts are retried until ``self.boot_timeout``
    seconds have elapsed.

    Raises:
        WorkerError: if no ``pong`` reply was received before the
            deadline. The most recent failure is included in the
            message and chained as ``__cause__``.
    """
    assert self._host_port is not None
    loop = asyncio.get_running_loop()
    deadline = loop.time() + self.boot_timeout
    last_err: Exception | None = None

    def budget(cap: float) -> float:
        # Never wait longer than what is left of the overall boot budget,
        # so a single slow attempt cannot overshoot ``boot_timeout``.
        return max(0.0, min(cap, deadline - loop.time()))

    while loop.time() < deadline:
        try:
            reader, writer = await asyncio.wait_for(
                asyncio.open_connection("127.0.0.1", self._host_port),
                timeout=budget(2.0),
            )
            try:
                await asyncio.wait_for(
                    async_send(writer, {"op": "ping"}),
                    timeout=budget(2.0),
                )
                msg = await asyncio.wait_for(
                    async_recv(reader), timeout=budget(2.0)
                )
            finally:
                # Always release the probe connection, whether or not the
                # exchange succeeded; close errors are not interesting here.
                writer.close()
                with contextlib.suppress(ConnectionError, OSError):
                    await writer.wait_closed()
            if msg.get("status") == "pong":
                return
            last_err = WorkerError(f"Unexpected handshake reply: {msg!r}")
        except (
            # ConnectionError is a subclass of OSError, so it is covered.
            OSError,
            asyncio.TimeoutError,
            asyncio.IncompleteReadError,
        ) as exc:
            last_err = exc
        # Back off briefly before the next probe, but not past the deadline.
        await asyncio.sleep(budget(0.5))
    raise WorkerError(
        f"Docker guest agent did not become reachable within "
        f"{self.boot_timeout}s: {last_err!r}"
    ) from last_err
228254
229255 async def _connect (self ) -> None :
0 commit comments