3434
3535from verify_agents import build_agent_command , load_registry , prepare_binary
3636
37- DEFAULT_INIT_TIMEOUT = 45 .0
37+ DEFAULT_INIT_TIMEOUT = 120 .0
3838DEFAULT_RPC_TIMEOUT = 5.0
39+ DEFAULT_EXIT_GRACE = 0.25
40+ EXIT_GRACE_POLL_INTERVAL = 0.05
41+ EXIT_GRACE_REAP_SLACK = 0.05
3942DEFAULT_SANDBOX_DIR = ".matrix-sandbox"
4043DEFAULT_OUTPUT_DIR = ".protocol-matrix"
4144DEFAULT_TABLE_MODE = "full"
@@ -330,23 +333,79 @@ def send_jsonrpc_request(
330333 proc .stdin .flush ()
331334
332335
def process_exit_outcome(
    exit_code: int,
    method: str,
) -> tuple[ProbeOutcome, dict[str, Any] | None]:
    """Describe a subprocess that exited while a request was outstanding.

    Args:
        exit_code: Exit status reported by the subprocess.
        method: Name of the JSON-RPC method that was being requested.

    Returns:
        A ``(outcome, response)`` pair. The outcome carries the
        ``"process_error"`` status and a message naming the exit code and
        the method; the response slot is always ``None`` because no reply
        was received.
    """
    outcome = ProbeOutcome(
        status="process_error",
        message=f"process exited with code {exit_code} before responding to {method}",
    )
    return outcome, None
348+
349+
def reconcile_timed_out_request(
    proc: subprocess.Popen,
    request_id: int,
    method: str,
    exit_grace: float,
) -> tuple[ProbeOutcome, dict[str, Any] | None] | None:
    """Give a timed-out request a short grace window before reporting a timeout.

    During the window the function keeps reading messages from ``proc`` and
    checking whether it has exited, so that a late reply or a near-immediate
    process exit is reported as such instead of as a plain timeout.

    Args:
        proc: The subprocess the request was sent to.
        request_id: Id of the request whose response is awaited.
        method: Method name, used only for the process-exit message.
        exit_grace: Length of the grace window in seconds; a value ``<= 0``
            disables reconciliation entirely.

    Returns:
        ``(outcome, response)`` when the window produced a matching response,
        a decode error, or a process exit; ``None`` when nothing conclusive
        happened and the caller should report its own timeout.
    """
    if exit_grace <= 0:
        return None

    grace_ends_at = time.monotonic() + exit_grace
    while True:
        time_left = grace_ends_at - time.monotonic()
        if time_left <= 0:
            break

        # Read in short slices so an exit is noticed promptly.
        # NOTE(review): second argument is presumably a read timeout in
        # seconds -- confirm against read_jsonrpc_line.
        poll_window = min(time_left, EXIT_GRACE_POLL_INTERVAL)
        reply = read_jsonrpc_line(proc, poll_window)

        if reply is not None:
            if "_decode_error" in reply:
                decode_failure = ProbeOutcome(
                    status="decode_error",
                    message=reply["_decode_error"],
                )
                return decode_failure, None

            answers_request = reply.get("id") == request_id and (
                "result" in reply or "error" in reply
            )
            if answers_request:
                return classify_rpc_response(reply), reply

            # Unrelated message (different id, or neither result nor error):
            # keep waiting.
            continue

        # Nothing was read in this slice; check whether the process is gone.
        status = proc.poll()
        if status is not None:
            return process_exit_outcome(status, method)

    # Window elapsed: one last non-blocking check, then a brief blocking wait
    # so a process that is just about to exit is still observed.
    status = proc.poll()
    if status is None and EXIT_GRACE_REAP_SLACK > 0:
        try:
            status = proc.wait(timeout=EXIT_GRACE_REAP_SLACK)
        except subprocess.TimeoutExpired:
            status = None

    if status is None:
        return None
    return process_exit_outcome(status, method)
395+
396+
333397def request_with_timeout (
334398 proc : subprocess .Popen ,
335399 request_id : int ,
336400 method : str ,
337401 params : dict [str , Any ],
338402 timeout : float ,
403+ exit_grace : float = DEFAULT_EXIT_GRACE ,
339404) -> tuple [ProbeOutcome , dict [str , Any ] | None ]:
340405 """Send request and wait for the response with matching id."""
341406 exit_code = proc .poll ()
342407 if exit_code is not None :
343- return (
344- ProbeOutcome (
345- status = "process_error" ,
346- message = f"process exited with code { exit_code } before { method } " ,
347- ),
348- None ,
349- )
408+ return process_exit_outcome (exit_code , method )
350409
351410 try :
352411 send_jsonrpc_request (proc , request_id , method , params )
@@ -372,15 +431,7 @@ def request_with_timeout(
372431 if message is None :
373432 exit_code = proc .poll ()
374433 if exit_code is not None :
375- return (
376- ProbeOutcome (
377- status = "process_error" ,
378- message = (
379- f"process exited with code { exit_code } before responding to { method } "
380- ),
381- ),
382- None ,
383- )
434+ return process_exit_outcome (exit_code , method )
384435 break
385436
386437 if "_decode_error" in message :
@@ -392,6 +443,10 @@ def request_with_timeout(
392443 if message .get ("id" ) == request_id and ("result" in message or "error" in message ):
393444 return classify_rpc_response (message ), message
394445
446+ reconciled = reconcile_timed_out_request (proc , request_id , method , exit_grace )
447+ if reconciled is not None :
448+ return reconciled
449+
395450 return (
396451 ProbeOutcome (status = "no_response" , message = f"timeout after { timeout :.1f} s" ),
397452 None ,
@@ -737,11 +792,22 @@ def ensure_binary_executable(cmd: list[str], dist_type: str) -> ProbeOutcome | N
737792 return None
738793
739794 exe_path = Path (cmd [0 ])
740- if not exe_path .exists () or os .access (exe_path , os .X_OK ):
795+ if not exe_path .exists ():
796+ return None
797+
798+ try :
799+ current_mode = exe_path .stat ().st_mode
800+ except OSError as exc :
801+ return ProbeOutcome (
802+ status = "process_error" ,
803+ message = short_message (f"Failed to inspect executable { exe_path } : { exc } " ),
804+ )
805+
806+ if current_mode & 0o111 :
741807 return None
742808
743809 try :
744- exe_path .chmod (exe_path . stat (). st_mode | 0o755 )
810+ exe_path .chmod (current_mode | 0o755 )
745811 except OSError as exc :
746812 return ProbeOutcome (
747813 status = "process_error" ,
0 commit comments