@@ -280,17 +280,31 @@ async def very_slow(n: int) -> int:
280280
281281class TestRetryAndTimeout :
282282 async def test_retry_on_failure (self , worker : subprocess .Popen [bytes ]) -> None :
283- @trace (retries = 3 , retry_delay = 0.1 )
284- def sometimes_fails () -> str :
285- import random
286- random .seed () # re-seed each call
287- if random .random () < 0.3 :
288- raise RuntimeError ("transient" )
289- return "ok"
283+ # Deterministic retry test: a counter file, written wherever the
284+ # function executes, records how many attempts have been made. The
285+ # first two attempts raise; the third succeeds. This avoids RNG
286+ # flakes while still exercising the full retry path. All attempts
287+ # must share one filesystem (the host worker's /tmp, or the sandbox
288+ # container's /tmp) for the count to carry over between retries.
289+ import uuid
290+ counter_path = f"/tmp/pyfuse-retry-{ uuid .uuid4 ().hex } .txt"
290291
291- # With 3 retries, at least one attempt should succeed (very high probability)
292- result = await sometimes_fails .run ()
293- assert result == "ok"
292+ @trace (retries = 3 , retry_delay = 0.1 )
293+ def fails_then_succeeds (path : str , fail_until : int ) -> str :
294+ import os
295+ n = 0
296+ if os .path .exists (path ):
297+ with open (path ) as f :
298+ n = int (f .read () or "0" )
299+ n += 1
300+ with open (path , "w" ) as f :
301+ f .write (str (n ))
302+ if n <= fail_until :
303+ raise RuntimeError (f"transient (attempt { n } )" )
304+ return f"ok after { n } attempts"
305+
306+ result = await fails_then_succeeds .run (counter_path , 2 )
307+ assert result == "ok after 3 attempts"
294308
295309
296310class TestScheduling :
0 commit comments