Skip to content

Commit ff56b62

Browse files
Ambient Code Botclaude
andcommitted
fix: use status code for PERMISSION_DENIED, remove timeout floor, add tests
- Check e.code() == PERMISSION_DENIED instead of fragile string matching on e.details(), preventing false positives/negatives - Remove 0.5s floor on channel_ready_timeout to prevent overshooting dial_timeout when remaining time is small - Add test verifying channel_ready_timeout is bounded by remaining deadline - Add tests for PERMISSION_DENIED with custom details and UNAUTHENTICATED with "permission denied" text Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4520874 commit ff56b62

2 files changed

Lines changed: 82 additions & 32 deletions

File tree

python/packages/jumpstarter/jumpstarter/client/lease.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -315,12 +315,14 @@ def __contextmanager__(self) -> Generator[Self]:
315315

316316
# DEADLINE_EXCEEDED and CANCELLED are excluded: they indicate client-side
317317
# timeout or cancellation, not server/network transients worth retrying.
318-
_TRANSIENT_GRPC_CODES = frozenset({
319-
grpc.StatusCode.UNAVAILABLE,
320-
grpc.StatusCode.RESOURCE_EXHAUSTED,
321-
grpc.StatusCode.ABORTED,
322-
grpc.StatusCode.INTERNAL,
323-
})
318+
_TRANSIENT_GRPC_CODES = frozenset(
319+
{
320+
grpc.StatusCode.UNAVAILABLE,
321+
grpc.StatusCode.RESOURCE_EXHAUSTED,
322+
grpc.StatusCode.ABORTED,
323+
grpc.StatusCode.INTERNAL,
324+
}
325+
)
324326

325327
# UNKNOWN error messages that indicate transient tunnel teardowns.
326328
# We don't blanket-retry all UNKNOWN errors (they could be permanent
@@ -333,9 +335,7 @@ def _retry_delay(attempt: int, remaining: float, base: float = 0.3, cap: float =
333335
"""Compute exponential-backoff delay, capped by *cap* and *remaining* time."""
334336
return min(base * (2**attempt), cap, remaining)
335337

336-
async def _dial_and_connect(
337-
self, stream: SocketStream, channel_ready_timeout: float = 10.0
338-
) -> None:
338+
async def _dial_and_connect(self, stream: SocketStream, channel_ready_timeout: float = 10.0) -> None:
339339
"""Single attempt; raises on failure for caller-driven retry."""
340340
response = await self.controller.Dial(jumpstarter_pb2.DialRequest(lease_name=self.name))
341341
async with connect_router_stream(
@@ -360,7 +360,7 @@ async def handle_async(self, stream: SocketStream) -> None:
360360
attempt = 0
361361
while True:
362362
remaining = deadline - time.monotonic()
363-
channel_ready_timeout = max(min(10.0, remaining), 0.5)
363+
channel_ready_timeout = max(min(10.0, remaining), 0)
364364
try:
365365
await self._dial_and_connect(stream, channel_ready_timeout=channel_ready_timeout)
366366
return
@@ -409,7 +409,7 @@ async def handle_async(self, stream: SocketStream) -> None:
409409
await sleep(delay)
410410
attempt += 1
411411
continue
412-
if "permission denied" in str(e.details()).lower():
412+
if e.code() == grpc.StatusCode.PERMISSION_DENIED:
413413
self.lease_transferred = True
414414
logger.warning(
415415
"Lease %s has been transferred to another client. Your session is no longer valid.",

python/packages/jumpstarter/jumpstarter/client/lease_test.py

Lines changed: 71 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -692,9 +692,7 @@ async def dial_side_effect(req):
692692
nonlocal call_count
693693
call_count += 1
694694
if call_count == 1:
695-
raise _make_aio_rpc_error(
696-
grpc.StatusCode.UNKNOWN, "watch channel closed"
697-
)
695+
raise _make_aio_rpc_error(grpc.StatusCode.UNKNOWN, "watch channel closed")
698696
return dial_response
699697

700698
lease.controller.Dial = AsyncMock(side_effect=dial_side_effect)
@@ -717,9 +715,7 @@ async def test_unknown_without_known_message_not_retried(self):
717715
lease = _make_lease_for_handle()
718716

719717
lease.controller.Dial = AsyncMock(
720-
side_effect=_make_aio_rpc_error(
721-
grpc.StatusCode.UNKNOWN, "some unexpected server bug"
722-
),
718+
side_effect=_make_aio_rpc_error(grpc.StatusCode.UNKNOWN, "some unexpected server bug"),
723719
)
724720

725721
with patch("jumpstarter.client.lease.sleep", new_callable=AsyncMock):
@@ -888,9 +884,7 @@ async def dial_side_effect(req):
888884
nonlocal call_count
889885
call_count += 1
890886
if call_count <= total_failures:
891-
raise _make_aio_rpc_error(
892-
grpc.StatusCode.UNAVAILABLE, "tunnel dropped"
893-
)
887+
raise _make_aio_rpc_error(grpc.StatusCode.UNAVAILABLE, "tunnel dropped")
894888
return dial_response
895889

896890
lease.controller.Dial = AsyncMock(side_effect=dial_side_effect)
@@ -918,10 +912,7 @@ async def fake_router(*args, **kwargs):
918912
actual_delays = [call.args[0] for call in mock_sleep.call_args_list]
919913
assert len(actual_delays) == len(expected_delays)
920914
for actual, expected in zip(actual_delays, expected_delays, strict=True):
921-
assert actual == pytest.approx(expected), (
922-
f"Expected delay {expected}, got {actual}"
923-
)
924-
915+
assert actual == pytest.approx(expected), f"Expected delay {expected}, got {actual}"
925916

926917
@pytest.mark.anyio
927918
async def test_failed_precondition_not_ready_retries_then_succeeds(self):
@@ -934,9 +925,7 @@ async def dial_side_effect(req):
934925
nonlocal call_count
935926
call_count += 1
936927
if call_count == 1:
937-
raise _make_aio_rpc_error(
938-
grpc.StatusCode.FAILED_PRECONDITION, "exporter not ready"
939-
)
928+
raise _make_aio_rpc_error(grpc.StatusCode.FAILED_PRECONDITION, "exporter not ready")
940929
return dial_response
941930

942931
lease.controller.Dial = AsyncMock(side_effect=dial_side_effect)
@@ -960,9 +949,7 @@ async def test_failed_precondition_returns_after_timeout(self):
960949
lease.dial_timeout = 0.0 # already expired
961950

962951
lease.controller.Dial = AsyncMock(
963-
side_effect=_make_aio_rpc_error(
964-
grpc.StatusCode.FAILED_PRECONDITION, "exporter not ready"
965-
),
952+
side_effect=_make_aio_rpc_error(grpc.StatusCode.FAILED_PRECONDITION, "exporter not ready"),
966953
)
967954

968955
with patch("jumpstarter.client.lease.sleep", new_callable=AsyncMock):
@@ -977,16 +964,79 @@ async def test_permission_denied_sets_lease_transferred(self):
977964
lease = _make_lease_for_handle()
978965
assert lease.lease_transferred is False
979966

967+
lease.controller.Dial = AsyncMock(
968+
side_effect=_make_aio_rpc_error(grpc.StatusCode.PERMISSION_DENIED, "permission denied"),
969+
)
970+
971+
with patch("jumpstarter.client.lease.sleep", new_callable=AsyncMock):
972+
await lease.handle_async(Mock())
973+
974+
assert lease.lease_transferred is True
975+
976+
@pytest.mark.anyio
977+
async def test_permission_denied_with_custom_details_still_detected(self):
978+
"""PERMISSION_DENIED with non-standard detail text should still set lease_transferred."""
979+
lease = _make_lease_for_handle()
980+
assert lease.lease_transferred is False
981+
982+
lease.controller.Dial = AsyncMock(
983+
side_effect=_make_aio_rpc_error(grpc.StatusCode.PERMISSION_DENIED, "lease reassigned to another client"),
984+
)
985+
986+
with patch("jumpstarter.client.lease.sleep", new_callable=AsyncMock):
987+
await lease.handle_async(Mock())
988+
989+
assert lease.lease_transferred is True
990+
991+
@pytest.mark.anyio
992+
async def test_unauthenticated_with_permission_text_does_not_set_transferred(self):
993+
"""UNAUTHENTICATED with 'permission denied' in details should NOT set lease_transferred."""
994+
lease = _make_lease_for_handle()
995+
assert lease.lease_transferred is False
996+
980997
lease.controller.Dial = AsyncMock(
981998
side_effect=_make_aio_rpc_error(
982-
grpc.StatusCode.PERMISSION_DENIED, "permission denied"
999+
grpc.StatusCode.UNAUTHENTICATED,
1000+
"permission denied: token expired",
9831001
),
9841002
)
9851003

9861004
with patch("jumpstarter.client.lease.sleep", new_callable=AsyncMock):
9871005
await lease.handle_async(Mock())
9881006

989-
assert lease.lease_transferred is True
1007+
assert lease.lease_transferred is False
1008+
1009+
@pytest.mark.anyio
1010+
async def test_channel_ready_timeout_bounded_by_remaining(self):
1011+
"""channel_ready_timeout should decrease as the dial deadline approaches."""
1012+
lease = _make_lease_for_handle()
1013+
lease.dial_timeout = 3.0
1014+
1015+
call_count = 0
1016+
captured_timeouts = []
1017+
1018+
async def tracking_dial_and_connect(self_inner, stream, channel_ready_timeout=10.0):
1019+
nonlocal call_count
1020+
call_count += 1
1021+
captured_timeouts.append(channel_ready_timeout)
1022+
if call_count <= 3:
1023+
raise _make_aio_rpc_error(grpc.StatusCode.FAILED_PRECONDITION, "exporter not ready")
1024+
# Succeed on 4th attempt (won't normally reach here with 3s timeout)
1025+
1026+
with (
1027+
patch.object(type(lease), "_dial_and_connect", tracking_dial_and_connect),
1028+
patch("jumpstarter.client.lease.sleep", new_callable=AsyncMock),
1029+
):
1030+
await lease.handle_async(Mock())
1031+
1032+
# With a 3s dial_timeout, the first call should have channel_ready_timeout <= 3.0
1033+
# and subsequent calls should have progressively smaller values
1034+
assert len(captured_timeouts) >= 2
1035+
assert all(t <= 10.0 for t in captured_timeouts), f"All timeouts should be <= 10.0, got {captured_timeouts}"
1036+
# The first timeout should be bounded by remaining (~3.0), not the default 10.0
1037+
assert captured_timeouts[0] <= 3.1, (
1038+
f"First timeout should be bounded by dial_timeout (~3.0), got {captured_timeouts[0]}"
1039+
)
9901040

9911041

9921042
class TestRetryDelay:

0 commit comments

Comments
 (0)