Skip to content

Commit 7159a21

Browse files
raballewclaude
andauthored
fix: guard beforeLease hook from setting LEASE_READY after lease expiry (#655)
When a lease expires before the beforeLease hook completes, the hook's completion would set status to LEASE_READY after cleanup already set AVAILABLE, permanently stalling the exporter. Add a guard that checks lease_ended before the LEASE_READY transition and skips it if the lease has already ended. Fixes #235 --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent e6a7540 commit 7159a21

3 files changed

Lines changed: 148 additions & 0 deletions

File tree

python/packages/jumpstarter/jumpstarter/exporter/exporter_test.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,51 @@ def _make_exporter_for_report_status():
442442
return exporter
443443

444444

445+
class TestBeforeLeaseHookRaceGuard:
446+
async def test_new_lease_after_before_hook_race_recovery(self):
447+
"""After recovering from the beforeLease hook race condition
448+
(lease expired during hook), a new lease must be accepted and
449+
processed normally."""
450+
from jumpstarter.config.exporter import HookConfigV1Alpha1, HookInstanceConfigV1Alpha1
451+
from jumpstarter.exporter.hooks import HookExecutor
452+
453+
hook_config = HookConfigV1Alpha1(
454+
before_lease=HookInstanceConfigV1Alpha1(script="echo setup", timeout=10),
455+
)
456+
hook_executor = HookExecutor(config=hook_config)
457+
458+
lease_ctx_1 = make_lease_context(lease_name="expired-lease")
459+
lease_ctx_1.lease_ended.set()
460+
461+
statuses = []
462+
463+
async def track_status(status, message=""):
464+
statuses.append(status)
465+
466+
exporter = make_exporter(lease_ctx_1, hook_executor)
467+
exporter._report_status = AsyncMock(side_effect=track_status)
468+
469+
await hook_executor.run_before_lease_hook(
470+
lease_ctx_1, exporter._report_status, exporter.stop, exporter._request_lease_release
471+
)
472+
473+
assert lease_ctx_1.before_lease_hook.is_set()
474+
await exporter._cleanup_after_lease(lease_ctx_1)
475+
assert ExporterStatus.AVAILABLE in statuses
476+
477+
lease_ctx_2 = make_lease_context(lease_name="new-lease")
478+
exporter._lease_context = lease_ctx_2
479+
480+
statuses.clear()
481+
await hook_executor.run_before_lease_hook(
482+
lease_ctx_2, exporter._report_status, exporter.stop, exporter._request_lease_release
483+
)
484+
485+
assert ExporterStatus.LEASE_READY in statuses, (
486+
f"New lease must reach LEASE_READY when lease is still active. Statuses: {statuses}"
487+
)
488+
489+
445490
class TestReportStatusGrpcErrorHandling:
446491
async def test_unimplemented_grpc_error_logs_warning(self, caplog):
447492
"""When ReportStatus returns UNIMPLEMENTED, a warning is logged

python/packages/jumpstarter/jumpstarter/exporter/hooks.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,13 @@ async def run_before_lease_hook(
615615
LogSource.BEFORE_LEASE_HOOK,
616616
)
617617

618+
if lease_scope.lease_ended.is_set():
619+
logger.info(
620+
"Lease %s ended during beforeLease hook, skipping LEASE_READY transition",
621+
lease_scope.lease_name,
622+
)
623+
return
624+
618625
if warning:
619626
msg = f"{HOOK_WARNING_PREFIX}beforeLease hook warning: {warning}"
620627
else:

python/packages/jumpstarter/jumpstarter/exporter/hooks_test.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1178,3 +1178,99 @@ async def mock_report_status(status, msg):
11781178
assert msg.startswith(HOOK_WARNING_PREFIX), (
11791179
f"Expected AVAILABLE message to start with '{HOOK_WARNING_PREFIX}', got: '{msg}'"
11801180
)
1181+
1182+
1183+
class TestBeforeLeaseHookLeaseEndedGuard:
1184+
"""Tests for the race condition where beforeLease hook completes after
1185+
the lease has already expired. When lease_ended is set, the hook must
1186+
NOT set status to LEASE_READY, preventing the exporter from being
1187+
stuck in LEASE_READY permanently."""
1188+
1189+
async def test_run_before_lease_hook_skips_lease_ready_when_lease_ended(self, lease_scope) -> None:
1190+
"""When the lease has already ended by the time the beforeLease hook
1191+
completes, status must NOT be set to LEASE_READY."""
1192+
hook_config = HookConfigV1Alpha1(
1193+
before_lease=HookInstanceConfigV1Alpha1(script="echo setup", timeout=10),
1194+
)
1195+
executor = HookExecutor(config=hook_config)
1196+
1197+
lease_scope.lease_ended.set()
1198+
1199+
status_calls = []
1200+
1201+
async def mock_report_status(status, msg):
1202+
status_calls.append((status, msg))
1203+
1204+
mock_shutdown = MagicMock()
1205+
1206+
await executor.run_before_lease_hook(
1207+
lease_scope,
1208+
mock_report_status,
1209+
mock_shutdown,
1210+
)
1211+
1212+
lease_ready_calls = [s for s, _ in status_calls if s == ExporterStatus.LEASE_READY]
1213+
assert len(lease_ready_calls) == 0, (
1214+
f"LEASE_READY must NOT be set when lease has already ended, got: {status_calls}"
1215+
)
1216+
1217+
hook_started_calls = [s for s, _ in status_calls if s == ExporterStatus.BEFORE_LEASE_HOOK]
1218+
assert len(hook_started_calls) == 1, (
1219+
f"BEFORE_LEASE_HOOK must be reported (hook must run) even when lease has ended, got: {status_calls}"
1220+
)
1221+
1222+
async def test_run_before_lease_hook_sets_event_even_when_lease_ended(self, lease_scope) -> None:
1223+
"""The before_lease_hook event must always be set (via the finally block)
1224+
even when the lease has ended, to unblock downstream waiters."""
1225+
hook_config = HookConfigV1Alpha1(
1226+
before_lease=HookInstanceConfigV1Alpha1(script="echo setup", timeout=10),
1227+
)
1228+
executor = HookExecutor(config=hook_config)
1229+
1230+
lease_scope.lease_ended.set()
1231+
1232+
mock_report_status = AsyncMock()
1233+
mock_shutdown = MagicMock()
1234+
1235+
await executor.run_before_lease_hook(
1236+
lease_scope,
1237+
mock_report_status,
1238+
mock_shutdown,
1239+
)
1240+
1241+
assert lease_scope.before_lease_hook.is_set(), (
1242+
"before_lease_hook event must be set even when lease has ended"
1243+
)
1244+
1245+
async def test_run_before_lease_hook_warn_skips_lease_ready_when_lease_ended(self, lease_scope) -> None:
1246+
"""When hook fails with on_failure=warn and the lease has already ended,
1247+
LEASE_READY must still be skipped."""
1248+
hook_config = HookConfigV1Alpha1(
1249+
before_lease=HookInstanceConfigV1Alpha1(script="exit 1", timeout=10, on_failure="warn"),
1250+
)
1251+
executor = HookExecutor(config=hook_config)
1252+
1253+
lease_scope.lease_ended.set()
1254+
1255+
status_calls = []
1256+
1257+
async def mock_report_status(status, msg):
1258+
status_calls.append((status, msg))
1259+
1260+
mock_shutdown = MagicMock()
1261+
1262+
await executor.run_before_lease_hook(
1263+
lease_scope,
1264+
mock_report_status,
1265+
mock_shutdown,
1266+
)
1267+
1268+
lease_ready_calls = [s for s, _ in status_calls if s == ExporterStatus.LEASE_READY]
1269+
assert len(lease_ready_calls) == 0, (
1270+
f"LEASE_READY must NOT be set when lease has ended (even with warn), got: {status_calls}"
1271+
)
1272+
1273+
hook_started_calls = [s for s, _ in status_calls if s == ExporterStatus.BEFORE_LEASE_HOOK]
1274+
assert len(hook_started_calls) == 1, (
1275+
f"BEFORE_LEASE_HOOK must be reported (hook must run) even when lease has ended, got: {status_calls}"
1276+
)

0 commit comments

Comments
 (0)