Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions e2e/exporters/exporter-hooks-before-fail-endLease-with-after.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
export:
power:
type: jumpstarter_driver_power.driver.MockPower
storage:
type: jumpstarter_driver_opendal.driver.MockStorageMux

hooks:
beforeLease:
script: |
echo "HOOK_FAIL_ENDLEASE: will fail and end lease"
exit 1
timeout: 60
onFailure: endLease
afterLease:
script: |
echo "AFTER_SHOULD_NOT_RUN: this proves afterLease was skipped"
timeout: 60
onFailure: warn
37 changes: 36 additions & 1 deletion e2e/test/hooks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,42 @@ var _ = Describe("Hooks E2E Tests", Label("hooks"), Ordered, func() {
WaitForExporter("test-exporter-hooks")
})

It("B3: beforeLease onFailure=exit shuts down exporter", func() {
It("B3: beforeLease onFailure=endLease releases lease and accepts new one", func() {
startHooksExporter("exporter-hooks-before-fail-endLease.yaml")

// First lease: shell should fail because beforeLease hook fails
out, err := Jmp("shell", "--client", "test-client-hooks",
"--selector", "example.com/board=hooks", "j", "power", "on")
Expect(err).To(HaveOccurred())
Expect(out).To(MatchRegexp(`(beforeLease hook fail|Connection to exporter lost)`))

// The exporter should release the lease and return to Available
WaitForExporter("test-exporter-hooks")

// Second lease: should also fail (hook still configured to fail),
// but this proves the exporter accepted a new lease after recovery
out2, err2 := Jmp("shell", "--client", "test-client-hooks",
"--selector", "example.com/board=hooks", "j", "power", "on")
Expect(err2).To(HaveOccurred())
Expect(out2).To(MatchRegexp(`(beforeLease hook fail|Connection to exporter lost)`))

// Exporter should recover again
WaitForExporter("test-exporter-hooks")
})

It("B4: beforeLease fail+endLease does NOT run afterLease hook", func() {
startHooksExporter("exporter-hooks-before-fail-endLease-with-after.yaml")

out, err := Jmp("shell", "--client", "test-client-hooks",
"--selector", "example.com/board=hooks", "j", "power", "on")
Expect(err).To(HaveOccurred())
Expect(out).NotTo(ContainSubstring("AFTER_SHOULD_NOT_RUN"))

// Exporter should still be alive and return to Available
WaitForExporter("test-exporter-hooks")
})
Comment thread
raballew marked this conversation as resolved.

It("B5: beforeLease onFailure=exit shuts down exporter", func() {
startHooksExporterSingle("exporter-hooks-before-fail-exit.yaml")

out, err := Jmp("shell", "--client", "test-client-hooks",
Expand Down
26 changes: 24 additions & 2 deletions python/packages/jumpstarter/jumpstarter/exporter/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import TYPE_CHECKING, Callable, Literal

import anyio
from anyio import CancelScope

from jumpstarter.common import HOOK_WARNING_PREFIX, ExporterStatus, LogSource
from jumpstarter.config.env import JMP_DRIVERS_ALLOW, JUMPSTARTER_HOST
Expand Down Expand Up @@ -586,6 +587,17 @@ async def execute_after_lease_hook(self, lease_scope: "LeaseContext") -> str | N
LogSource.AFTER_LEASE_HOOK,
)

async def _safe_release_lease(
self,
request_lease_release: Callable[[], Awaitable[None]] | None,
) -> None:
"""Call request_lease_release if provided, logging any errors."""
if request_lease_release:
try:
await request_lease_release()
except Exception as e:
logger.error("Failed to request lease release: %s", e, exc_info=True)
Comment thread
raballew marked this conversation as resolved.

async def run_before_lease_hook(
self,
lease_scope: "LeaseContext",
Expand All @@ -608,6 +620,7 @@ async def run_before_lease_hook(
shutdown: Callback to trigger exporter shutdown (accepts optional exit_code kwarg)
request_lease_release: Async callback to request lease release from controller
"""
should_release = False
try:
# Wait for lease scope to be fully populated by handle_lease
# This is necessary because handle_lease and run_before_lease_hook run concurrently
Expand Down Expand Up @@ -670,13 +683,14 @@ async def run_before_lease_hook(
# Defer shutdown: sets _stop_requested=True, actual stop after lease cleanup
shutdown(exit_code=1, wait_for_lease_exit=True, should_unregister=True)
else:
# on_failure='endLease' - report failure, session stays alive for client
# on_failure='endLease' - report failure, release in finally block
logger.error("beforeLease hook failed with on_failure='endLease': %s", e)
lease_scope.skip_after_lease_hook = True
should_release = True
await report_status(
ExporterStatus.BEFORE_LEASE_HOOK_FAILED,
f"beforeLease hook failed (on_failure=endLease): {e}",
)
# No request_lease_release - client will discover failure and release

except Exception as e:
logger.error("beforeLease hook failed with unexpected error: %s", e, exc_info=True)
Expand All @@ -690,6 +704,14 @@ async def run_before_lease_hook(
# Always set the event to unblock connections
lease_scope.before_lease_hook.set()

# Release lease for endLease failure mode.
# Shielded from cancellation to ensure the release completes
# even if the task group is being torn down.
if should_release:
with CancelScope(shield=True):
await anyio.sleep(1.0)
await self._safe_release_lease(request_lease_release)

async def run_after_lease_hook(
self,
lease_scope: "LeaseContext",
Expand Down
32 changes: 29 additions & 3 deletions python/packages/jumpstarter/jumpstarter/exporter/hooks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,23 +505,49 @@ async def test_before_lease_hook_exit_sets_skip_flag(self, lease_scope) -> None:
assert lease_scope.skip_after_lease_hook is True
mock_shutdown.assert_called_once_with(exit_code=1, wait_for_lease_exit=True, should_unregister=True)

async def test_before_lease_hook_endlease_does_not_set_skip_flag(self, lease_scope) -> None:
"""Test that beforeLease hook failure with on_failure=endLease does NOT set skip_after_lease_hook."""
async def test_before_lease_hook_endlease_sets_skip_flag_and_releases_lease(self, lease_scope) -> None:
"""Test that beforeLease hook failure with on_failure=endLease sets skip_after_lease_hook and releases lease."""
hook_config = HookConfigV1Alpha1(
before_lease=HookInstanceConfigV1Alpha1(script="exit 1", timeout=10, on_failure="endLease"),
)
executor = HookExecutor(config=hook_config)

mock_report_status = AsyncMock()
mock_shutdown = MagicMock()
mock_request_lease_release = AsyncMock()

await executor.run_before_lease_hook(
lease_scope,
mock_report_status,
mock_shutdown,
mock_request_lease_release,
)

assert lease_scope.skip_after_lease_hook is False
assert lease_scope.skip_after_lease_hook is True
mock_request_lease_release.assert_called_once()
mock_shutdown.assert_not_called()

async def test_before_lease_hook_endlease_handles_release_error(self, lease_scope) -> None:
"""Test that beforeLease hook with on_failure=endLease handles release errors gracefully."""
hook_config = HookConfigV1Alpha1(
before_lease=HookInstanceConfigV1Alpha1(script="exit 1", timeout=10, on_failure="endLease"),
)
executor = HookExecutor(config=hook_config)

mock_report_status = AsyncMock()
mock_shutdown = MagicMock()
mock_request_lease_release = AsyncMock(side_effect=RuntimeError("controller unavailable"))

# Should not raise even when request_lease_release fails
await executor.run_before_lease_hook(
lease_scope,
mock_report_status,
mock_shutdown,
mock_request_lease_release,
)

assert lease_scope.skip_after_lease_hook is True
mock_request_lease_release.assert_called_once()

async def test_pty_output_drained_after_stop_flag_set(self) -> None:
"""Test that PTY drain captures data remaining after the stop flag is set.
Expand Down
Loading