From 4ca6edd682f3bc4f6d793714d4472be8988bb55c Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sun, 17 May 2026 01:48:11 -0700 Subject: [PATCH 1/2] fix(envd): bound MMDS lookup in /init to prevent initLock starvation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PostInit holds initLock across SetData, which calls validateInitAccessToken → checkMMDSHash → mmdsClient.GetAccessTokenHash. That call used the request context directly. The orchestrator's per-request ctx has a 50 ms timeout but the handler keeps running after the client cancels (Go's net/http doesn't abort handlers on client disconnect), so an MMDS thread stalled by FC's single VMM thread would hold initLock indefinitely while every retry queued behind it. Wrap the MMDS hash lookup in a 1 s context so a stuck call can't lock the handler. MMDS responses are otherwise sub-millisecond on healthy hosts so this is well above the steady-state cost. --- packages/envd/internal/api/init.go | 14 +++++++++++++- packages/envd/pkg/version.go | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/packages/envd/internal/api/init.go b/packages/envd/internal/api/init.go index 4d0081b0a2..9672449a74 100644 --- a/packages/envd/internal/api/init.go +++ b/packages/envd/internal/api/init.go @@ -30,6 +30,13 @@ var ( const ( maxTimeInPast = 50 * time.Millisecond maxTimeInFuture = 5 * time.Second + + // mmdsLookupTimeout caps the MMDS hash lookup inside the /init handler. + // The handler holds initLock; if MMDS is starved (e.g. host-side MMDS + // thread blocked by FC's single VMM thread) a stuck call would queue + // every retry behind it. A short cap is safe because MMDS lookups are + // otherwise sub-millisecond on healthy hosts. + mmdsLookupTimeout = 1 * time.Second ) // validateInitAccessToken validates the access token for /init requests. @@ -70,7 +77,12 @@ func (a *API) checkMMDSHash(ctx context.Context, requestToken *SecureToken) (boo return false, false } - mmdsHash, err := a.mmdsClient.GetAccessTokenHash(ctx) + // Bound the MMDS lookup so a stuck MMDS thread cannot hold initLock + // indefinitely and queue every retry behind it. + lookupCtx, cancel := context.WithTimeout(ctx, mmdsLookupTimeout) + defer cancel() + + mmdsHash, err := a.mmdsClient.GetAccessTokenHash(lookupCtx) if err != nil { return false, false } diff --git a/packages/envd/pkg/version.go b/packages/envd/pkg/version.go index 88bc32c769..4cfbb08b29 100644 --- a/packages/envd/pkg/version.go +++ b/packages/envd/pkg/version.go @@ -1,3 +1,3 @@ package pkg -const Version = "0.5.23" +const Version = "0.5.24" From b685c36a926830c1105277a5613595c37dc28684 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sun, 17 May 2026 02:37:26 -0700 Subject: [PATCH 2/2] chore: trim verbose comments --- packages/envd/internal/api/init.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/packages/envd/internal/api/init.go b/packages/envd/internal/api/init.go index 9672449a74..e6799ec374 100644 --- a/packages/envd/internal/api/init.go +++ b/packages/envd/internal/api/init.go @@ -31,11 +31,7 @@ const ( maxTimeInPast = 50 * time.Millisecond maxTimeInFuture = 5 * time.Second - // mmdsLookupTimeout caps the MMDS hash lookup inside the /init handler. - // The handler holds initLock; if MMDS is starved (e.g. host-side MMDS - // thread blocked by FC's single VMM thread) a stuck call would queue - // every retry behind it. A short cap is safe because MMDS lookups are - // otherwise sub-millisecond on healthy hosts. + // mmdsLookupTimeout caps the MMDS hash lookup so a stuck call can't hold initLock. mmdsLookupTimeout = 1 * time.Second ) @@ -77,8 +73,6 @@ func (a *API) checkMMDSHash(ctx context.Context, requestToken *SecureToken) (boo return false, false } - // Bound the MMDS lookup so a stuck MMDS thread cannot hold initLock - // indefinitely and queue every retry behind it. lookupCtx, cancel := context.WithTimeout(ctx, mmdsLookupTimeout) defer cancel()