Skip to content

Commit 2d6ad2d

Browse files
Merge branch 'master' into fix/buildlogcache-size-cap-2026-06-04
2 parents 5942962 + 3c53dc4 commit 2d6ad2d

2 files changed

Lines changed: 209 additions & 1 deletion

File tree

internal/middleware/idempotency.go

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,14 @@ const (
142142
idempotencyFingerprintTTL = 120 * time.Second
143143
// idempotencyKeyMaxLen caps the client-supplied key. Stripe uses 255.
144144
idempotencyKeyMaxLen = 255
145+
// idempotencyInflightTTL bounds an in-flight reservation marker (BugBash
146+
// 2026-06-02 #21). It must exceed the slowest synchronous handler — a
147+
// real provision (provisionDB/Cache/NoSQL runs in-request) can take
148+
// ~10-20s — so a concurrent retry is correctly told "in progress" rather
149+
// than racing past a too-early-expired marker. It is also the worst-case
150+
// self-heal window if the process dies mid-handler before the marker is
151+
// overwritten or deleted.
152+
idempotencyInflightTTL = 60 * time.Second
145153

146154
// X-Idempotency-Source values.
147155
idempotencySourceExplicit = "explicit"
@@ -157,6 +165,16 @@ type idemEntry struct {
157165
Body []byte `json:"b"`
158166
ContentType string `json:"c"`
159167
BodyHash string `json:"h"` // sha256 hex of the original request body
168+
// InFlight marks a reservation placeholder written (via SETNX) the
169+
// moment a cache-miss request begins running the handler, BEFORE the
170+
// real response exists. A concurrent same-key request that reads this
171+
// marker must NOT re-run the handler — it returns 409
172+
// idempotency_key_in_progress instead. The marker is overwritten by the
173+
// real entry when the handler completes (or deleted if the response is
174+
// non-cacheable), and self-expires after idempotencyInflightTTL if the
175+
// process dies mid-handler. Closes the check-then-act double-create race
176+
// (BugBash 2026-06-02 #21).
177+
InFlight bool `json:"f,omitempty"`
160178
}
161179

162180
// mutableErrorCodes lists machine-readable `error` strings whose 4xx
@@ -324,6 +342,39 @@ func Idempotency(rdb *redis.Client, endpoint string) fiber.Handler {
324342
}
325343
}
326344

345+
// respondIdempotencyInProgress returns the 409 a request gets when it finds an
346+
// in-flight reservation marker for its key (BugBash 2026-06-02 #21). The
347+
// envelope mirrors the idempotency_key_conflict 409 (ok/error/message/
348+
// request_id/retry_after_seconds/agent_action) but is RETRYABLE: unlike a
349+
// body-mismatch conflict, the right move is to wait and retry the SAME key —
350+
// the original request's response will be replayed once it completes.
351+
func respondIdempotencyInProgress(c *fiber.Ctx) error {
352+
return c.Status(fiber.StatusConflict).JSON(fiber.Map{
353+
"ok": false,
354+
"error": "idempotency_key_in_progress",
355+
"message": "A request with this Idempotency-Key is still being processed.",
356+
"request_id": GetRequestID(c),
357+
"retry_after_seconds": 2,
358+
"agent_action": "Another request with the same Idempotency-Key is still in flight. Do NOT mint a new key — wait ~2s and retry the SAME key; the original response will be replayed once it completes. See https://instanode.dev/docs/idempotency.",
359+
})
360+
}
361+
362+
// reserveInflight best-effort places an in-flight marker at cacheKey via SETNX,
363+
// so a concurrent same-key request that arrives after this point reads the
364+
// marker and returns 409 in_progress instead of double-running the handler
365+
// (BugBash 2026-06-02 #21). Fire-and-forget: a SETNX error or a lost sub-
366+
// millisecond race just means the marker isn't placed, and the request falls
367+
// back to the documented best-effort dedup posture (run the handler; the GET
368+
// hit-block still catches the common case where the other request reserved
369+
// first). The marker is overwritten by the real entry on completion, or
370+
// cleared on a non-cacheable response.
371+
func reserveInflight(ctx context.Context, rdb *redis.Client, cacheKey, reqBodyHash string) {
372+
// Marshal of this fixed-shape struct cannot fail; the marker is best-effort
373+
// regardless, so we ignore the error and let SETNX no-op on a nil payload.
374+
marker, _ := json.Marshal(idemEntry{InFlight: true, BodyHash: reqBodyHash})
375+
_ = rdb.SetNX(ctx, cacheKey, marker, idempotencyInflightTTL).Err()
376+
}
377+
327378
// idempotencyExplicit handles the Stripe-shape Idempotency-Key path.
328379
// Extracted from the main Idempotency wrapper so the fingerprint-fallback
329380
// branch can live alongside it without nesting another layer of if-blocks.
@@ -371,6 +422,13 @@ func idempotencyExplicit(c *fiber.Ctx, rdb *redis.Client, endpoint, scope, rawKe
371422
slog.Warn("idempotency.cache_unmarshal_failed",
372423
"error", jerr, "endpoint", endpoint)
373424
} else {
425+
if entry.InFlight {
426+
// A concurrent request with this key is still running (it
427+
// placed this reservation marker). Don't run the handler a
428+
// second time (BugBash #21) — tell the caller to retry the
429+
// same key shortly. No rate-limit refund: nothing was served.
430+
return respondIdempotencyInProgress(c)
431+
}
374432
if entry.BodyHash != reqBodyHash {
375433
// 409 is a genuine error response, not a replay — DO NOT
376434
// refund the rate-limit counter here. The agent did the
@@ -407,13 +465,24 @@ func idempotencyExplicit(c *fiber.Ctx, rdb *redis.Client, endpoint, scope, rawKe
407465
}
408466
}
409467

468+
// MISS — reserve the key (SETNX in-flight marker) before running the
469+
// handler so a concurrent same-key request reads the marker and 409s
470+
// instead of double-creating (BugBash #21). The real entry overwrites
471+
// the marker below; non-cacheable paths delete it.
472+
reserveInflight(ctx, rdb, cacheKey, reqBodyHash)
473+
410474
nextErr := c.Next()
411475
if nextErr != nil && !IsResponseWrittenErr(nextErr) {
476+
// Handler failed before writing a response — clear the reservation so
477+
// an immediate retry isn't told "in progress" for the marker's TTL.
478+
rdb.Del(context.Background(), cacheKey)
412479
return nextErr
413480
}
414481

415482
status := c.Response().StatusCode()
416483
if status >= 500 {
484+
// 5xx is retryable — never leave the marker stranding the retry.
485+
rdb.Del(context.Background(), cacheKey)
417486
return nextErr
418487
}
419488

@@ -426,6 +495,9 @@ func idempotencyExplicit(c *fiber.Ctx, rdb *redis.Client, endpoint, scope, rawKe
426495
// the agent's 24h Idempotency-Key. Success + stable failures still
427496
// cache as before.
428497
if !shouldCacheResponse(status, body, ct) {
498+
// Clear the reservation: a mutable 4xx is meant to be retried once
499+
// the user resolves it, so the marker must not block that retry.
500+
rdb.Del(context.Background(), cacheKey)
429501
slog.Info("idempotency.skip_cache_mutable_error",
430502
"endpoint", endpoint,
431503
"status", status,
@@ -516,6 +588,12 @@ func idempotencyFingerprint(c *fiber.Ctx, rdb *redis.Client, endpoint, scope str
516588
slog.Warn("idempotency.fingerprint_cache_unmarshal_failed",
517589
"error", jerr, "endpoint", endpoint)
518590
// Corrupt — fall through to handler and overwrite below.
591+
} else if entry.InFlight {
592+
// A concurrent same-fingerprint request is still running (BugBash
593+
// #21). Return 409 in_progress rather than double-running the
594+
// handler. The source header still reports the fingerprint path.
595+
c.Set(idempotencySourceHeader, idempotencySourceFingerprint)
596+
return respondIdempotencyInProgress(c)
519597
} else {
520598
// Cache HIT on the body-fingerprint fallback path. Same
521599
// refund semantics as the explicit-key branch: the
@@ -532,15 +610,19 @@ func idempotencyFingerprint(c *fiber.Ctx, rdb *redis.Client, endpoint, scope str
532610
}
533611
}
534612

535-
// Miss — run the handler, then cache the response (non-5xx only).
613+
// Miss — reserve the key (BugBash #21), run the handler, then cache the
614+
// response (non-5xx only).
536615
c.Set(idempotencySourceHeader, idempotencySourceMiss)
616+
reserveInflight(ctx, rdb, cacheKey, sha256Hex(canonBody))
537617
nextErr := c.Next()
538618
if nextErr != nil && !IsResponseWrittenErr(nextErr) {
619+
rdb.Del(context.Background(), cacheKey)
539620
return nextErr
540621
}
541622

542623
status := c.Response().StatusCode()
543624
if status >= 500 {
625+
rdb.Del(context.Background(), cacheKey)
544626
return nextErr
545627
}
546628

@@ -553,6 +635,7 @@ func idempotencyFingerprint(c *fiber.Ctx, rdb *redis.Client, endpoint, scope str
553635
// path is the no-header default, so the silent strand is even more
554636
// likely here.
555637
if !shouldCacheResponse(status, body, ct) {
638+
rdb.Del(context.Background(), cacheKey)
556639
slog.Info("idempotency.fingerprint_skip_cache_mutable_error",
557640
"endpoint", endpoint,
558641
"status", status,
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
package middleware_test
2+
3+
// idempotency_inflight_test.go — BugBash 2026-06-02 #21 regression.
4+
//
5+
// The middleware used to do GET(miss) → run handler → SET, with no atomic
6+
// reservation between the GET and the SET. Two requests carrying the same
7+
// Idempotency-Key (or the same body-fingerprint) that raced in that window
8+
// both saw redis.Nil and both ran the handler — double-creating real
9+
// backend resources for the authenticated provision paths that have no other
10+
// per-burst dedup gate.
11+
//
12+
// The fix writes an in-flight reservation marker (SETNX) the instant a miss
13+
// begins running the handler. A concurrent same-key request reads the marker
14+
// and returns 409 idempotency_key_in_progress instead of re-running the
15+
// handler. These tests hold request A inside the handler (blocked on a
16+
// channel) so request B is guaranteed to observe A's live reservation.
17+
18+
import (
19+
"net/http"
20+
"net/http/httptest"
21+
"strings"
22+
"sync/atomic"
23+
"testing"
24+
25+
"github.com/gofiber/fiber/v2"
26+
"github.com/stretchr/testify/assert"
27+
28+
"instant.dev/internal/middleware"
29+
"instant.dev/internal/testhelpers"
30+
)
31+
32+
// newBlockingInflightApp builds a Fiber app whose POST /test handler signals
33+
// `entered` once it begins and then blocks on `release`, so a test can hold
34+
// one request in flight. ran counts handler invocations.
35+
func newBlockingInflightApp(t *testing.T, ran *int32, entered, release chan struct{}) (*fiber.App, func()) {
36+
t.Helper()
37+
rdb, cleanup := testhelpers.SetupTestRedis(t)
38+
app := fiber.New()
39+
app.Use(middleware.Fingerprint())
40+
app.Post("/test", middleware.Idempotency(rdb, "inflight.endpoint"), func(c *fiber.Ctx) error {
41+
atomic.AddInt32(ran, 1)
42+
entered <- struct{}{}
43+
<-release
44+
return c.Status(fiber.StatusCreated).JSON(fiber.Map{"ok": true})
45+
})
46+
return app, cleanup
47+
}
48+
49+
// fireBlocking sends a request that will block in the handler, returning its
50+
// response on the done channel once released.
51+
func fireBlocking(app *fiber.App, ip, key, body string, done chan *http.Response) {
52+
req := httptest.NewRequest(http.MethodPost, "/test", strings.NewReader(body))
53+
req.Header.Set("Content-Type", "application/json")
54+
req.Header.Set("X-Forwarded-For", ip)
55+
if key != "" {
56+
req.Header.Set("Idempotency-Key", key)
57+
}
58+
resp, _ := app.Test(req, 10000)
59+
done <- resp
60+
}
61+
62+
// TestIdempotency_ExplicitKey_ConcurrentInFlight_Returns409 — a second request
63+
// carrying the same Idempotency-Key while the first is still running the
64+
// handler gets 409 idempotency_key_in_progress and does NOT re-run the handler.
65+
func TestIdempotency_ExplicitKey_ConcurrentInFlight_Returns409(t *testing.T) {
66+
var ran int32
67+
entered := make(chan struct{})
68+
release := make(chan struct{})
69+
app, clean := newBlockingInflightApp(t, &ran, entered, release)
70+
defer clean()
71+
72+
ip := uniqueTestIP("inflight-explicit")
73+
key := "inflight-key-" + ip
74+
body := `{"x":1}`
75+
76+
aDone := make(chan *http.Response, 1)
77+
go fireBlocking(app, ip, key, body, aDone)
78+
<-entered // A is in the handler; its reservation marker is live in Redis.
79+
80+
// B: same key, arrives mid-flight → 409 in_progress, handler not re-run.
81+
respB := postWithIdem(t, app, "/test", ip, key, body)
82+
assert.Equal(t, http.StatusConflict, respB.StatusCode)
83+
bodyB := readBody(t, respB)
84+
assert.Contains(t, bodyB, "idempotency_key_in_progress")
85+
assert.Contains(t, bodyB, "request_id")
86+
87+
close(release) // let A finish.
88+
respA := <-aDone
89+
assert.Equal(t, http.StatusCreated, respA.StatusCode)
90+
respA.Body.Close()
91+
assert.Equal(t, int32(1), atomic.LoadInt32(&ran),
92+
"handler must run exactly once despite the concurrent same-key duplicate")
93+
}
94+
95+
// TestIdempotency_Fingerprint_ConcurrentInFlight_Returns409 — the same race on
96+
// the no-header body-fingerprint path: a concurrent identical-body request
97+
// observes the in-flight marker and 409s instead of double-running.
98+
func TestIdempotency_Fingerprint_ConcurrentInFlight_Returns409(t *testing.T) {
99+
var ran int32
100+
entered := make(chan struct{})
101+
release := make(chan struct{})
102+
app, clean := newBlockingInflightApp(t, &ran, entered, release)
103+
defer clean()
104+
105+
ip := uniqueTestIP("inflight-fp")
106+
body := `{"x":1}`
107+
108+
aDone := make(chan *http.Response, 1)
109+
go fireBlocking(app, ip, "", body, aDone) // no key → fingerprint path
110+
<-entered
111+
112+
// B: no key, identical body + ip + route → same fingerprint → 409.
113+
respB := postWithIdem(t, app, "/test", ip, "", body)
114+
assert.Equal(t, http.StatusConflict, respB.StatusCode)
115+
bodyB := readBody(t, respB)
116+
assert.Contains(t, bodyB, "idempotency_key_in_progress")
117+
assert.Equal(t, "fingerprint", respB.Header.Get("X-Idempotency-Source"))
118+
119+
close(release)
120+
respA := <-aDone
121+
assert.Equal(t, http.StatusCreated, respA.StatusCode)
122+
respA.Body.Close()
123+
assert.Equal(t, int32(1), atomic.LoadInt32(&ran),
124+
"handler must run exactly once despite the concurrent same-fingerprint duplicate")
125+
}

0 commit comments

Comments
 (0)