
Commit e3530e9

fix(sqs/throttle): address PR #679 review feedback (Codex P1/P2 + Gemini high/medium)
Six fixes from the first round of automated reviews on PR #679:

- Codex P1 -- bucket reconciliation on stale config (sqs_throttle.go): loadOrInit returned cached buckets without checking that capacity / refillRate still match the queue's current Throttle config. After a leadership change, a node retaining buckets from a prior leader term would keep enforcing the prior term's limits even after a new SetQueueAttributes had committed -- the invalidation only runs on the leader that processed the commit, so a different leader's stale buckets survive. Now compares cap/refill on every Load hit and rebuilds (Delete + LoadOrStore) on mismatch.

- Codex P1 -- invalidate only on actual throttle change (sqs_catalog.go, sqs_throttle.go): cache invalidation in setQueueAttributes ran unconditionally after every successful commit, including unrelated-field updates and no-op writes. Result: any caller could silently restore a noisy tenant's burst capacity by writing a no-op SetQueueAttributes. Now gated on throttleAttributesPresent(in.Attributes), which checks the request for any Throttle* key. The bucket reconciliation above acts as the safety net if a future code path bypasses the gate.

- Codex P2 -- attributesEqual covers Throttle (sqs_catalog.go): CreateQueue idempotency relied on attributesEqual, which did not include Throttle*. A re-create with different limits was treated as idempotent and silently kept the old limits. Now compares the full Throttle struct via throttleConfigEqual; baseAttributesEqual extracted to keep cyclop under the ceiling.

- Gemini high -- thread throttle through existing meta load (sqs_messages.go, sqs_throttle.go): chargeQueue did one Pebble read per request even though the hot-path handlers (sendMessage, receiveMessage) load the meta moments later. Added chargeQueueWithThrottle, which takes pre-loaded throttle config; both hot-path handlers now load meta once and pass throttle in. The throttle check now sits AFTER the QueueDoesNotExist branch, so a missing queue no longer consumes a token. Batch + delete handlers keep chargeQueue (one extra meta read) -- low-QPS verbs where the simplification of not pulling meta out of the retry loop is worth the per-call cost.

- Gemini high -- move sweep off hot path (sqs.go, sqs_throttle.go): maybeSweep ran the O(N) sync.Map.Range on whichever request was unlucky enough to trigger the per-minute sweep, causing latency spikes on many-queue clusters. Replaced with runSweepLoop on a background ticker tied to s.reaperCtx (started in Run alongside the existing message reaper, cleaned up by the same reaperCancel in Stop). The hot-path charge() no longer calls into the sweep at all.

- Gemini medium -- cap retry-after duration (sqs_throttle.go): computeRetryAfter could compute a multi-day Retry-After (or worse, overflow time.Duration arithmetic) for pathologically small refillRate / large requested values. Capped at throttleRetryAfterCap (1h, matching the bucket idle-evict window). The cap is applied before the Duration multiplication, so overflow is impossible.

New tests:
- TestBucketStore_ReconcilesBucketOnConfigChange pins the Codex P1 reconciliation contract.
- TestComputeRetryAfter_CapsAtMaximum pins the Gemini medium cap.
- TestThrottleAttributesPresent covers the request-gate helper used by the conditional invalidation.

All tests pass under -race; golangci-lint clean.
1 parent e105815 commit e3530e9

5 files changed

Lines changed: 252 additions & 40 deletions


adapter/sqs.go

Lines changed: 4 additions & 0 deletions
@@ -143,6 +143,10 @@ func NewSQSServer(listen net.Listener, st store.MVCCStore, coordinate kv.Coordin
 
 func (s *SQSServer) Run() error {
 	s.startReaper(s.reaperCtx)
+	// Throttle bucket idle-evict runs on a background ticker so the
+	// request hot path never pays the O(N) sweep cost. Cleaned up by
+	// the same reaperCtx cancellation that stops the message reaper.
+	go s.throttle.runSweepLoop(s.reaperCtx)
 	if err := s.httpServer.Serve(s.listen); err != nil && !errors.Is(err, http.ErrServerClosed) {
 		return errors.WithStack(err)
 	}

adapter/sqs_catalog.go

Lines changed: 40 additions & 7 deletions
@@ -640,6 +640,13 @@ func attributesEqual(a, b *sqsQueueMeta) bool {
 	if a == nil || b == nil {
 		return false
 	}
+	return baseAttributesEqual(a, b) && throttleConfigEqual(a.Throttle, b.Throttle)
+}
+
+// baseAttributesEqual compares the pre-Phase-3.C/3.D attribute set.
+// Split from attributesEqual so adding fields per phase does not
+// push the function over the cyclop ceiling.
+func baseAttributesEqual(a, b *sqsQueueMeta) bool {
 	return a.IsFIFO == b.IsFIFO &&
 		a.ContentBasedDedup == b.ContentBasedDedup &&
 		a.VisibilityTimeoutSeconds == b.VisibilityTimeoutSeconds &&
@@ -650,6 +657,27 @@ func attributesEqual(a, b *sqsQueueMeta) bool {
 		a.RedrivePolicy == b.RedrivePolicy
 }
 
+// throttleConfigEqual compares two Throttle configs for the
+// CreateQueue idempotency check. Without including the throttle
+// fields in attributesEqual, a re-create with different limits would
+// be treated as idempotent and silently keep the old limits.
+func throttleConfigEqual(a, b *sqsQueueThrottle) bool {
+	aEmpty := a.IsEmpty()
+	bEmpty := b.IsEmpty()
+	if aEmpty && bEmpty {
+		return true
+	}
+	if aEmpty != bEmpty {
+		return false
+	}
+	return a.SendCapacity == b.SendCapacity &&
+		a.SendRefillPerSecond == b.SendRefillPerSecond &&
+		a.RecvCapacity == b.RecvCapacity &&
+		a.RecvRefillPerSecond == b.RecvRefillPerSecond &&
+		a.DefaultCapacity == b.DefaultCapacity &&
+		a.DefaultRefillPerSecond == b.DefaultRefillPerSecond
+}
+
 // ------------------------ storage primitives ------------------------
 
 func (s *SQSServer) nextTxnReadTS(ctx context.Context) uint64 {
@@ -1205,13 +1233,18 @@ func (s *SQSServer) setQueueAttributes(w http.ResponseWriter, r *http.Request) {
 	}
 	// Drop the in-memory bucket entries belonging to this queue *after*
 	// the Raft commit so the next request rebuilds from the freshly
-	// committed throttle config. Without this step the old limits keep
-	// being enforced until the idle-evict sweep removes the stale
-	// entry — defeating the operator's intent to throttle a noisy
-	// tenant in real time. The LoadOrStore race a concurrent in-flight
-	// request might run with the stale bucket is benign: the rebuilt
-	// bucket starts at full capacity, same as failover semantics.
-	s.throttle.invalidateQueue(name)
+	// committed throttle config. Gated on whether the request actually
+	// touched a Throttle* attribute — an unconditional invalidate
+	// would reset the bucket on every unrelated SetQueueAttributes
+	// (e.g. VisibilityTimeout-only update), giving any caller a way to
+	// silently restore a noisy tenant's burst capacity by writing a
+	// no-op SetQueueAttributes (Codex P1 on PR #679). The bucket
+	// reconciliation in loadOrInit also catches a stale bucket if a
+	// throttle change slips past this gate (e.g. via a future admin
+	// path), so the gating here is purely a hot-path optimisation.
+	if throttleAttributesPresent(in.Attributes) {
+		s.throttle.invalidateQueue(name)
+	}
 	writeSQSJSON(w, map[string]any{})
 }

adapter/sqs_messages.go

Lines changed: 25 additions & 13 deletions
@@ -294,10 +294,10 @@ type sqsChangeVisibilityInput struct {
 
 // ------------------------ handlers ------------------------
 
-// prepareSendMessage decodes the SendMessage payload, resolves the
-// queue name, and runs the throttle charge. Returning early on any
-// failure keeps sendMessage under the cyclop ceiling — without this
-// extraction the throttle branch pushes the function over the limit.
+// prepareSendMessage decodes the SendMessage payload and resolves
+// the queue name. Throttle charging happens after the meta load in
+// validateSend so we don't pay an extra meta read just to discover
+// throttling is off (Gemini high on PR #679).
 func (s *SQSServer) prepareSendMessage(w http.ResponseWriter, r *http.Request) (sqsSendMessageInput, string, bool) {
 	var in sqsSendMessageInput
 	if err := decodeSQSJSONInput(r, &in); err != nil {
@@ -309,21 +309,29 @@ func (s *SQSServer) prepareSendMessage(w http.ResponseWriter, r *http.Request) (
 		writeSQSErrorFromErr(w, err)
 		return in, "", false
 	}
-	if !s.chargeQueue(w, r, queueName, bucketActionSend, 1) {
-		return in, queueName, false
-	}
 	return in, queueName, true
 }
 
-// validateSend loads queue meta, validates message attributes / FIFO
-// params, and resolves the delay. Returns ok=false if any validation
-// step has already written the error response.
+// validateSend loads queue meta, runs the throttle charge against
+// the loaded throttle config (no extra meta read), then validates
+// message attributes / FIFO params and resolves the delay. Returns
+// ok=false if any step has already written the error response.
+//
+// Throttle check sits AFTER the meta load (so we have the throttle
+// config) and AFTER the QueueDoesNotExist branch (so a missing
+// queue is reported as 400 QueueDoesNotExist, not as a Throttling
+// 400 against a non-existent bucket). It still sits OUTSIDE the
+// OCC transaction (§4.2): a rejected request never reaches the
+// coordinator.
 func (s *SQSServer) validateSend(w http.ResponseWriter, r *http.Request, queueName string, in sqsSendMessageInput) (*sqsQueueMeta, uint64, int64, bool) {
 	meta, readTS, apiErr := s.loadQueueMetaForSend(r.Context(), queueName, []byte(in.MessageBody))
 	if apiErr != nil {
 		writeSQSErrorFromErr(w, apiErr)
 		return nil, 0, 0, false
 	}
+	if !s.chargeQueueWithThrottle(w, queueName, bucketActionSend, 1, meta.Throttle) {
+		return nil, 0, 0, false
+	}
 	if apiErr := validateMessageAttributes(in.MessageAttributes); apiErr != nil {
 		writeSQSErrorFromErr(w, apiErr)
 		return nil, 0, 0, false
@@ -535,9 +543,6 @@ func (s *SQSServer) receiveMessage(w http.ResponseWriter, r *http.Request) {
 		writeSQSErrorFromErr(w, err)
 		return
 	}
-	if !s.chargeQueue(w, r, queueName, bucketActionReceive, 1) {
-		return
-	}
 	ctx := r.Context()
 
 	// Use LeaseRead to fence this scan against a leader that silently lost
@@ -559,6 +564,13 @@ func (s *SQSServer) receiveMessage(w http.ResponseWriter, r *http.Request) {
 		writeSQSError(w, http.StatusBadRequest, sqsErrQueueDoesNotExist, "queue does not exist")
 		return
 	}
+	// Throttle check uses the loaded meta's throttle config so we
+	// don't pay an extra meta read just to discover throttling is off
+	// (Gemini high on PR #679). Sits AFTER the QueueDoesNotExist
+	// branch — a missing queue should not consume a Recv token.
+	if !s.chargeQueueWithThrottle(w, queueName, bucketActionReceive, 1, meta.Throttle) {
+		return
+	}
 	max, maxErr := resolveReceiveMaxMessages(in.MaxNumberOfMessages)
 	if maxErr != nil {
 		writeSQSErrorFromErr(w, maxErr)

adapter/sqs_throttle.go

Lines changed: 128 additions & 20 deletions
@@ -1,6 +1,7 @@
 package adapter
 
 import (
+	"context"
 	"math"
 	"net/http"
 	"sync"
@@ -30,6 +31,34 @@
 // invalidate it.
 var throttleAllActions = []string{bucketActionSend, bucketActionReceive, bucketActionAny}
 
+// throttleAttributeNames is the wire-side set of Throttle*
+// attributes a SetQueueAttributes request can carry. Used by the
+// invalidation gate in setQueueAttributes so an unrelated update
+// (e.g. VisibilityTimeout only) does not pay the cache invalidation
+// cost or, worse, give the caller a way to silently reset bucket
+// state via a no-op SetQueueAttributes (Codex P1 on PR #679).
+var throttleAttributeNames = []string{
+	"ThrottleSendCapacity",
+	"ThrottleSendRefillPerSecond",
+	"ThrottleRecvCapacity",
+	"ThrottleRecvRefillPerSecond",
+	"ThrottleDefaultCapacity",
+	"ThrottleDefaultRefillPerSecond",
+}
+
+// throttleAttributesPresent reports whether attrs carries any
+// Throttle* key. Cheap O(6) check; the throttleAttributeNames slice
+// is the source of truth so a future Throttle* attribute name added
+// in one place automatically participates in the gate.
+func throttleAttributesPresent(attrs map[string]string) bool {
+	for _, k := range throttleAttributeNames {
+		if _, ok := attrs[k]; ok {
+			return true
+		}
+	}
+	return false
+}
+
 // throttleHardCeilingPerSecond bounds any user-supplied capacity or
 // refill rate. A typo like SendCapacity=1e9 silently meaning "no limit"
 // is more dangerous than an explicit InvalidAttributeValue (Codex P1 on
@@ -147,7 +176,6 @@ func (b *bucketStore) charge(cfg *sqsQueueThrottle, queue, action string, count
 	if count < 1 {
 		count = 1
 	}
-	b.maybeSweep()
 	// Bucket key uses the *resolved* action so Send-falls-through-to-
 	// Default and Recv-falls-through-to-Default share the same Default
 	// bucket. Without the resolution, an operator who configures only
@@ -191,12 +219,40 @@ func (b *bucketStore) charge(cfg *sqsQueueThrottle, queue, action string, count
 // safe because both racers compute identical (capacity, refillRate)
 // from the same meta snapshot — the bucket they would build is
 // behaviourally interchangeable.
+//
+// Reconciliation against stale config (Codex P1 on PR #679): if a
+// cached bucket's capacity/refillRate differ from the cfg's current
+// values, the bucket is replaced with a fresh one built from the
+// current config. Without this check, a node that lost leadership
+// during a SetQueueAttributes commit and then regained leadership
+// later would keep enforcing the prior leader-term's limits — the
+// SetQueueAttributes invalidation only runs on the leader that
+// processed the commit, so a different leader's stale buckets
+// survive. The reconciliation also covers the case where the
+// invalidation gate in setQueueAttributes is bypassed (e.g. by a
+// future admin path that mutates throttle config without touching
+// SetQueueAttributes).
 func (b *bucketStore) loadOrInit(queue, action string, capacity, refill float64) *tokenBucket {
 	key := bucketKey{queue: queue, action: action}
 	if v, ok := b.buckets.Load(key); ok {
 		// type assertion is sound: only tokenBucket pointers are stored.
 		bucket, _ := v.(*tokenBucket)
-		return bucket
+		// Cheap field comparison under the bucket's own lock — if the
+		// cached bucket matches the current config we return it
+		// directly. A mismatch means the on-disk meta moved while
+		// this node held a stale bucket; rebuild from the current
+		// config (full capacity, matching the failover semantics).
+		bucket.mu.Lock()
+		matches := bucket.capacity == capacity && bucket.refillRate == refill
+		bucket.mu.Unlock()
+		if matches {
+			return bucket
+		}
+		b.buckets.Delete(key)
+		// fall through to LoadOrStore — a concurrent racer might
+		// have already inserted a fresh bucket with the current
+		// config, in which case LoadOrStore picks it up and the new
+		// bucket below is discarded.
 	}
 	now := b.clock()
 	fresh := &tokenBucket{
@@ -225,20 +281,38 @@
 	}
 }
 
-// maybeSweep walks the bucket store dropping any bucket idle longer
-// than evictedAfter. Runs at most once per sweepEvery from the hot
-// path so a many-queue cluster does not pay the O(N) cost on every
-// request.
-func (b *bucketStore) maybeSweep() {
-	if b.evictedAfter <= 0 {
+// runSweepLoop runs the idle-evict sweep on a background ticker so
+// the request hot path never pays the O(N) sync.Map.Range cost
+// (Gemini high on PR #679: a many-queue cluster would see latency
+// spikes on whichever request was unlucky enough to trigger the
+// per-minute on-hot-path sweep). Returns when ctx is done — the
+// SQSServer wires this to s.reaperCtx so a Stop() call cleans the
+// goroutine up alongside the existing reaper.
+func (b *bucketStore) runSweepLoop(ctx context.Context) {
+	if b == nil || b.evictedAfter <= 0 || b.sweepEvery <= 0 {
 		return
 	}
-	b.sweepMu.Lock()
-	now := b.clock()
-	if now.Sub(b.lastSweep) < b.sweepEvery {
-		b.sweepMu.Unlock()
-		return
+	t := time.NewTicker(b.sweepEvery)
+	defer t.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-t.C:
+			b.sweep()
+		}
 	}
+}
+
+// sweep walks the bucket store dropping any bucket idle longer than
+// evictedAfter. Called from runSweepLoop on a background ticker.
+// Bucket lookups are still O(1) on the hot path; sweep iterates
+// every entry under the per-bucket lock (held briefly for the
+// timestamp read) so it never blocks a charge() that already has
+// the bucket.
+func (b *bucketStore) sweep() {
+	now := b.clock()
+	b.sweepMu.Lock()
 	b.lastSweep = now
 	b.sweepMu.Unlock()
 	cutoff := now.Add(-b.evictedAfter)
@@ -277,6 +351,17 @@ func resolveActionConfig(cfg *sqsQueueThrottle, action string) (string, float64,
 	return action, 0, 0
 }
 
+// throttleRetryAfterCap bounds the Retry-After value the client sees
+// (Gemini medium on PR #679). Without a cap, a tiny refillRate plus
+// a large requested count would compute a multi-day wait — and
+// time.Duration arithmetic can overflow at the upper end. One hour
+// matches the bucket store's idle-evict window: by the time the
+// suggested retry would otherwise expire, the bucket would have
+// been evicted and rebuilt at full capacity anyway, so a longer
+// suggestion is meaningless. Producers that hit the cap are also
+// strongly mis-configured; capping is a guard rail, not a feature.
+const throttleRetryAfterCap = time.Hour
+
 // computeRetryAfter implements the §3.4 formula:
 //
 //	needed := requested - currentTokens
@@ -287,6 +372,9 @@ func resolveActionConfig(cfg *sqsQueueThrottle, action string) (string, float64,
 // verbs, len(Entries) for batch verbs). The min-1 floor matches the
 // HTTP/1.1 §10.2.3 integer-second granularity. The validator keeps
 // refillRate > 0 so no divide-by-zero guard is needed.
+//
+// Capped at throttleRetryAfterCap to bound time.Duration arithmetic
+// against pathologically small refillRate / large requested values.
 func computeRetryAfter(requested, current, refillRate float64) time.Duration {
 	needed := requested - current
 	if needed <= 0 {
@@ -299,6 +387,12 @@ func computeRetryAfter(requested, current, refillRate float64) time.Duration {
 	if secs < 1 {
 		secs = 1
 	}
+	// Cap before multiplying to avoid time.Duration overflow on
+	// pathological inputs (e.g. refillRate just above zero).
+	const capSecs = float64(throttleRetryAfterCap / time.Second)
+	if secs > capSecs {
+		secs = capSecs
+	}
 	return time.Duration(secs) * time.Second
 }

@@ -314,13 +408,15 @@ func throttleChargeCount(entries int) int {
 	return entries
 }
 
-// chargeQueue is the per-handler entry point. It loads the queue meta
-// at a fresh read timestamp (Pebble cache makes this cheap) and runs
-// the bucket store's charge against the queue's Throttle config. On
-// rejection it writes the Throttling envelope (400 + Retry-After +
-// AWS-shaped JSON body) and returns false; the caller short-circuits.
-// On allow it returns true and the caller continues with the existing
-// OCC dispatch.
+// chargeQueue is the per-handler entry point used by handlers that
+// do not already load the queue meta themselves (deleteMessage,
+// changeMessageVisibility, and their batch siblings). It loads the
+// meta at a fresh read timestamp (Pebble cache makes this cheap) and
+// runs the bucket store's charge against the queue's Throttle config.
+//
+// Handlers that DO load the meta themselves (sendMessage,
+// sendMessageBatch, receiveMessage) should use chargeQueueWithThrottle
+// to avoid the redundant load (Gemini high on PR #679).
 //
 // chargeQueue intentionally swallows missing-queue errors: the caller
 // is going to discover that the queue does not exist a few lines
@@ -336,6 +432,18 @@ func (s *SQSServer) chargeQueue(w http.ResponseWriter, r *http.Request, queueNam
 		return true
 	}
 	throttle := s.queueThrottleConfig(r, queueName)
+	return s.chargeQueueWithThrottle(w, queueName, action, count, throttle)
+}
+
+// chargeQueueWithThrottle is the variant for handlers that already
+// have the throttle config in hand from their own meta load. Drops
+// the per-request meta load chargeQueue does, addressing the Gemini
+// high finding on PR #679 about redundant storage reads on the hot
+// path.
+func (s *SQSServer) chargeQueueWithThrottle(w http.ResponseWriter, queueName, action string, count int, throttle *sqsQueueThrottle) bool {
+	if s.throttle == nil {
+		return true
+	}
 	outcome := s.throttle.charge(throttle, queueName, action, count)
 	if outcome.allowed {
 		return true
