Skip to content

Commit 492b22d

Browse files
committed
fix(admin): clamp seed-fallback targets to maxNodes
Codex P2 on 501b017: when every discovery RPC fails (or the caller context cancels, or refreshMembership returns an unexpected type), currentTargets / refreshMembership returned the raw f.seeds list, bypassing the maxNodes bound that membersFrom otherwise enforces. With an oversized --nodes list (or under outage conditions where the fallback path actually fires), /api/cluster/overview could spawn one RPC per seed entry — exactly when the system is already degraded. Add fanout.seedTargets() that returns a deduplicated copy of f.seeds clamped to f.maxNodes, and replace all three raw-seed-return sites with it. Regression test feeds 9 seeds (1 duplicate) with maxNodes=5 and asserts the result is 5 distinct entries.
1 parent 501b017 commit 492b22d

2 files changed

Lines changed: 61 additions & 4 deletions

File tree

cmd/elastickv-admin/main.go

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ type fanout struct {
313313
// "skip seed entries" check. Seeds are immutable after construction so
314314
// rebuilding the map on every cache-full eviction (under f.mu) is pure
315315
// waste — Gemini flagged the per-call allocation.
316-
seedSet map[string]struct{}
316+
seedSet map[string]struct{}
317317
// maxNodes bounds both the per-overview discovery list and the gRPC
318318
// client cache. Configurable via --maxDiscoveredNodes; values ≤0 fall
319319
// back to defaultMaxDiscoveredNodes so the bound is never disabled.
@@ -669,7 +669,7 @@ func (f *fanout) currentTargets(ctx context.Context) []string {
669669
return addrs
670670
}
671671
log.Printf("elastickv-admin: membership refresh returned unexpected type %T; falling back to seeds", r.Val)
672-
return append([]string(nil), f.seeds...)
672+
return f.seedTargets()
673673
case <-ctx.Done():
674674
// Caller bailed. Give them whatever targets we can assemble without
675675
// blocking: the last cached membership if we have one, else seeds.
@@ -680,10 +680,40 @@ func (f *fanout) currentTargets(ctx context.Context) []string {
680680
if f.members != nil {
681681
return append([]string(nil), f.members.addrs...)
682682
}
683-
return append([]string(nil), f.seeds...)
683+
return f.seedTargets()
684684
}
685685
}
686686

687+
// seedTargets returns a deduplicated copy of f.seeds clamped to f.maxNodes.
688+
// Callers use it on seed-fallback paths (discovery error, ctx cancel,
689+
// unexpected refresh result, no cached members yet) so a misconfigured huge
690+
// --nodes list never bypasses the fan-out bound that membersFrom otherwise
691+
// enforces. Codex P2 on 501b0173: previously these paths returned the raw
692+
// f.seeds, which under outages or oversized seed lists could spawn more
693+
// concurrent RPCs than configured.
694+
func (f *fanout) seedTargets() []string {
695+
cap := f.maxNodes
696+
if cap <= 0 {
697+
cap = defaultMaxDiscoveredNodes
698+
}
699+
if len(f.seeds) < cap {
700+
cap = len(f.seeds)
701+
}
702+
out := make([]string, 0, cap)
703+
seen := make(map[string]struct{}, cap)
704+
for _, s := range f.seeds {
705+
if _, dup := seen[s]; dup {
706+
continue
707+
}
708+
if len(out) >= f.maxNodes && f.maxNodes > 0 {
709+
break
710+
}
711+
seen[s] = struct{}{}
712+
out = append(out, s)
713+
}
714+
return out
715+
}
716+
687717
// refreshMembership performs the actual discovery RPC. It honours the caller's
688718
// context for overall cancellation but derives a short per-seed timeout from
689719
// discoveryRPCTimeout so a slow first seed does not stall the whole request.
@@ -713,7 +743,7 @@ func (f *fanout) refreshMembership(ctx context.Context) []string {
713743
}
714744

715745
log.Printf("elastickv-admin: all seeds unreachable for membership refresh; falling back to static seed list")
716-
return append([]string(nil), f.seeds...)
746+
return f.seedTargets()
717747
}
718748

719749
// membersFrom extracts a deduplicated address list from a cluster overview

cmd/elastickv-admin/main_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,33 @@ func TestMembersFromCapsAtMaxDiscoveredNodes(t *testing.T) {
276276
}
277277
}
278278

279+
// TestFanoutSeedTargetsClampsToMaxNodes asserts that the seed-fallback path
280+
// clamps to f.maxNodes and deduplicates so an oversized --nodes list cannot
281+
// bypass the per-overview fan-out bound enforced in membersFrom. Codex P2 on
282+
// 501b0173: previously these paths returned the raw seeds list, letting an
283+
// outage spawn more concurrent RPCs than configured.
284+
func TestFanoutSeedTargetsClampsToMaxNodes(t *testing.T) {
285+
t.Parallel()
286+
const cap_ = 5
287+
seeds := []string{
288+
"a:1", "b:1", "c:1", "d:1", "e:1", "f:1", "g:1", "h:1",
289+
"a:1", // duplicate — must be deduplicated, not counted twice
290+
}
291+
f := newFanout(seeds, "", time.Second, insecure.NewCredentials(), cap_)
292+
defer f.Close()
293+
294+
got := f.seedTargets()
295+
if len(got) != cap_ {
296+
t.Fatalf("len = %d, want %d (clamped to maxNodes)", len(got), cap_)
297+
}
298+
seen := map[string]struct{}{}
299+
for _, s := range got {
300+
if _, dup := seen[s]; dup {
301+
t.Fatalf("seedTargets returned duplicates: %v", got)
302+
}
303+
seen[s] = struct{}{}
304+
}
305+
}
279306

280307
// TestFanoutClientForDeduplicatesConcurrentDials asserts that N goroutines
281308
// asking for the same fresh address run only one grpc.NewClient call between

0 commit comments

Comments
 (0)