Skip to content

Commit e7b4bd4

Browse files
committed
test(dist): replace Set with DebugInject in rebalance test helpers
Switch rebalance integration tests from using node.Set (quorum write) to node.DebugInject (test bypass) when seeding keys before triggering topology changes. This eliminates a ~1-in-50 flake under -shuffle in CI caused by fan-out transport calls missing deadlines on brand-new two-node clusters. Add populateKeysOnAll helper to inject keys across multiple nodes simultaneously, and migrate leave, replica-diff, and throttle tests to use the new helper instead of inline key-seeding loops.
1 parent 1afbd98 commit e7b4bd4

4 files changed

Lines changed: 64 additions & 48 deletions

File tree

tests/integration/dist_rebalance_leave_test.go

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"time"
77

88
"github.com/hyp3rd/hypercache/pkg/backend"
9-
cache "github.com/hyp3rd/hypercache/pkg/cache/v2"
109
)
1110

1211
// TestDistRebalanceLeave verifies keys are redistributed after a node leaves.
@@ -32,18 +31,15 @@ func TestDistRebalanceLeave(t *testing.T) {
3231
nodeC := mustDistNode(ctx, t, "C", addrC, []string{addrA, addrB}, opts...)
3332
defer func() { _ = nodeA.Stop(ctx); _ = nodeB.Stop(ctx); _ = nodeC.Stop(ctx) }()
3433

35-
// Insert keys through A.
36-
totalKeys := 300
37-
for i := range totalKeys {
38-
k := cacheKey(i)
34+
// Inject keys across all three nodes — when C leaves, keys
35+
// that lived on C need to migrate to surviving owners, so
36+
// the pre-leave state must have keys on every node for the
37+
// post-leave migration assertion to be meaningful. See
38+
// populateKeys' doc in dist_rebalance_test.go for why we use
39+
// DebugInject instead of Set.
40+
const totalKeys = 300
3941

40-
it := &cache.Item{Key: k, Value: []byte("v"), Version: 1, Origin: "A", LastUpdated: time.Now()}
41-
42-
err := nodeA.Set(ctx, it)
43-
if err != nil {
44-
t.Fatalf("set %s: %v", k, err)
45-
}
46-
}
42+
populateKeysOnAll(ctx, t, totalKeys, nodeA, nodeB, nodeC)
4743

4844
time.Sleep(250 * time.Millisecond) // allow replication
4945

tests/integration/dist_rebalance_replica_diff_test.go

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"time"
77

88
"github.com/hyp3rd/hypercache/pkg/backend"
9-
cache "github.com/hyp3rd/hypercache/pkg/cache/v2"
109
)
1110

1211
// TestDistRebalanceReplicaDiff ensures that when a new replica is added (primary unchanged)
@@ -31,18 +30,14 @@ func TestDistRebalanceReplicaDiff(t *testing.T) {
3130
nodeB := mustDistNode(ctx, t, "B", addrB, []string{addrA}, baseOpts...)
3231
defer func() { _ = nodeA.Stop(ctx); _ = nodeB.Stop(ctx) }()
3332

34-
// Insert a set of keys through primary (either node). We'll use A.
35-
totalKeys := 200
36-
for i := range totalKeys {
37-
k := cacheKey(i)
38-
39-
it := &cache.Item{Key: k, Value: []byte("v"), Version: 1, Origin: "A", LastUpdated: time.Now()}
33+
// Inject keys on BOTH A and B (the replica-diff path assumes
34+
// replication has already drained across the pre-join cluster
35+
// — without that, the post-join replica fan-out has nothing to
36+
// diff). See populateKeys' doc in dist_rebalance_test.go for
37+
// why we use DebugInject instead of Set.
38+
const totalKeys = 200
4039

41-
err := nodeA.Set(ctx, it)
42-
if err != nil {
43-
t.Fatalf("set %s: %v", k, err)
44-
}
45-
}
40+
populateKeysOnAll(ctx, t, totalKeys, nodeA, nodeB)
4641

4742
time.Sleep(300 * time.Millisecond) // allow initial replication
4843

tests/integration/dist_rebalance_replica_diff_throttle_test.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"time"
77

88
"github.com/hyp3rd/hypercache/pkg/backend"
9-
cache "github.com/hyp3rd/hypercache/pkg/cache/v2"
109
)
1110

1211
// TestDistRebalanceReplicaDiffThrottle ensures the per-tick limit increments throttle metric.
@@ -31,12 +30,12 @@ func TestDistRebalanceReplicaDiffThrottle(t *testing.T) {
3130
nodeB := mustDistNode(ctx, t, "B", addrB, []string{addrA}, base...)
3231
defer func() { _ = nodeA.Stop(ctx); _ = nodeB.Stop(ctx) }()
3332

34-
// Seed multiple keys.
35-
for i := range 25 {
36-
k := cacheKey(i)
37-
38-
_ = nodeA.Set(ctx, &cache.Item{Key: k, Value: []byte("x"), Version: 1, Origin: "A", LastUpdated: time.Now()})
39-
}
33+
// Seed keys on BOTH A and B so the post-join replica-diff has
34+
// candidates to push to C (without keys on B, the replica-diff
35+
// throttle would never fire because only A would have work).
36+
// DebugInject also fixes the prior shape's silently-swallowed
37+
// Set error.
38+
populateKeysOnAll(ctx, t, 25, nodeA, nodeB)
4039

4140
time.Sleep(250 * time.Millisecond)
4241

tests/integration/dist_rebalance_test.go

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,51 @@ func rebalanceTestOpts() []backend.DistMemoryOption {
3030
}
3131
}
3232

33-
// populateKeys writes n test keys to node — used to seed the cluster before
34-
// triggering a rebalance.
35-
func populateKeys(ctx context.Context, t *testing.T, node *backend.DistMemory, n int) {
33+
// populateKeys seeds n test keys onto node's local shard via the
34+
// DebugInject test bypass — no replication, no quorum check. We use
35+
// the bypass deliberately: every rebalance test asserts post-state
36+
// of the ring + migration metrics, not the pre-rebalance
37+
// replication path. Going through Set would force a quorum write
38+
// for every key against the brand-new two-node cluster, which is
39+
// flake-prone (~1 in 50 runs under -shuffle in CI) when a single
40+
// fan-out transport call misses its deadline.
41+
//
42+
// Tests that specifically want to exercise the replication path
43+
// during populate should call Set directly with appropriate
44+
// assertions; this helper is for "shape the ring, then test what
45+
// happens next." Use populateKeysOnAll when the test needs the
46+
// keys present on multiple nodes (e.g. replica-diff scenarios that
47+
// assume replication has already drained).
48+
func populateKeys(_ context.Context, t *testing.T, node *backend.DistMemory, n int) {
3649
t.Helper()
3750

3851
for i := range n {
3952
k := cacheKey(i)
53+
it := &cache.Item{Key: k, Value: []byte("v"), Version: 1, Origin: "A", LastUpdated: time.Now()}
54+
55+
node.DebugInject(it)
56+
}
57+
}
4058

59+
// populateKeysOnAll injects n keys on every supplied node via the
60+
// DebugInject bypass — simulates "replication has already drained
61+
// across these nodes" without going through the quorum-write path
62+
// that flakes under -shuffle. Use this for replica-diff and
63+
// leave-migration tests where the post-topology assertion requires
64+
// keys to be present on multiple nodes pre-change.
65+
//
66+
// Over-replication (a key landing on a node that isn't its actual
67+
// owner per the ring) is harmless: the rebalance loop sheds keys
68+
// from nodes that aren't owners after each tick.
69+
func populateKeysOnAll(_ context.Context, t *testing.T, n int, nodes ...*backend.DistMemory) {
70+
t.Helper()
71+
72+
for i := range n {
73+
k := cacheKey(i)
4174
it := &cache.Item{Key: k, Value: []byte("v"), Version: 1, Origin: "A", LastUpdated: time.Now()}
4275

43-
err := node.Set(ctx, it)
44-
if err != nil {
45-
t.Fatalf("set %s: %v", k, err)
76+
for _, node := range nodes {
77+
node.DebugInject(it)
4678
}
4779
}
4880
}
@@ -124,17 +156,11 @@ func TestDistRebalanceThrottle(t *testing.T) {
124156

125157
defer func() { _ = nodeA.Stop(ctx); _ = nodeB.Stop(ctx) }()
126158

127-
// Populate many keys on A.
128-
for i := range 400 {
129-
k := cacheKey(i)
130-
131-
it := &cache.Item{Key: k, Value: []byte("v"), Version: 1, Origin: "A", LastUpdated: time.Now()}
132-
133-
err := nodeA.Set(ctx, it)
134-
if err != nil {
135-
t.Fatalf("set %s: %v", k, err)
136-
}
137-
}
159+
// Populate many keys on A via DebugInject (test bypass — see
160+
// populateKeys' doc for why). We need keys on A's primary
161+
// shards so the post-join rebalance has work to do; the
162+
// pre-join replication path is not under test here.
163+
populateKeys(ctx, t, nodeA, 400)
138164

139165
// Add third node to force migrations while concurrency=1, which should queue batches.
140166
addrC := allocatePort(t)

0 commit comments

Comments
 (0)