@@ -7,7 +7,9 @@ package kvstorage
77
88import (
99 "context"
10+ "fmt"
1011 "math"
12+ "slices"
1113 "testing"
1214
1315 "github.com/cockroachdb/cockroach/pkg/keys"
@@ -20,8 +22,10 @@ import (
2022 "github.com/cockroachdb/cockroach/pkg/settings/cluster"
2123 "github.com/cockroachdb/cockroach/pkg/storage"
2224 "github.com/cockroachdb/cockroach/pkg/storage/fs"
25+ "github.com/cockroachdb/cockroach/pkg/testutils"
2326 "github.com/cockroachdb/cockroach/pkg/util/leaktest"
2427 "github.com/cockroachdb/cockroach/pkg/util/log"
28+ "github.com/cockroachdb/cockroach/pkg/util/stop"
2529 "github.com/stretchr/testify/require"
2630 "golang.org/x/time/rate"
2731)
@@ -103,10 +107,23 @@ func (e *testEngines) listWAGNodes(t *testing.T) []uint64 {
103107 return indices
104108}
105109
110+ func eventuallyExpectWAGNodesIndices (t * testing.T , e * testEngines , expected []uint64 ) {
111+ t .Helper ()
112+ testutils .SucceedsSoon (t , func () error {
113+ actual := e .listWAGNodes (t )
114+ if slices .Equal (expected , actual ) {
115+ return nil
116+ }
117+ return fmt .Errorf ("expected WAG nodes %v, got %v" , expected , actual )
118+ })
119+ }
120+
106121func TestTruncateApplied (t * testing.T ) {
107122 defer leaktest .AfterTest (t )()
108123 defer log .Scope (t ).Close (t )
109124 ctx := context .Background ()
125+ stopper := stop .NewStopper ()
126+ defer stopper .Stop (ctx )
110127 st := cluster .MakeTestingClusterSettings ()
111128
112129 r1 := roachpb.FullReplicaID {RangeID : 1 , ReplicaID : 1 }
@@ -240,7 +257,7 @@ func TestTruncateApplied(t *testing.T) {
240257 t .Run ("" , func (t * testing.T ) {
241258 e := makeTestEngines ()
242259 defer e .Close ()
243- truncator := NewWAGTruncator (st , e .Engines )
260+ truncator := NewWAGTruncator (stopper , st , e .Engines , & e . seq )
244261 tc .setup (t , & e )
245262 require .NoError (t , e .stateEngine .Flush ())
246263 require .NoError (t , truncator .TruncateAll (ctx ))
@@ -258,6 +275,8 @@ func TestTruncateAndClearRaftState(t *testing.T) {
258275 defer leaktest .AfterTest (t )()
259276 defer log .Scope (t ).Close (t )
260277 ctx := context .Background ()
278+ stopper := stop .NewStopper ()
279+ defer stopper .Stop (ctx )
261280 st := cluster .MakeTestingClusterSettings ()
262281
263282 r1 := roachpb.FullReplicaID {RangeID : 1 , ReplicaID : 1 }
@@ -268,7 +287,7 @@ func TestTruncateAndClearRaftState(t *testing.T) {
268287 t .Run (eventType .String (), func (t * testing.T ) {
269288 e := makeTestEngines ()
270289 defer e .Close ()
271- truncator := NewWAGTruncator (st , e .Engines )
290+ truncator := NewWAGTruncator (stopper , st , e .Engines , & e . seq )
272291
273292 // Write WAG nodes: init then destroy/subsume at index 20.
274293 e .writeWAGNode (t , wagpb.Event {
@@ -339,6 +358,8 @@ func TestTruncateGapHandling(t *testing.T) {
339358 defer leaktest .AfterTest (t )()
340359 defer log .Scope (t ).Close (t )
341360 ctx := context .Background ()
361+ stopper := stop .NewStopper ()
362+ defer stopper .Stop (ctx )
342363 st := cluster .MakeTestingClusterSettings ()
343364
344365 r1 := roachpb.FullReplicaID {RangeID : 1 , ReplicaID : 1 }
@@ -372,7 +393,7 @@ func TestTruncateGapHandling(t *testing.T) {
372393 t .Run ("" , func (t * testing.T ) {
373394 e := makeTestEngines ()
374395 defer e .Close ()
375- truncator := NewWAGTruncator (st , e .Engines )
396+ truncator := NewWAGTruncator (stopper , st , e .Engines , & e . seq )
376397
377398 // Write WAG nodes at indices 2, 4, 6.
378399 e .seq .Next ()
@@ -412,3 +433,78 @@ func TestTruncateGapHandling(t *testing.T) {
412433 })
413434 }
414435}
436+
437+ // TestWAGTruncatorBackground verifies that the WAGTruncator background
438+ // goroutine only truncates WAG nodes when both conditions are met: (1) the
439+ // state engine has flushed, and (2) there are WAG nodes to truncate (i.e.,
440+ // seq.Load() > lastTruncatedWAGIndex).
441+ func TestWAGTruncatorBackground (t * testing.T ) {
442+ defer leaktest .AfterTest (t )()
443+ defer log .Scope (t ).Close (t )
444+ ctx := context .Background ()
445+ st := cluster .MakeTestingClusterSettings ()
446+ e := makeTestEngines ()
447+ defer e .Close ()
448+ stopper := stop .NewStopper ()
449+ defer stopper .Stop (ctx ) // must run before e.Close() (LIFO)
450+ r1 := roachpb.FullReplicaID {RangeID : 1 , ReplicaID : 1 }
451+ sl := MakeStateLoader (r1 .RangeID )
452+
453+ // Initialize replica state so events can be considered applied.
454+ require .NoError (t , sl .SetRaftReplicaID (ctx , e .StateEngine (), r1 .ReplicaID ))
455+ require .NoError (t , sl .SetRangeAppliedState (ctx , e .StateEngine (),
456+ & kvserverpb.RangeAppliedState {RaftAppliedIndex : 100 }))
457+
458+ // Start the periodic WAG truncation background task.
459+ truncator := NewWAGTruncator (stopper , st , e .Engines , & e .seq )
460+ require .NoError (t , truncator .Start (ctx ))
461+
462+ flushStateEngineAndSignal := func () {
463+ require .NoError (t , e .StateEngine ().Flush ())
464+ truncator .DurabilityAdvancedCallback ()
465+ }
466+
467+ // No WAG nodes exist. Flushing the state engine should not cause the
468+ // truncator to do anything (seq.Load() == lastTruncatedWAGIndex == 0).
469+ flushStateEngineAndSignal ()
470+ eventuallyExpectWAGNodesIndices (t , & e , nil )
471+ require .Equal (t , truncator .lastTruncatedWAGIndex .Load (), uint64 (0 ))
472+
473+ // Write two WAG nodes whose events are applied (index <= 100).
474+ e .writeWAGNode (t , wagpb.Event {
475+ Addr : wagpb .MakeAddr (r1 , 10 ), Type : wagpb .EventInit ,
476+ })
477+ e .writeWAGNode (t , wagpb.Event {
478+ Addr : wagpb .MakeAddr (r1 , 20 ), Type : wagpb .EventApply ,
479+ })
480+
481+ // WAG nodes exist, but the state engine hasn't flushed yet, so the
482+ // GuaranteedDurability reader won't see the replica state. Signal the
483+ // truncator without flushing first.
484+ truncator .DurabilityAdvancedCallback ()
485+ eventuallyExpectWAGNodesIndices (t , & e , []uint64 {1 , 2 })
486+ require .Equal (t , truncator .lastTruncatedWAGIndex .Load (), uint64 (0 ))
487+
488+ // Now flush the state engine and signal again. Both nodes should be
489+ // truncated since their events are applied (index 10 and 20 <= 100).
490+ flushStateEngineAndSignal ()
491+ eventuallyExpectWAGNodesIndices (t , & e , nil )
492+ require .Equal (t , truncator .lastTruncatedWAGIndex .Load (), uint64 (2 ))
493+
494+ // Write a third WAG node that is NOT applied (index 200 > 100).
495+ e .writeWAGNode (t , wagpb.Event {
496+ Addr : wagpb .MakeAddr (r1 , 200 ), Type : wagpb .EventApply ,
497+ })
498+ flushStateEngineAndSignal ()
499+ // Node 3 should remain because its event isn't applied yet.
500+ eventuallyExpectWAGNodesIndices (t , & e , []uint64 {3 })
501+ require .Equal (t , truncator .lastTruncatedWAGIndex .Load (), uint64 (2 ))
502+
503+ // Advance the applied index past 200 and flush. Now node 3 should be
504+ // truncated.
505+ require .NoError (t , sl .SetRangeAppliedState (ctx , e .StateEngine (),
506+ & kvserverpb.RangeAppliedState {RaftAppliedIndex : 200 }))
507+ flushStateEngineAndSignal ()
508+ eventuallyExpectWAGNodesIndices (t , & e , nil )
509+ require .Equal (t , truncator .lastTruncatedWAGIndex .Load (), uint64 (3 ))
510+ }
0 commit comments