@@ -73,6 +73,7 @@ import (
7373 "github.com/cockroachdb/cockroach/pkg/util/tracing"
7474 "github.com/cockroachdb/cockroach/pkg/util/uuid"
7575 "github.com/cockroachdb/errors"
76+ "github.com/cockroachdb/errors/oserror"
7677 "github.com/cockroachdb/redact"
7778 "github.com/stretchr/testify/assert"
7879 "github.com/stretchr/testify/require"
@@ -424,6 +425,86 @@ func TestStoreInitAndBootstrap(t *testing.T) {
424425 })
425426}
426427
428+ // TestStoreStartClearsSnapshotStorageScratch verifies the scratch directory's
429+ // lifecycle across Store.Start, for both single and separated engines:
430+ //
431+ // 1. Leftover scratch files (simulating a crash mid-snapshot) survive past
432+ // the point where WAG replay would consume them. This is checked via the
433+ // BeforeClearSnapshotScratchOnStart knob.
434+ // 2. The same files are then removed by Clear before Start returns.
435+ //
436+ // TODO(sep-raft-log): Ensure that the test still passes after introducing WAG
437+ // replay. It is essential to have a flush after finishing WAG replay to avoid
438+ // deleting files that would be needed in case of a crash.
439+ func TestStoreStartClearsSnapshotStorageScratch (t * testing.T ) {
440+ defer leaktest .AfterTest (t )()
441+ defer log .Scope (t ).Close (t )
442+
443+ testutils .RunTrueAndFalse (t , "separated" , func (t * testing.T , sepEng bool ) {
444+ ctx := context .Background ()
445+ stopper := stop .NewStopper ()
446+ defer stopper .Stop (ctx )
447+ cfg := TestStoreConfig (nil )
448+
449+ // The knob fires during Start, by which point env and ssts (assigned
450+ // below after the store is built) are populated. The closure captures
451+ // them by reference.
452+ var env * fs.Env
453+ var ssts []string
454+ var preClearCalls int
455+ var preClearErrs []error
456+ cfg .TestingKnobs .BeforeClearSnapshotScratchOnStart = func () {
457+ preClearCalls ++
458+ for _ , p := range ssts {
459+ if _ , statErr := env .Stat (p ); statErr != nil {
460+ preClearErrs = append (preClearErrs ,
461+ errors .Wrapf (statErr , "scratch file %s missing at pre-clear hook" , p ))
462+ }
463+ }
464+ }
465+
466+ store := createTestStoreWithoutStart (
467+ ctx , t , stopper , testStoreOpts {useSeparatedEngines : sepEng }, & cfg ,
468+ )
469+
470+ // Seed a leftover scratch file under the snapshot storage directory.
471+ // We deliberately skip scratch.Close() to simulate a node that crashed
472+ // mid-snapshot and never ran the per-snapshot cleanup.
473+ scratch := store .sstSnapshotStorage .NewScratchSpace (
474+ roachpb .RangeID (42 ), uuid .MakeV4 (), cfg .Settings ,
475+ )
476+ f , err := scratch .NewFile (ctx , 0 )
477+ require .NoError (t , err )
478+ require .NoError (t , f .Write ([]byte ("leftover sst" )))
479+ require .NoError (t , f .Finish ())
480+
481+ env = store .StateEngine ().Env ()
482+ ssts = scratch .SSTs ()
483+ require .NotEmpty (t , ssts )
484+
485+ // Sanity check: the leftover file exists before Start.
486+ for _ , p := range ssts {
487+ _ , statErr := env .Stat (p )
488+ require .NoError (t , statErr , "scratch file %s should exist before Start" , p )
489+ }
490+
491+ require .NoError (t , store .Start (ctx , stopper ))
492+ store .WaitForInit ()
493+
494+ // (1) The pre-clear hook ran exactly once, with all leftover files
495+ // still on disk. This is where WAG replay would consume them.
496+ require .Equal (t , 1 , preClearCalls , "pre-clear knob should fire exactly once" )
497+ require .Empty (t , preClearErrs , "leftover scratch files should survive past WAG replay" )
498+
499+ // (2) Scratch file should have been removed by Start.
500+ for _ , p := range ssts {
501+ _ , statErr := env .Stat (p )
502+ require .True (t , oserror .IsNotExist (statErr ),
503+ "scratch file %s should be removed by Start, got err=%v" , p , statErr )
504+ }
505+ })
506+ }
507+
427508// TestInitializeEngineErrors verifies bootstrap failure if engine
428509// is not empty.
429510func TestInitializeEngineErrors (t * testing.T ) {
0 commit comments