Skip to content

Commit 84fed8d

Browse files
committed
raftengine/etcd: clean up orphaned snapshot spool files on startup
Snapshot spool files (elastickv-etcd-snapshot-*) were left behind when the engine crashed before snapshotSpool.Close() could run. Since the spool directory was moved from os temp to cfg.DataDir, these files accumulated without bound, eventually filling the disk. Add cleanupStaleSnapshotSpools() and call it from openDiskState() so orphaned spools are removed on every engine restart.
1 parent 06ad365 commit 84fed8d

3 files changed

Lines changed: 67 additions & 0 deletions

File tree

internal/raftengine/etcd/snapshot_spool.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package etcd
33
import (
44
"io"
55
"os"
6+
"path/filepath"
67

78
"github.com/cockroachdb/errors"
89
)
@@ -64,6 +65,24 @@ func (s *snapshotSpool) Reader() (io.Reader, error) {
6465
return s.file, nil
6566
}
6667

68+
// cleanupStaleSnapshotSpools removes orphaned snapshot spool files left behind
69+
// by a previous engine instance that crashed before Close could run.
70+
func cleanupStaleSnapshotSpools(dir string) error {
71+
matches, err := filepath.Glob(filepath.Join(dir, snapshotSpoolPattern))
72+
if err != nil {
73+
return errors.WithStack(err)
74+
}
75+
var combined error
76+
for _, match := range matches {
77+
removeErr := os.Remove(match)
78+
if removeErr == nil || os.IsNotExist(removeErr) {
79+
continue
80+
}
81+
combined = errors.CombineErrors(combined, errors.WithStack(removeErr))
82+
}
83+
return errors.WithStack(combined)
84+
}
85+
6786
func (s *snapshotSpool) Close() error {
6887
if s == nil {
6988
return nil
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package etcd
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
func TestCleanupStaleSnapshotSpools(t *testing.T) {
12+
dir := t.TempDir()
13+
14+
// Create several orphaned spool files matching the pattern.
15+
for i := 0; i < 5; i++ {
16+
f, err := os.CreateTemp(dir, snapshotSpoolPattern)
17+
require.NoError(t, err)
18+
require.NoError(t, f.Close())
19+
}
20+
21+
// Create an unrelated file that must not be removed.
22+
unrelated := filepath.Join(dir, "keep-me.txt")
23+
require.NoError(t, os.WriteFile(unrelated, []byte("data"), 0o644))
24+
25+
matches, err := filepath.Glob(filepath.Join(dir, snapshotSpoolPattern))
26+
require.NoError(t, err)
27+
require.Len(t, matches, 5)
28+
29+
require.NoError(t, cleanupStaleSnapshotSpools(dir))
30+
31+
// All spool files should be gone.
32+
matches, err = filepath.Glob(filepath.Join(dir, snapshotSpoolPattern))
33+
require.NoError(t, err)
34+
require.Empty(t, matches)
35+
36+
// Unrelated file should still exist.
37+
_, err = os.Stat(unrelated)
38+
require.NoError(t, err)
39+
}
40+
41+
func TestCleanupStaleSnapshotSpoolsEmptyDir(t *testing.T) {
42+
dir := t.TempDir()
43+
require.NoError(t, cleanupStaleSnapshotSpools(dir))
44+
}

internal/raftengine/etcd/wal_store.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ func openDiskState(cfg OpenConfig, peers []Peer) (*diskState, error) {
3838
return nil, errors.WithStack(err)
3939
}
4040

41+
if err := cleanupStaleSnapshotSpools(cfg.DataDir); err != nil {
42+
return nil, errors.Wrap(err, "cleanup stale snapshot spools")
43+
}
44+
4145
if wal.Exist(walDir) {
4246
return loadWalState(logger, walDir, snapDir, cfg.StateMachine)
4347
}

0 commit comments

Comments
 (0)