From dff2bece2f8d4d0924ff805f57f726df347bc585 Mon Sep 17 00:00:00 2001
From: Vladimir Savchenko
Date: Wed, 6 May 2026 10:31:53 +0300
Subject: [PATCH] Cap GC threshold at store filesystem capacity

When a cell is redeployed with a smaller disk, the thresholder computes
a GC threshold from the new disk's available space that can exceed the
existing GrootFS XFS backing store capacity. This results in the
threshold never being reached and garbage collection never firing,
eventually leading to ENOSPC.

After computing the threshold via the calculator, cap it at the actual
store filesystem total capacity obtained via statfs(2). This ensures
threshold_bytes <= store size regardless of disk resize history.

Adds Meter.GetTotalCapacity() using Blocks*Bsize from statfs, with unit
tests and an integration test that creates a small XFS store inside the
test filesystem to verify the cap fires correctly.

Closes: CFAR-1378
---
Review revisions relative to the originally posted patch:
* disk.go: document the new exported GetTotalCapacity method.
* disk_test.go: rename the local `cap`, which shadowed the Go builtin.
* main.go: state in the comment that the cap is best effort.
* thresholder_integration_suite_test.go: createSmallStore reports
  failures at the caller (ExpectWithOffset, like its sibling helpers),
  uses os.MkdirAll instead of shelling out to mkdir, and returns its
  results explicitly.
* thresholder_integration_test.go: AfterEach no longer discards the
  umount/rm errors.
The index lines still carry the post-image blob hashes of the original
patch; regenerate with git format-patch if they are needed (3-way apply).

 src/thresholder/disk/disk.go                  | 11 ++++++
 src/thresholder/disk/disk_test.go             | 30 ++++++++++++++++
 src/thresholder/disk/fs.go                    |  1 +
 src/thresholder/main.go                       |  7 ++++
 .../thresholder_integration_suite_test.go     | 34 +++++++++++++++++++
 .../thresholder_integration_test.go           | 26 ++++++++++++++
 6 files changed, 109 insertions(+)

diff --git a/src/thresholder/disk/disk.go b/src/thresholder/disk/disk.go
index 6d2e5539a..536ea6d34 100644
--- a/src/thresholder/disk/disk.go
+++ b/src/thresholder/disk/disk.go
@@ -6,6 +6,7 @@ import (
 
 type Stat struct {
 	AvailableBlocks int64
+	TotalBlocks     int64
 	BlockSize       int64
 }
 
@@ -35,3 +36,13 @@ func (d Meter) GetAvailableSpace(path string) (int64, error) {
 
 	return stat.BlockSize * stat.AvailableBlocks, nil
 }
+
+// GetTotalCapacity returns the total size in bytes (block size times total block count) reported by stat for path.
+func (d Meter) GetTotalCapacity(path string) (int64, error) {
+	stat, err := d.fs.Stat(path)
+	if err != nil {
+		return 0, fmt.Errorf("cannot stat %s: %w", path, err)
+	}
+
+	return stat.BlockSize * stat.TotalBlocks, nil
+}
diff --git a/src/thresholder/disk/disk_test.go b/src/thresholder/disk/disk_test.go
index 4f2a8cc19..196de9ef4 100644
--- a/src/thresholder/disk/disk_test.go
+++ b/src/thresholder/disk/disk_test.go
@@ -48,3 +48,33 @@ var _ = Describe("Meter", func() {
 		})
 	})
 })
+
+var _ = Describe("Meter - GetTotalCapacity", func() {
+	var (
+		fs    *diskfakes.FakeFS
+		meter disk.Meter
+	)
+
+	BeforeEach(func() {
+		fs = new(diskfakes.FakeFS)
+		fs.StatReturns(disk.Stat{TotalBlocks: 10, BlockSize: 7}, nil)
+		meter = disk.NewMeterWithFS(fs)
+	})
+
+	It("returns TotalBlocks * BlockSize", func() {
+		capacity, err := meter.GetTotalCapacity("/store/path")
+		Expect(err).NotTo(HaveOccurred())
+		Expect(capacity).To(Equal(int64(70)))
+	})
+
+	When("statting fails", func() {
+		BeforeEach(func() {
+			fs.StatReturns(disk.Stat{}, errors.New("stat-error"))
+		})
+
+		It("returns the error", func() {
+			_, err := meter.GetTotalCapacity("/store/path")
+			Expect(err).To(MatchError("cannot stat /store/path: stat-error"))
+		})
+	})
+})
diff --git a/src/thresholder/disk/fs.go b/src/thresholder/disk/fs.go
index e0e0d158b..3b3e9e7f6 100644
--- a/src/thresholder/disk/fs.go
+++ b/src/thresholder/disk/fs.go
@@ -19,6 +19,7 @@ func (fs SysFS) Stat(path string) (Stat, error) {
 	return Stat{
 		// #nosec G115 - changing these attributes to uint64 has a bunch of knock on effects that would change grootfs interfaces. We are fine until filesystems are > 9.2 exabytes though
 		AvailableBlocks: int64(fsStat.Bavail),
+		TotalBlocks:     int64(fsStat.Blocks),
 		BlockSize:       fsStat.Bsize,
 	}, nil
 }
diff --git a/src/thresholder/main.go b/src/thresholder/main.go
index 24c61fa99..46a8a1ede 100644
--- a/src/thresholder/main.go
+++ b/src/thresholder/main.go
@@ -44,6 +44,13 @@ func main() {
 	config.Clean.ThresholdBytes = calc.CalculateGCThreshold()
 	config.Init.StoreSizeBytes = calc.CalculateStoreSize()
 
+	// Cap at actual store filesystem capacity to prevent threshold > store on redeploys (best effort: if the store cannot be statted, the computed threshold is kept).
+	if storeCapacity, err := disk.NewMeter().GetTotalCapacity(config.StorePath); err == nil && storeCapacity > 0 {
+		if config.Clean.ThresholdBytes > storeCapacity {
+			config.Clean.ThresholdBytes = storeCapacity
+		}
+	}
+
 	writeConfig(config, configPath)
 
 	if config.Init.StoreSizeBytes == diskSize {
diff --git a/src/thresholder/thresholder_integration_suite_test.go b/src/thresholder/thresholder_integration_suite_test.go
index bbd208496..60774cec5 100644
--- a/src/thresholder/thresholder_integration_suite_test.go
+++ b/src/thresholder/thresholder_integration_suite_test.go
@@ -3,14 +3,18 @@ package main_test
 import (
 	"bytes"
 	"io"
+	"os"
 	"os/exec"
+	"path/filepath"
 	"syscall"
 	"testing"
 
+	"code.cloudfoundry.org/grootfs/commands/config"
 	"github.com/BurntSushi/toml"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"github.com/onsi/gomega/gexec"
+	yaml "gopkg.in/yaml.v2"
 )
 
 var (
@@ -71,6 +75,36 @@ func getDiskAvailableSpace(diskPath string) int64 {
 	return fsStat.Bsize * int64(fsStat.Bavail)
 }
 
+func getFilesystemTotalCapacity(path string) int64 {
+	var fsStat syscall.Statfs_t
+	Expect(syscall.Statfs(path, &fsStat)).To(Succeed())
+	return fsStat.Bsize * int64(fsStat.Blocks)
+}
+
+func createSmallStore(parentDir string) (mntPath, filePath string) {
+	filePath = filepath.Join(parentDir, "small_store_file")
+	mntPath = filepath.Join(parentDir, "small_store_mnt")
+
+	ExpectWithOffset(1, exec.Command("truncate", "-s", "100M", filePath).Run()).To(Succeed(), "running truncate failed")
+	ExpectWithOffset(1, exec.Command("mkfs.xfs", filePath).Run()).To(Succeed(), "running mkfs.xfs failed")
+	ExpectWithOffset(1, os.MkdirAll(mntPath, 0755)).To(Succeed(), "creating the mount point failed")
+	ExpectWithOffset(1, exec.Command("mount", filePath, mntPath).Run()).To(Succeed(), "running mount failed")
+	return mntPath, filePath
+}
+
+func updateConfigStorePath(configPath, storePath string) {
+	content, err := os.ReadFile(configPath)
+	ExpectWithOffset(1, err).NotTo(HaveOccurred())
+
+	var c config.Config
+	ExpectWithOffset(1, yaml.Unmarshal(content, &c)).To(Succeed())
+	c.StorePath = storePath
+
+	updatedContent, err := yaml.Marshal(&c)
+	ExpectWithOffset(1, err).NotTo(HaveOccurred())
+	ExpectWithOffset(1, os.WriteFile(configPath, updatedContent, 0600)).To(Succeed())
+}
+
 func createAndMountFilesystem(filename, size, mntPoint string) {
 	err := exec.Command("truncate", "-s", size, filename).Run()
 	ExpectWithOffset(1, err).NotTo(HaveOccurred(), "running truncate failed")
diff --git a/src/thresholder/thresholder_integration_test.go b/src/thresholder/thresholder_integration_test.go
index 2a4e425f5..c97b36789 100644
--- a/src/thresholder/thresholder_integration_test.go
+++ b/src/thresholder/thresholder_integration_test.go
@@ -145,6 +145,32 @@ var _ = Describe("Thresholder", func() {
 		exitsNonZeroWithMessage(pathToDisk)
 	})
 
+	When("the store already exists with a smaller capacity than computed threshold", func() {
+		var (
+			storeMnt  string
+			storeFile string
+		)
+
+		BeforeEach(func() {
+			storeMnt, storeFile = createSmallStore(fsMountPoint)
+			updateConfigStorePath(pathToGrootfsConfig, storeMnt)
+		})
+
+		AfterEach(func() {
+			Expect(exec.Command("umount", storeMnt).Run()).To(Succeed())
+			Expect(exec.Command("rm", "-rf", storeFile, storeMnt).Run()).To(Succeed())
+		})
+
+		It("caps threshold_bytes at the store capacity", func() {
+			gexecStartAndWait(thresholderCmd, GinkgoWriter, GinkgoWriter)
+			config := configFromFile(pathToGrootfsConfig)
+
+			storeCapacity := getFilesystemTotalCapacity(storeMnt)
+			Expect(config.Clean.ThresholdBytes).To(Equal(storeCapacity))
+			Expect(config.Clean.ThresholdBytes).To(BeNumerically("<", diskSize-megabytesToBytes(3000)))
+		})
+	})
+
 	Describe("Parameters validation", func() {
 		Context("when too few input args are provided", func() {
 			JustBeforeEach(func() {