diff --git a/iac/provider-gcp/main.tf b/iac/provider-gcp/main.tf index 1159f07b66..9f04638704 100644 --- a/iac/provider-gcp/main.tf +++ b/iac/provider-gcp/main.tf @@ -246,8 +246,11 @@ module "nomad" { launch_darkly_api_key_secret_name = module.init.launch_darkly_api_key_secret_version.secret # Filestore - shared_chunk_cache_path = module.cluster.shared_chunk_cache_path - filestore_cache_max_disk_usage_target = var.filestore_max_disk_usage_target + shared_chunk_cache_path = module.cluster.shared_chunk_cache_path + filestore_cache_cleanup_disk_usage_target = var.filestore_cache_cleanup_disk_usage_target + filestore_cache_cleanup_dry_run = var.filestore_cache_cleanup_dry_run + filestore_cache_cleanup_deletions_per_loop = var.filestore_cache_cleanup_deletions_per_loop + filestore_cache_cleanup_files_per_loop = var.filestore_cache_cleanup_files_per_loop } module "redis" { diff --git a/iac/provider-gcp/nomad/clean-nfs-cache.tf b/iac/provider-gcp/nomad/clean-nfs-cache.tf index 772f567afe..defc00f00d 100644 --- a/iac/provider-gcp/nomad/clean-nfs-cache.tf +++ b/iac/provider-gcp/nomad/clean-nfs-cache.tf @@ -20,6 +20,9 @@ resource "nomad_job" "clean_nfs_cache" { environment = var.environment clean_nfs_cache_checksum = data.external.filestore_cleanup_checksum.result.hex nfs_cache_mount_path = var.shared_chunk_cache_path - max_disk_usage_target = var.filestore_cache_max_disk_usage_target + max_disk_usage_target = var.filestore_cache_cleanup_disk_usage_target + dry_run = var.filestore_cache_cleanup_dry_run + deletions_per_loop = var.filestore_cache_cleanup_deletions_per_loop + files_per_loop = var.filestore_cache_cleanup_files_per_loop }) } diff --git a/iac/provider-gcp/nomad/jobs/clean-nfs-cache.hcl b/iac/provider-gcp/nomad/jobs/clean-nfs-cache.hcl index 7222d068e6..5f8f2bf216 100644 --- a/iac/provider-gcp/nomad/jobs/clean-nfs-cache.hcl +++ b/iac/provider-gcp/nomad/jobs/clean-nfs-cache.hcl @@ -22,8 +22,10 @@ job "filestore-cleanup" { config { command = "local/clean-nfs-cache" args = [ - "--dry-run=false", + "--dry-run=${dry_run}", "--disk-usage-target-percent=${max_disk_usage_target}", + "--files-per-loop=${files_per_loop}", + "--deletions-per-loop=${deletions_per_loop}", "${nfs_cache_mount_path}", ] } diff --git a/iac/provider-gcp/nomad/variables.tf b/iac/provider-gcp/nomad/variables.tf index 447944b2ff..4e5a445c0b 100644 --- a/iac/provider-gcp/nomad/variables.tf +++ b/iac/provider-gcp/nomad/variables.tf @@ -336,9 +336,29 @@ variable "shared_chunk_cache_path" { default = "" } -variable "filestore_cache_max_disk_usage_target" { +variable "filestore_cache_cleanup_disk_usage_target" { type = number - description = "The maximum disk usage target for the Filestore cache in percent" + description = "The disk usage target for the Filestore cache in percent" + validation { + condition = var.filestore_cache_cleanup_disk_usage_target >= 0 && var.filestore_cache_cleanup_disk_usage_target < 100 + error_message = "Must be between 0 and 100" + } +} + +variable "filestore_cache_cleanup_dry_run" { + type = bool +} + +variable "filestore_cache_cleanup_deletions_per_loop" { + type = number + validation { + condition = var.filestore_cache_cleanup_deletions_per_loop > 0 + error_message = "Must be greater than 0" + } +} + +variable "filestore_cache_cleanup_files_per_loop" { + type = number } variable "dockerhub_remote_repository_url" { diff --git a/iac/provider-gcp/variables.tf b/iac/provider-gcp/variables.tf index b31b48c426..1ab67e0334 100644 --- a/iac/provider-gcp/variables.tf +++ b/iac/provider-gcp/variables.tf @@ -430,12 +430,27 @@ variable "filestore_cache_capacity_gb" { default = 0 } -variable "filestore_max_disk_usage_target" { +variable "filestore_cache_cleanup_disk_usage_target" { type = number description = "The max disk usage target of the Filestore" default = 90 } +variable "filestore_cache_cleanup_dry_run" { + type = bool + default = false +} + +variable "filestore_cache_cleanup_files_per_loop" { + type = number + default = 10000 +} + +variable "filestore_cache_cleanup_deletions_per_loop" { + type = number + default = 900 +} + variable "min_cpu_platform" { type = string default = "Intel Skylake" diff --git a/packages/orchestrator/cmd/clean-nfs-cache/main.go b/packages/orchestrator/cmd/clean-nfs-cache/main.go index c119a3c801..f8975ba9ba 100644 --- a/packages/orchestrator/cmd/clean-nfs-cache/main.go +++ b/packages/orchestrator/cmd/clean-nfs-cache/main.go @@ -300,10 +300,10 @@ func parseArgs() (string, opts, error) { flags := flag.NewFlagSet("clean-nfs-cache", flag.ExitOnError) var opts opts - flags.Float64Var(&opts.targetDiskUsagePercent, "disk-usage-target-percent", 10, "disk usage target as a % (0-100)") + flags.Float64Var(&opts.targetDiskUsagePercent, "disk-usage-target-percent", 90, "disk usage target as a % (0-100)") flags.BoolVar(&opts.dryRun, "dry-run", true, "dry run") - flags.IntVar(&opts.filesPerLoop, "files-per-iteration", 10000, "number of files to gather metadata for per loop") - flags.Int64Var(&opts.filesToDeletePerLoop, "max-files-per-iteration", 100, "maximum number of files to delete per loop") + flags.IntVar(&opts.filesPerLoop, "files-per-loop", 10000, "number of files to gather metadata for per loop") + flags.Int64Var(&opts.filesToDeletePerLoop, "deletions-per-loop", 100, "maximum number of files to delete per loop") args := os.Args[1:] // skip the command name if err := flags.Parse(args); err != nil { diff --git a/packages/orchestrator/cmd/clean-nfs-cache/pkg/cache.go b/packages/orchestrator/cmd/clean-nfs-cache/pkg/cache.go index e30f7515c1..101d54a7ac 100644 --- a/packages/orchestrator/cmd/clean-nfs-cache/pkg/cache.go +++ b/packages/orchestrator/cmd/clean-nfs-cache/pkg/cache.go @@ -5,6 +5,7 @@ import ( "math/rand" "os" "path/filepath" + "slices" ) type ListingCache struct { @@ -26,7 +27,31 @@ func NewListingCache(root string) *ListingCache { func (c *ListingCache) Decache(path string) { dirName := filepath.Dir(path) - delete(c.cache, dirName) + items, ok := c.cache[dirName] + if !ok { + return + } + + index := slices.IndexFunc(items, func(e cacheEntry) bool { + return e.path == path + }) + items = removeByIndex(items, index) + c.cache[dirName] = items +} + +func removeByIndex[E any](items []E, index int) []E { + if index < 0 || index >= len(items) { + return items + } + + switch index { + case 0: + return items[1:] + case len(items) - 1: + return items[:len(items)-1] + default: + return append(items[:index], items[index+1:]...) + } } func (c *ListingCache) GetRandomFile() (string, error) { diff --git a/packages/orchestrator/cmd/clean-nfs-cache/pkg/cache_test.go b/packages/orchestrator/cmd/clean-nfs-cache/pkg/cache_test.go index 1b681057c0..6d647979fd 100644 --- a/packages/orchestrator/cmd/clean-nfs-cache/pkg/cache_test.go +++ b/packages/orchestrator/cmd/clean-nfs-cache/pkg/cache_test.go @@ -75,3 +75,104 @@ func TestGetRandomFile(t *testing.T) { assert.Empty(t, f) }) } + +func TestListingCache_Decache(t *testing.T) { + t.Parallel() + + testCases := map[string]struct { + path string + cached []string + expected []string + }{ + "not in cache": { + path: "c", + cached: []string{"a", "b"}, + expected: []string{"a", "b"}, + }, + "in cache": { + path: "c", + cached: []string{"a", "b", "c"}, + expected: []string{"a", "b"}, + }, + } + + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + t.Parallel() + + basePath := "/a/b" + + fullPath := filepath.Join(basePath, testCase.path) + items := buildItemsFromPaths(basePath, testCase.cached) + + lc := ListingCache{ + root: basePath, + cache: map[string][]cacheEntry{ + basePath: items, + }, + } + + lc.Decache(fullPath) + + expected := buildItemsFromPaths(basePath, testCase.expected) + assert.Equal(t, expected, lc.cache[basePath]) + }) + } +} + +func buildItemsFromPaths(basePath string, input []string) []cacheEntry { + var items []cacheEntry + for _, path := range input { + items = append(items, cacheEntry{ + path: filepath.Join(basePath, path), + isDir: false, + }) + } + return items +} + +func TestRemoveByIndex(t *testing.T) { + t.Parallel() + + testCases := map[string]struct { + input []string + index int + expected []string + }{ + "empty": {}, + "only": { + input: []string{"one"}, + index: 0, + expected: []string{}, + }, + "first": { + input: []string{"one", "two", "three"}, + index: 0, + expected: []string{"two", "three"}, + }, + "middle": { + input: []string{"one", "two", "three"}, + index: 1, + expected: []string{"one", "three"}, + }, + "last": { + input: []string{"one", "two", "three"}, + index: 2, + expected: []string{"one", "two"}, + }, + "missing": { + input: []string{"one", "two", "three"}, + index: -1, + expected: []string{"one", "two", "three"}, + }, + } + + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + t.Parallel() + + actual := removeByIndex(testCase.input, testCase.index) + assert.Equal(t, testCase.expected, actual) + }) + } +}