Skip to content

Commit 80aec2d

Browse files
aster-void and claude committed
fix(cleanup): harden runtime directory garbage collection
Add RAII CacheGuard so early exits still clean up sot_path, handle SIGTERM alongside SIGINT, and fall back to filesystem discovery when GetJobIds fails due to actor crash.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9c90b3a commit 80aec2d

File tree

2 files changed

+105
-10
lines changed

2 files changed

+105
-10
lines changed

src/git.rs

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -405,14 +405,52 @@ pub fn copy_build_to_run(build_dir: &Path, run_dir: &Path) -> Result<()> {
405405
Ok(())
406406
}
407407

408+
/// Discovers job directories by scanning the filesystem for `<sot_name>@*` entries.
409+
/// Used as a fallback when the actor system can't provide job IDs.
410+
pub fn discover_job_dirs(sot_path: &Path) -> Vec<PathBuf> {
411+
let Some(parent) = sot_path.parent() else {
412+
return Vec::new();
413+
};
414+
let sot_name = sot_path
415+
.file_name()
416+
.and_then(|s| s.to_str())
417+
.unwrap_or("unknown");
418+
let prefix = format!("{}@", sot_name);
419+
420+
let Ok(entries) = std::fs::read_dir(parent) else {
421+
return Vec::new();
422+
};
423+
424+
entries
425+
.filter_map(|e| e.ok())
426+
.filter(|e| {
427+
e.file_name()
428+
.to_str()
429+
.is_some_and(|name| name.starts_with(&prefix))
430+
})
431+
.map(|e| e.path())
432+
.collect()
433+
}
434+
408435
/// Removes the sot_path and all associated job directories.
436+
/// If `job_ids` is empty, falls back to filesystem discovery.
409437
pub fn cleanup_cache_dir(sot_path: &Path, job_ids: &[String]) {
410438
use tracing::{info, warn};
411439

440+
// Determine job directories: use provided IDs if available, otherwise scan filesystem
441+
let job_dirs: Vec<PathBuf> = if job_ids.is_empty() {
442+
let discovered = discover_job_dirs(sot_path);
443+
if !discovered.is_empty() {
444+
info!("Discovered {} job directories via filesystem scan", discovered.len());
445+
}
446+
discovered
447+
} else {
448+
job_ids.iter().map(|id| get_job_dir(sot_path, id)).collect()
449+
};
450+
412451
// Remove job directories
413-
for job_id in job_ids {
414-
let job_dir = get_job_dir(sot_path, job_id);
415-
let build_dir = get_build_dir(sot_path, job_id);
452+
for job_dir in &job_dirs {
453+
let build_dir = job_dir.join("build");
416454

417455
// Remove git worktree first (if it exists)
418456
if build_dir.join(".git").exists() {
@@ -429,11 +467,11 @@ pub fn cleanup_cache_dir(sot_path: &Path, job_ids: &[String]) {
429467

430468
if job_dir.exists() {
431469
info!(path = %job_dir.display(), "Removing job directory");
432-
let _ = std::fs::remove_dir_all(&job_dir);
470+
let _ = std::fs::remove_dir_all(job_dir);
433471
}
434472

435473
// Also remove temp/old variants for run dir
436-
let run_dir = get_run_dir(sot_path, job_id);
474+
let run_dir = job_dir.join("run");
437475
let _ = std::fs::remove_dir_all(run_dir.with_extension("tmp"));
438476
let _ = std::fs::remove_dir_all(run_dir.with_extension("old"));
439477
}

src/main.rs

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,58 @@ struct Args {
2626
pull_interval: u64,
2727
}
2828

29+
/// RAII guard that cleans up cache directories on drop.
30+
struct CacheGuard {
31+
sot_path: PathBuf,
32+
job_ids: Vec<String>,
33+
}
34+
35+
impl CacheGuard {
36+
fn new(sot_path: PathBuf) -> Self {
37+
Self {
38+
sot_path,
39+
job_ids: Vec::new(),
40+
}
41+
}
42+
43+
fn set_job_ids(&mut self, ids: Vec<String>) {
44+
self.job_ids = ids;
45+
}
46+
47+
/// Disarm the guard (cleanup already handled manually).
48+
#[allow(dead_code)]
49+
fn disarm(self) {
50+
std::mem::forget(self);
51+
}
52+
}
53+
54+
impl Drop for CacheGuard {
55+
fn drop(&mut self) {
56+
git::cleanup_cache_dir(&self.sot_path, &self.job_ids);
57+
}
58+
}
59+
60+
/// Waits for either SIGINT (Ctrl+C) or SIGTERM.
61+
async fn shutdown_signal() {
62+
let ctrl_c = tokio::signal::ctrl_c();
63+
64+
#[cfg(unix)]
65+
{
66+
use tokio::signal::unix::{SignalKind, signal};
67+
let mut sigterm =
68+
signal(SignalKind::terminate()).expect("Failed to register SIGTERM handler");
69+
tokio::select! {
70+
_ = ctrl_c => {}
71+
_ = sigterm.recv() => {}
72+
}
73+
}
74+
75+
#[cfg(not(unix))]
76+
{
77+
ctrl_c.await.ok();
78+
}
79+
}
80+
2981
#[tokio::main]
3082
async fn main() -> Result<()> {
3183
logging::init();
@@ -50,6 +102,9 @@ async fn main() -> Result<()> {
50102
git::clone_to(&source, &sot_path)?;
51103
info!(cache = %sot_path.display(), "Repository ready");
52104

105+
// RAII guard ensures sot_path is cleaned up even on early exit
106+
let mut cache_guard = CacheGuard::new(sot_path.clone());
107+
53108
let (initial_runner, initial_jobs) = load_config(&sot_path)?;
54109

55110
// Spawn Runner actor
@@ -68,18 +123,20 @@ async fn main() -> Result<()> {
68123
return Ok(());
69124
}
70125

71-
// Wait for shutdown signal
72-
tokio::signal::ctrl_c().await?;
126+
// Wait for shutdown signal (SIGINT or SIGTERM)
127+
shutdown_signal().await;
73128
info!("Shutting down...");
74129

75-
// Get job IDs for cleanup
130+
// Get job IDs for cleanup (fall back to filesystem scan if actor is dead)
76131
let job_ids = runner.send(GetJobIds).await.unwrap_or_default();
132+
cache_guard.set_job_ids(job_ids);
77133

78134
// Graceful shutdown
79135
let _ = runner.send(GracefulShutdown).await;
80136

81-
// Cleanup cache directories
82-
git::cleanup_cache_dir(&sot_path, &job_ids);
137+
// Cleanup runs in CacheGuard's Drop impl, which calls git::cleanup_cache_dir.
138+
// The guard is not disarmed; dropping it explicitly here makes the cleanup point visible.
139+
drop(cache_guard);
83140

84141
Ok(())
85142
}

0 commit comments

Comments
 (0)