Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scarb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,4 @@ zip.workspace = true
[features]
default = ["scarb-lint"]
scarb-lint = ["dep:cairo-lint"]
test-utils = []
63 changes: 62 additions & 1 deletion scarb/src/ops/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,75 @@ use anyhow::{Context, Result};

use crate::core::Config;

// Name of the package cache lock file inside the cache dir. Kept in sync with
// `Config::package_cache_lock`. `cache_clean` must NOT unlink this file: the
// `flock` held by the in-progress clean is per-inode, and unlinking allows a
// concurrent `acquire_async` to create a different inode at the same path
// and succeed against it as if uncontended — both processes would then
// believe they hold the exclusive lock at the same time. The path is also
// part of the on-disk contract with older scarb versions, so we cannot move
// it without losing cross-version mutual exclusion.
const PACKAGE_CACHE_LOCK_FILENAME: &str = ".package-cache.lock";

#[tracing::instrument(skip_all, level = "debug")]
pub fn cache_clean(config: &Config) -> Result<()> {
let path = config.dirs().cache_dir.path_unchecked();
if path.exists() {
let _lock = config
.tokio_handle()
.block_on(config.package_cache_lock().acquire_async())?;
scarb_fs_utils::remove_dir_all(path).context("failed to clean cache")?;

// Remove every entry in the cache dir except the lock file itself.
// See `PACKAGE_CACHE_LOCK_FILENAME` for why the lock file must stay.
for entry in std::fs::read_dir(path).context("failed to clean cache")? {
let entry = entry.context("failed to clean cache")?;
if entry.file_name() == PACKAGE_CACHE_LOCK_FILENAME {
continue;
}
let entry_path = entry.path();
let file_type = entry
.file_type()
.with_context(|| format!("failed to stat cache entry: {}", entry_path.display()))?;
if file_type.is_dir() {
scarb_fs_utils::remove_dir_all(&entry_path).with_context(|| {
format!("failed to clean cache entry: {}", entry_path.display())
})?;
} else {
std::fs::remove_file(&entry_path).with_context(|| {
format!("failed to clean cache entry: {}", entry_path.display())
})?;
}
}

test_pause_hook();
}
Ok(())
}

// Test-only synchronization point (see
// `cache_clean_race_with_concurrent_lock_acquisition` in `scarb/tests/cache.rs`).
//
// When `SCARB_INTERNAL_CACHE_CLEAN_PAUSE` is set to a path prefix `P`, this:
// 1. creates `P.ready` to signal the test that the lock is held and the
// cache contents (other than the lock file) have just been removed,
// 2. blocks until `P.go` appears.
//
// At this point the cache dir has been emptied except for the lock file, and
// `_lock` in `cache_clean` still holds the `flock` — exactly the state a
// concurrent `acquire_async` must observe and block on. The env var is
// namespaced as `_INTERNAL_` to make clear it is not part of the public
// CLI/env contract.
#[cfg(feature = "test-utils")]
fn test_pause_hook() {
let Ok(prefix) = std::env::var("SCARB_INTERNAL_CACHE_CLEAN_PAUSE") else {
return;
};
let _ = std::fs::write(format!("{prefix}.ready"), "");
let go = std::path::PathBuf::from(format!("{prefix}.go"));
while !go.exists() {
std::thread::sleep(std::time::Duration::from_millis(10));
}
}

#[cfg(not(feature = "test-utils"))]
fn test_pause_hook() {}
123 changes: 122 additions & 1 deletion scarb/tests/cache.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
use std::process::Stdio;
use std::time::{Duration, Instant};

use assert_fs::{TempDir, prelude::*};
use ntest::timeout;
use snapbox::Data;

use scarb_test_support::command::Scarb;
Expand Down Expand Up @@ -27,7 +31,15 @@ fn simple_clean() {
.current_dir(&t)
.assert()
.success();
cache_dir.assert(predicates::path::missing());
// After `cache clean`, the cache dir survives but contains only the
// package cache lock file. See `ops::cache::PACKAGE_CACHE_LOCK_FILENAME`
// for why the lock file is preserved across clean.
cache_dir.assert(predicates::path::is_dir());
let entries: Vec<_> = std::fs::read_dir(cache_dir.path())
.unwrap()
.map(|e| e.unwrap().file_name())
.collect();
assert_eq!(entries, [".package-cache.lock"]);
}

#[test]
Expand All @@ -47,3 +59,112 @@ fn path_print() {
.success();
cache_dir.assert(predicates::path::is_dir());
}

// Regression test for the lock-file race in `ops::cache::cache_clean`:
// 1. clean acquires `flock(EX)` on `<cache>/.package-cache.lock`,
// 2. `remove_dir_all(<cache>)` unlinks the lock file,
// 3. a concurrent `package_cache_lock().acquire_async()` recreates the
// cache dir, opens a brand-new lock file at the same path (different
// inode), and `flock(EX)` succeeds immediately — `flock` is per-inode,
// so it does not conflict with the still-open, now-unlinked old inode.
//
// Without a fix, the two scarb processes both believe they hold the
// exclusive package cache lock at the same time.
//
// This test makes the race deterministic via the
// `SCARB_INTERNAL_CACHE_CLEAN_PAUSE` hook in `cache_clean`: clean is paused
// while still holding `_lock` and after `remove_dir_all` has completed, the
// test then runs `scarb fetch` and signals clean to resume.
//
// Correct behavior: while clean holds the package cache lock, `scarb fetch`
// must block on lock acquisition and emit `flock.rs`'s "Blocking waiting
// for file lock" message. The test asserts that, and therefore fails until
// the lock is kept on a stable inode across `cache clean` (e.g. moved out
// of the cache dir).
#[cfg(unix)]
#[test]
#[timeout(60_000)]
fn cache_clean_race_with_concurrent_lock_acquisition() {
Comment thread
wawel37 marked this conversation as resolved.
let cache_dir = TempDir::new().unwrap();
let config_dir = TempDir::new().unwrap();
let sync_dir = TempDir::new().unwrap();
let t = TempDir::new().unwrap();
ProjectBuilder::start()
.name("hello")
.version("0.1.0")
.build(&t);

// cache_clean only does work when the cache dir already exists.
std::fs::create_dir_all(cache_dir.path()).unwrap();

let prefix = sync_dir.child("clean-pause");
let ready_path = sync_dir.child("clean-pause.ready");
let go_path = sync_dir.child("clean-pause.go");

let mut clean = Scarb::new()
.cache(cache_dir.path())
.config(config_dir.path())
.std()
.arg("cache")
.arg("clean")
.env("SCARB_INTERNAL_CACHE_CLEAN_PAUSE", prefix.path())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.unwrap();

// Wait until cache_clean has acquired the lock and finished remove_dir_all.
let deadline = Instant::now() + Duration::from_secs(30);
while !ready_path.path().exists() {
if Instant::now() > deadline {
let _ = clean.kill();
panic!("scarb cache clean never reached the pause sentinel");
}
std::thread::sleep(Duration::from_millis(20));
}

// Clean is now paused mid-flight: cache dir + lock file unlinked, but it
// still holds the flock on the original inode. Spawn fetch — under a
// correct implementation it must block on lock acquisition until clean
// releases.
let fetch = Scarb::new()
.cache(cache_dir.path())
.config(config_dir.path())
.std()
.arg("fetch")
.current_dir(&t)
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.unwrap();

// Give fetch a moment to reach `acquire_async` and either block (correct)
// or grab a brand-new-inode lock (buggy). Then release clean so fetch
// can complete in either case.
std::thread::sleep(Duration::from_millis(500));
std::fs::write(go_path.path(), "").unwrap();

let fetch_output = fetch.wait_with_output().unwrap();
let clean_output = clean.wait_with_output().unwrap();

let fetch_stdout = String::from_utf8_lossy(&fetch_output.stdout);
let fetch_stderr = String::from_utf8_lossy(&fetch_output.stderr);
let clean_stdout = String::from_utf8_lossy(&clean_output.stdout);
let clean_stderr = String::from_utf8_lossy(&clean_output.stderr);

assert!(
clean_output.status.success(),
"scarb cache clean failed\nstdout: {clean_stdout}\nstderr: {clean_stderr}",
);
assert!(
fetch_output.status.success(),
"scarb fetch failed\nstdout: {fetch_stdout}\nstderr: {fetch_stderr}",
);
assert!(
fetch_stdout.contains("Blocking waiting for file lock"),
"scarb fetch did NOT block on the package cache lock while \
`scarb cache clean` was holding it — both processes acquired what \
they each consider an exclusive lock at the same time. \
fetch stdout: {fetch_stdout}",
);
}
Loading