Skip to content

Commit ce46f5f

Browse files
mmastracclaude
andcommitted
feat: add file_clone option for reflink-based disk cache
Cherry-picked from upstream PR mozilla#2640. Adds a file_clone option for the disk cache that stores cache entries as uncompressed files and restores them using filesystem reflinks (clonefile() on APFS, FICLONE on Linux). When supported, restored artifacts share underlying storage blocks with the cache entry. Configure with SCCACHE_FILE_CLONE=true or file_clone = true in [cache.disk] config. Upstream: mozilla#2640 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1134cbc commit ce46f5f

15 files changed

Lines changed: 1071 additions & 82 deletions

File tree

Cargo.lock

Lines changed: 101 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ opendal = { version = "0.55.0", optional = true, default-features = false, featu
7575
] }
7676
openssl = { version = "0.10.75", optional = true }
7777
rand = "0.8.4"
78+
reflink-copy = "0.1"
7879
regex = "1.10.3"
7980
reqsign = { version = "0.18.0", optional = true }
8081
reqwest = { version = "0.12", features = [

docs/FileClone.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# FileClone Storage
2+
3+
## Overview
4+
5+
The `file_clone` option stores cache entries uncompressed and restores them with Copy-on-Write (CoW) reflinks, which makes cache hits faster on filesystems that support them.
6+
7+
## Configuration
8+
9+
Add to your sccache config file (e.g., `~/.config/sccache/config`):
10+
11+
```toml
12+
[cache.disk]
13+
file_clone = true
14+
```
15+
16+
Or set via environment variable:
17+
18+
```bash
19+
export SCCACHE_FILE_CLONE=true
20+
```
21+
22+
## How it Works
23+
24+
When `file_clone` is enabled:
25+
26+
1. **Detection**: sccache checks if the cache directory is on a CoW filesystem (APFS on macOS, Btrfs/XFS on Linux)
27+
2. **Uncompressed Storage**: Cache entries are stored as directories with raw files instead of ZIP+zstd
28+
3. **Reflink Extraction**: On cache hit, files are copied using reflink (near-instant on CoW filesystems)
29+
4. **Fallback**: If CoW is not supported, sccache automatically falls back to traditional compressed storage
30+
31+
## Performance Benefits
32+
33+
On CoW filesystems:
34+
- Near-zero copy time for cached files (reflink uses filesystem-level CoW)
35+
- Reduced CPU usage (no decompression step)
36+
- Trade-off: Slightly higher disk usage (uncompressed files)
37+
38+
## Compatibility
39+
40+
Works on:
41+
- macOS with APFS
42+
- Linux with Btrfs
43+
- Linux with XFS
44+
- Other filesystems with reflink support
45+
46+
If the filesystem doesn't support reflink, sccache automatically uses compressed storage and logs a warning.
47+
48+
## Implementation Details
49+
50+
- Cache entries stored as directories under `cache/a/b/{hash}/`
51+
- Each directory contains: `{object_name}`, `stdout`, `stderr`
52+
- Original ZIP+zstd format still supported for backwards compatibility

src/cache/cache.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,7 @@ pub fn storage_from_config(
606606
preprocessor_cache_mode_config,
607607
rw_mode,
608608
config.basedirs.clone(),
609+
config.fallback_cache.file_clone,
609610
)))
610611
}
611612

src/cache/cache_io.rs

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@ pub struct FileObjectSource {
3434

3535
/// Result of a cache lookup.
3636
pub enum Cache {
37-
/// Result was found in cache.
37+
/// Result was found in cache (compressed ZIP format).
3838
Hit(CacheRead),
39+
/// Result was found in cache (uncompressed directory format).
40+
UncompressedHit(UncompressedCacheEntry),
3941
/// Result was not found in cache.
4042
Miss,
4143
/// Do not cache the results of the compilation.
@@ -48,6 +50,7 @@ impl fmt::Debug for Cache {
4850
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4951
match *self {
5052
Cache::Hit(_) => write!(f, "Cache::Hit(...)"),
53+
Cache::UncompressedHit(_) => write!(f, "Cache::UncompressedHit(...)"),
5154
Cache::Miss => write!(f, "Cache::Miss"),
5255
Cache::None => write!(f, "Cache::None"),
5356
Cache::Recache => write!(f, "Cache::Recache"),
@@ -268,3 +271,80 @@ impl Default for CacheWrite {
268271
Self::new()
269272
}
270273
}
274+
275+
/// A cache entry kept on disk as a plain directory of uncompressed files.
///
/// The methods on this type resolve cached objects, as well as the `stdout`
/// and `stderr` captures, as files located directly inside `dir`.
#[derive(Debug)]
pub struct UncompressedCacheEntry {
    /// Directory that holds the files belonging to this entry.
    pub(crate) dir: PathBuf,
}
280+
281+
impl UncompressedCacheEntry {
282+
pub fn new(dir: PathBuf) -> Self {
283+
Self { dir }
284+
}
285+
286+
pub async fn extract_objects<T>(self, objects: T, pool: &tokio::runtime::Handle) -> Result<()>
287+
where
288+
T: IntoIterator<Item = FileObjectSource> + Send + Sync + 'static,
289+
{
290+
pool.spawn_blocking(move || {
291+
for FileObjectSource {
292+
key,
293+
path,
294+
optional,
295+
} in objects
296+
{
297+
let src = self.dir.join(&key);
298+
299+
if !src.exists() {
300+
if optional {
301+
continue;
302+
}
303+
bail!("Required object '{}' not found in cache", key);
304+
}
305+
306+
let dir = path
307+
.parent()
308+
.context("Output file without a parent directory!")?;
309+
fs::create_dir_all(dir)?;
310+
311+
// Read permissions from the cached source file directly
312+
let mode = get_file_mode(&fs::File::open(&src)?);
313+
314+
// Write to a tempfile and then atomically rename to the final path,
315+
// so parallel builds don't see partially-written files.
316+
let tmp_path = NamedTempFile::new_in(dir)?.into_temp_path();
317+
// Remove the empty temp file so reflink can create the destination
318+
let _ = std::fs::remove_file(&tmp_path);
319+
320+
if let Err(e) = crate::reflink::reflink_or_copy(&src, &tmp_path) {
321+
if !optional {
322+
bail!("Failed to copy object '{}' to {:?}: {}", key, path, e);
323+
}
324+
continue;
325+
}
326+
327+
tmp_path.persist(&path).map_err(|e| {
328+
anyhow::anyhow!("Failed to persist {:?} to {:?}: {}", e.path, path, e.error)
329+
})?;
330+
331+
if let Ok(Some(mode)) = mode {
332+
set_file_mode(&path, mode)?;
333+
}
334+
}
335+
336+
Ok(())
337+
})
338+
.await?
339+
}
340+
341+
pub fn get_stdout(&self) -> Vec<u8> {
342+
let path = self.dir.join("stdout");
343+
fs::read(&path).unwrap_or_default()
344+
}
345+
346+
pub fn get_stderr(&self) -> Vec<u8> {
347+
let path = self.dir.join("stderr");
348+
fs::read(&path).unwrap_or_default()
349+
}
350+
}

0 commit comments

Comments
 (0)