Skip to content

Commit 0dc1576

Browse files
wan9chiclaude
andcommitted
fix(worldline): attribute each write's "before" correctly across replace and fd reuse
Hardening from an audit of the capture pipeline, fixing four ways a write's `before` content could be wrong: - Rename-over and delete-and-recreate (the write-tmp-then-rename pattern is ubiquitous in build tools) resurrected the replaced file's stale content. The file's on-disk identity (inode+device, or volume+index on Windows) is now tracked, so an empty truncating-open read keeps the prior content only when it's the *same* file; a replaced path is treated as fresh (empty `before`). - Under the seccomp backend (raw_fd == -1) every open of a path shared one slot, so a second overlapping write lost its `before`. Pending opens are now a LIFO stack per correlation key. - A descriptor reused for another file (e.g. via dup2, which closes the old target without a close callback) could mispair a close with a leaked open of a different file. The close now validates the open's path and falls back to the file's prior content on a mismatch. The in-place truncating-rewrite fix (keeping the prior content as `before`) is preserved. Adds store-level regression tests for each case. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 4eeab9c commit 0dc1576

5 files changed

Lines changed: 233 additions & 29 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dist
88
# be embedded in the binary without a Node step in CI.
99
!crates/worldline/ui/dist/
1010
.claude/settings.local.json
11+
.claude/scheduled_tasks.lock
1112
*.tsbuildinfo
1213
.DS_Store
1314
/.vscode/settings.json

crates/worldline/src/capture/mod.rs

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ mod store;
66
use std::sync::Arc;
77

88
use fspy::{AccessMode, FileEvent, FileEventKind};
9-
pub use store::{CapturedData, OpId, Snapshotter, Write, rebuild_frontier};
9+
pub use store::{CapturedData, FileId, OpId, Snapshotter, Write, rebuild_frontier};
1010
use vite_path::AbsolutePath;
1111

1212
use crate::ignore::IgnoreSet;
@@ -23,9 +23,10 @@ use crate::ignore::IgnoreSet;
2323
/// which is write-only for a write open and therefore unreadable. We instead
2424
/// read the file's content by path: the supervisor process opens it `O_RDONLY`
2525
/// itself. The traced process is blocked in the open/close hook while we do
26-
/// this, so the content is a consistent point-in-time read. (For a truncating
27-
/// open the pre-write content reads as empty — the file's observable state at
28-
/// that instant.)
26+
/// this, so the content is a consistent point-in-time read. A truncating open
27+
/// reads as empty here (it already ran); the store recovers the pre-write
28+
/// content from the last recorded write, using the file's identity to avoid
29+
/// resurrecting stale content after a replacement (see [`Snapshotter`]).
2930
pub fn install_callback(cmd: &mut fspy::Command, snap: Snapshotter, ignore: Arc<IgnoreSet>) {
3031
cmd.on_file_event(AccessMode::WRITE, move |event: FileEvent<'_>| {
3132
let Some(path) = event.path.get() else {
@@ -52,10 +53,28 @@ pub fn install_callback(cmd: &mut fspy::Command, snap: Snapshotter, ignore: Arc<
5253
let Ok(content) = std::fs::read(&canonical) else {
5354
return;
5455
};
56+
// The file's on-disk identity, to tell an in-place rewrite from a
57+
// replacement of the same path (rename-over / delete-and-recreate);
58+
// `None` if it can't be determined.
59+
#[cfg(unix)]
60+
let identity = std::fs::metadata(&canonical).ok().map(|meta| {
61+
use std::os::unix::fs::MetadataExt as _;
62+
FileId::new(meta.dev(), meta.ino())
63+
});
64+
#[cfg(windows)]
65+
let identity = std::fs::metadata(&canonical).ok().and_then(|meta| {
66+
use std::os::windows::fs::MetadataExt as _;
67+
// `volume_serial_number`/`file_index` are populated for handle-backed
68+
// metadata (which `fs::metadata` uses); fall back to `None` otherwise.
69+
match (meta.volume_serial_number(), meta.file_index()) {
70+
(Some(vol), Some(idx)) => Some(FileId::new(u64::from(vol), idx)),
71+
_ => None,
72+
}
73+
});
5574
let (pid, fd) = (event.pid, event.raw_fd);
5675
match event.kind {
57-
FileEventKind::Opened => snap.record_open(pid, fd, path_str, &content),
58-
FileEventKind::Closing => snap.record_close(pid, fd, path_str, &content),
76+
FileEventKind::Opened => snap.record_open(pid, fd, path_str, &content, identity),
77+
FileEventKind::Closing => snap.record_close(pid, fd, path_str, &content, identity),
5978
}
6079
});
6180
}

crates/worldline/src/capture/store.rs

Lines changed: 100 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,10 @@ enum Flavor {
6666

6767
/// Correlation key pairing an open with its close. Uses the descriptor when the
6868
/// backend reports it (`raw_fd >= 0`); otherwise falls back to the path (the
69-
/// seccomp backend can't report the target's fd).
69+
/// seccomp backend can't report the target's fd). Several opens can share a key
70+
/// — the path fallback collapses every open of one path, and a descriptor
71+
/// number is reused after close — so each key maps to a *stack* of pending
72+
/// opens, paired LIFO and validated by path on close.
7073
#[derive(Clone, PartialEq, Eq, Hash)]
7174
enum FdKey {
7275
Fd(u32, i64),
@@ -77,14 +80,45 @@ fn fd_key(pid: u32, raw_fd: i64, path: &Str) -> FdKey {
7780
if raw_fd >= 0 { FdKey::Fd(pid, raw_fd) } else { FdKey::Path(pid, path.clone()) }
7881
}
7982

83+
/// A file's identity on disk: device + inode on Unix, volume serial + file index
84+
/// on Windows.
85+
///
86+
/// Used to tell an in-place truncating rewrite (same file) apart from a
87+
/// replacement of the same path — a rename-over or delete-and-recreate (a
88+
/// *different* file at that path).
89+
#[derive(Clone, Copy, PartialEq, Eq)]
90+
pub struct FileId {
91+
dev: u64,
92+
ino: u64,
93+
}
94+
95+
impl FileId {
96+
/// Construct from a `(device, inode)`-style identity pair.
97+
#[must_use]
98+
pub const fn new(dev: u64, ino: u64) -> Self {
99+
Self { dev, ino }
100+
}
101+
}
102+
103+
/// One pending open awaiting its close: the path it was opened on (to validate
104+
/// the pairing) and the before-frontier captured for it.
105+
struct OpenSlot {
106+
path: Str,
107+
before: Vec<OpId>,
108+
}
109+
80110
struct State {
81111
doc: LoroDoc,
82112
text_files: LoroMap,
83113
bin_files: LoroMap,
84114
/// Per-path text/binary classification, to clean up the other map on a flip.
85115
flavor: FxHashMap<Str, Flavor>,
86-
/// Open descriptors awaiting their close: key -> the open's before-frontier.
87-
open: FxHashMap<FdKey, Vec<OpId>>,
116+
/// Pending opens awaiting their close: key -> a LIFO stack of open slots
117+
/// (several opens can share a key; see [`FdKey`]).
118+
open: FxHashMap<FdKey, Vec<OpenSlot>>,
119+
/// Per-path on-disk identity as of the last recorded write, to tell an
120+
/// in-place rewrite from a replacement of the same path.
121+
identity: FxHashMap<Str, FileId>,
88122
writes: Vec<Write>,
89123
output: Vec<u8>,
90124
next_seq: u64,
@@ -120,6 +154,7 @@ impl Snapshotter {
120154
bin_files,
121155
flavor: FxHashMap::default(),
122156
open: FxHashMap::default(),
157+
identity: FxHashMap::default(),
123158
writes: Vec::new(),
124159
output: Vec::new(),
125160
next_seq: 0,
@@ -145,46 +180,73 @@ impl Snapshotter {
145180
/// whole-file writes use) has already emptied the file by the time we read
146181
/// it. To avoid losing the real pre-write content, the open read is only
147182
/// adopted as the `before` when it is non-empty (a non-truncating open, or a
148-
/// pre-existing file); an empty read for a path we've already recorded is
149-
/// treated as a truncating open and the last-recorded content is kept.
183+
/// pre-existing file). An empty read for a path we've already recorded is
184+
/// treated as a truncating open and the last-recorded content is kept —
185+
/// unless `id` shows the file was replaced since (rename-over or
186+
/// delete-and-recreate), in which case it's treated as a fresh file so stale
187+
/// content isn't resurrected.
150188
///
151189
/// # Panics
152190
///
153191
/// Panics if a Loro container operation fails (a corrupted invariant).
154-
pub fn record_open(&self, pid: u32, raw_fd: i64, path: &str, content: &[u8]) {
192+
pub fn record_open(
193+
&self,
194+
pid: u32,
195+
raw_fd: i64,
196+
path: &str,
197+
content: &[u8],
198+
id: Option<FileId>,
199+
) {
155200
let mut guard = self.lock();
156201
let key = Str::from(path);
157202
let before = if !content.is_empty() {
158203
// Non-truncating open or a pre-existing file: the read reflects the
159204
// real current content (including any external modification).
160205
set_content(&mut guard, path, content)
161-
} else if guard.flavor.contains_key(&key) {
162-
// Empty read but we already track this path: a truncating open
163-
// emptied the file before we could read it. Keep what we last
164-
// recorded (the previous write's `after`) as the `before`.
206+
} else if guard.flavor.contains_key(&key) && !replaced(&guard, &key, id) {
207+
// Empty read of a path we already track whose on-disk identity is
208+
// unchanged: a truncating open emptied the file before we could read
209+
// it. Keep what we last recorded (the previous write's `after`).
165210
serialize_frontiers(&guard.doc.state_frontiers())
166211
} else {
167-
// Empty read, never seen before: a genuinely new or empty file.
212+
// Empty read of a genuinely new file, or a path whose identity
213+
// changed since we last saw it (rename-over / delete-and-recreate):
214+
// treat it as fresh rather than resurrecting stale content.
168215
set_content(&mut guard, path, content)
169216
};
170-
guard.open.insert(fd_key(pid, raw_fd, &key), before);
217+
guard
218+
.open
219+
.entry(fd_key(pid, raw_fd, &key))
220+
.or_default()
221+
.push(OpenSlot { path: key, before });
171222
}
172223

173224
/// Record the post-write snapshot taken just before `path` is closed on
174225
/// descriptor `raw_fd` of process `pid`, emitting a [`Write`]. Its `before`
175226
/// is the matching open's snapshot (or the file's prior content if the open
176-
/// wasn't observed).
227+
/// wasn't observed). `id` is the file's on-disk identity, remembered to
228+
/// detect a later replacement of this path.
177229
///
178230
/// # Panics
179231
///
180232
/// Panics if a Loro container operation fails (a corrupted invariant).
181-
pub fn record_close(&self, pid: u32, raw_fd: i64, path: &str, content: &[u8]) {
233+
pub fn record_close(
234+
&self,
235+
pid: u32,
236+
raw_fd: i64,
237+
path: &str,
238+
content: &[u8],
239+
id: Option<FileId>,
240+
) {
182241
let path_key = Str::from(path);
183242
let mut guard = self.lock();
184243
// The file's content before this close, for the fallback `before`.
185244
let prior = serialize_frontiers(&guard.doc.state_frontiers());
186245
let after = set_content(&mut guard, path, content);
187-
let before = guard.open.remove(&fd_key(pid, raw_fd, &path_key)).unwrap_or(prior);
246+
if let Some(id) = id {
247+
guard.identity.insert(path_key.clone(), id);
248+
}
249+
let before = pop_open(&mut guard, pid, raw_fd, &path_key).unwrap_or(prior);
188250
let seq = guard.next_seq;
189251
guard.next_seq += 1;
190252
let output_offset = guard.output.len();
@@ -207,6 +269,29 @@ impl Snapshotter {
207269
}
208270
}
209271

272+
/// Whether the file at `key` has been replaced since we last recorded it — its
273+
/// on-disk identity differs from the stored one. Unknown identities (either side
274+
/// `None`) are treated as "not replaced", so the truncating-rewrite heuristic
275+
/// still applies when identity is unavailable.
276+
fn replaced(state: &State, key: &Str, current: Option<FileId>) -> bool {
277+
matches!((state.identity.get(key), current), (Some(old), Some(new)) if *old != new)
278+
}
279+
280+
/// Pop the most recent pending open for this `(pid, raw_fd/path)` key whose path
281+
/// matches the closing `path`, returning its before-frontier. A mismatched top
282+
/// slot — a descriptor reused for a different file (e.g. via `dup2`) — is
283+
/// discarded so it can't be mispaired. `None` means no matching open was
284+
/// observed, and the caller falls back to the file's prior content.
285+
fn pop_open(state: &mut State, pid: u32, raw_fd: i64, path: &Str) -> Option<Vec<OpId>> {
286+
let key = fd_key(pid, raw_fd, path);
287+
let stack = state.open.get_mut(&key)?;
288+
let slot = stack.pop()?;
289+
if stack.is_empty() {
290+
state.open.remove(&key);
291+
}
292+
(slot.path == *path).then_some(slot.before)
293+
}
294+
210295
/// Set `path`'s content in the doc, committing, and return the new frontier.
211296
/// Identical content is a no-op (Loro dedups), so the frontier is unchanged.
212297
fn set_content(state: &mut State, path: &str, content: &[u8]) -> Vec<OpId> {

crates/worldline/src/serve/data.rs

Lines changed: 105 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -149,22 +149,40 @@ fn read_content(doc: &LoroDoc, path: &str) -> (Str, Blob) {
149149

150150
#[cfg(test)]
151151
mod tests {
152-
use super::reconstruct;
152+
use super::{ApiData, reconstruct};
153153
use crate::{
154-
capture::Snapshotter,
154+
capture::{FileId, Snapshotter},
155155
run::{Captured, Meta},
156156
};
157157

158+
/// Reconstruct a snapshotter's writes into the served payload.
159+
fn api_of(snap: &Snapshotter) -> ApiData {
160+
reconstruct(&Captured {
161+
meta: Meta { argv: vec!["p".into()], cwd: "/w".into(), exit_code: Some(0) },
162+
data: snap.finish(),
163+
})
164+
}
165+
166+
/// The (UTF-8) before-content of write `i`.
167+
fn before(api: &ApiData, i: usize) -> &str {
168+
api.blobs[api.writes[i].before.as_str()].data.as_str()
169+
}
170+
171+
/// The (UTF-8) after-content of write `i`.
172+
fn after(api: &ApiData, i: usize) -> &str {
173+
api.blobs[api.writes[i].after.as_str()].data.as_str()
174+
}
175+
158176
#[test]
159177
fn reconstructs_writes_with_before_after() {
160178
let snap = Snapshotter::new();
161179
snap.append_output(b"out");
162180
// First write to a.txt: created empty, closed with "hello".
163-
snap.record_open(1, 3, "/w/a.txt", b"");
164-
snap.record_close(1, 3, "/w/a.txt", b"hello");
181+
snap.record_open(1, 3, "/w/a.txt", b"", None);
182+
snap.record_close(1, 3, "/w/a.txt", b"hello", None);
165183
// Second write to a.txt: opened with "hello", closed with "hello world".
166-
snap.record_open(1, 3, "/w/a.txt", b"hello");
167-
snap.record_close(1, 3, "/w/a.txt", b"hello world");
184+
snap.record_open(1, 3, "/w/a.txt", b"hello", None);
185+
snap.record_close(1, 3, "/w/a.txt", b"hello world", None);
168186

169187
let captured = Captured {
170188
meta: Meta { argv: vec!["p".into()], cwd: "/w".into(), exit_code: Some(0) },
@@ -185,4 +203,85 @@ mod tests {
185203
// "hello" is shared between write 0's after and write 1's before.
186204
assert_eq!(api.writes[0].after.as_str(), api.writes[1].before.as_str());
187205
}
206+
207+
/// A truncating in-place rewrite (same on-disk identity) keeps the file's
208+
/// prior content as `before`; a replacement of the path (different identity
209+
/// — rename-over or delete-and-recreate) is treated as a fresh file so stale
210+
/// content isn't resurrected. (Empty open reads model an `O_TRUNC` open,
211+
/// which `writeFileSync` uses.)
212+
#[test]
213+
fn identity_distinguishes_rewrite_from_replacement() {
214+
let (x, y) = (FileId::new(1, 100), FileId::new(1, 200));
215+
216+
// In-place rewrite: same identity across both writes -> keep "v1".
217+
let snap = Snapshotter::new();
218+
snap.record_open(1, -1, "/w/a.txt", b"", Some(x));
219+
snap.record_close(1, -1, "/w/a.txt", b"v1", Some(x));
220+
snap.record_open(1, -1, "/w/a.txt", b"", Some(x));
221+
snap.record_close(1, -1, "/w/a.txt", b"v2", Some(x));
222+
let api = api_of(&snap);
223+
assert_eq!(before(&api, 1), "v1", "in-place rewrite keeps prior content");
224+
assert_eq!(after(&api, 1), "v2");
225+
226+
// Replacement: identity changes on the second write -> fresh, before "".
227+
let snap = Snapshotter::new();
228+
snap.record_open(1, -1, "/w/a.txt", b"", Some(x));
229+
snap.record_close(1, -1, "/w/a.txt", b"v1", Some(x));
230+
snap.record_open(1, -1, "/w/a.txt", b"", Some(y));
231+
snap.record_close(1, -1, "/w/a.txt", b"v2", Some(y));
232+
let api = api_of(&snap);
233+
assert_eq!(before(&api, 1), "", "a replaced file shows an empty before, not stale content");
234+
assert_eq!(after(&api, 1), "v2");
235+
}
236+
237+
/// Under the seccomp backend (`raw_fd == -1`) several opens of one path
238+
/// collapse to the same correlation key. Their before-frontiers must be
239+
/// stacked, not overwritten, so two overlapping writes to one path each get
240+
/// their own `before` rather than the second falling back to the first
241+
/// close's after.
242+
#[test]
243+
fn seccomp_stacked_opens_keep_distinct_befores() {
244+
let snap = Snapshotter::new();
245+
snap.record_open(1, -1, "/w/a.txt", b"", None);
246+
snap.record_close(1, -1, "/w/a.txt", b"base", None); // write 0: "" -> "base"
247+
// Two overlapping opens (both saw "base"), then two closes.
248+
snap.record_open(1, -1, "/w/a.txt", b"base", None);
249+
snap.record_open(1, -1, "/w/a.txt", b"base", None);
250+
snap.record_close(1, -1, "/w/a.txt", b"X1", None); // write 1
251+
snap.record_close(1, -1, "/w/a.txt", b"Y1", None); // write 2
252+
let api = api_of(&snap);
253+
assert_eq!(before(&api, 1), "base");
254+
assert_eq!(
255+
before(&api, 2),
256+
"base",
257+
"the second overlapping close must not fall back to the first close's after"
258+
);
259+
}
260+
261+
/// A descriptor reused for a different file (e.g. via `dup2`, which closes
262+
/// the old target without a close callback, leaking that open) must not
263+
/// mispair: the close validates the open's path and falls back to the file's
264+
/// prior content on a mismatch, instead of adopting a stale `before` from the
265+
/// leaked open of another file.
266+
#[test]
267+
fn reused_fd_does_not_mispair() {
268+
let snap = Snapshotter::new();
269+
// fd 5 opened for a.txt ("AAA"); its close is never observed.
270+
snap.record_open(1, 5, "/w/a.txt", b"AAA", None);
271+
// fd 6 opened + closed for b.txt normally.
272+
snap.record_open(1, 6, "/w/b.txt", b"BBB", None);
273+
snap.record_close(1, 6, "/w/b.txt", b"BBB2", None);
274+
// fd 5 now aliases b.txt (after a dup2) and is closed.
275+
snap.record_close(1, 5, "/w/b.txt", b"BBB3", None);
276+
let api = api_of(&snap);
277+
for i in 0..api.writes.len() {
278+
if api.writes[i].path.as_str().ends_with("b.txt") {
279+
assert_ne!(
280+
before(&api, i),
281+
"AAA",
282+
"a b.txt close on a reused fd must not adopt a.txt's leaked before"
283+
);
284+
}
285+
}
286+
}
188287
}

crates/worldline/src/serve/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,8 @@ mod tests {
183183
async fn serves_endpoints_and_assets() {
184184
let snap = Snapshotter::new();
185185
snap.append_output(b"hello-output");
186-
snap.record_open(1, 3, "/w/a.txt", b"");
187-
snap.record_close(1, 3, "/w/a.txt", b"content");
186+
snap.record_open(1, 3, "/w/a.txt", b"", None);
187+
snap.record_close(1, 3, "/w/a.txt", b"content", None);
188188
let captured = Captured {
189189
meta: Meta { argv: vec!["prog".into()], cwd: "/w".into(), exit_code: Some(0) },
190190
data: snap.finish(),

0 commit comments

Comments
 (0)