Skip to content

Commit 4eeab9c

Browse files
wan9chi and claude committed
fix(worldline): keep prior content as a truncating write's "before"
A rewrite via `writeFileSync` (or any `O_TRUNC` open) empties the file at open, before worldline's open-time snapshot runs in the supervisor — so the snapshot read back empty and overwrote the file's real prior content. A second write to the same file then showed `"" -> "new"` instead of `"old" -> "new"`.

Only adopt the open-time read as the `before` when it is non-empty (a non-truncating open, or a pre-existing file). An empty read for a path already tracked is treated as a truncating open, and the last-recorded content (the previous write's `after`) is kept as the `before`.

Adds a node-free regression test covering repeated truncating writes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 3af9426 commit 4eeab9c

2 files changed

Lines changed: 76 additions & 7 deletions

File tree

crates/worldline/src/capture/store.rs

Lines changed: 32 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,12 @@
22
//! ordered list of *writes* and a raw terminal-output byte log.
33
//!
44
//! A **write** is one open→…→close lifecycle of a descriptor opened for
5-
//! writing. Its *before* is the file's content snapshotted in the open
6-
//! callback; its *after* is the content snapshotted in the close callback of
7-
//! the same descriptor. Both are stored as points in the Loro history (per-path
5+
//! writing. Its *after* is the content snapshotted in the close callback of
6+
//! that descriptor; its *before* is the file's content just prior to the write
7+
//! — the open-callback snapshot for a fresh or non-truncating open, or, when a
8+
//! truncating open (`O_TRUNC`, as `writeFileSync` uses) has already emptied the
9+
//! file before the snapshot could run, the content last recorded for that path.
10+
//! Both are stored as points in the Loro history (per-path
811
//! `LoroText`/binary, so repeated near-identical writes are delta-stored), and
912
//! each write records the before/after [`Frontiers`](loro::Frontiers) the
1013
//! server later `checkout`s to render them.
@@ -34,7 +37,9 @@ pub struct Write {
3437
pub seq: u64,
3538
/// Absolute path of the written file.
3639
pub path: Str,
37-
/// Frontier capturing the file's content at the matching open.
40+
/// Frontier capturing the file's content just before this write (the open
41+
/// snapshot, or the last-recorded content when a truncating open emptied it
42+
/// first).
3843
pub before: Vec<OpId>,
3944
/// Frontier capturing the file's content at the close.
4045
pub after: Vec<OpId>,
@@ -135,14 +140,34 @@ impl Snapshotter {
135140
/// descriptor `raw_fd` of process `pid`. Pairs with the matching
136141
/// [`Self::record_close`].
137142
///
143+
/// The open-time `content` is read in the supervisor *after* the open
144+
/// syscall, so a truncating open (`O_TRUNC` — what `writeFileSync` and most
145+
/// whole-file writes use) has already emptied the file by the time we read
146+
/// it. To avoid losing the real pre-write content, the open read is only
147+
/// adopted as the `before` when it is non-empty (a non-truncating open, or a
148+
/// pre-existing file); an empty read for a path we've already recorded is
149+
/// treated as a truncating open and the last-recorded content is kept.
150+
///
138151
/// # Panics
139152
///
140153
/// Panics if a Loro container operation fails (a corrupted invariant).
141154
pub fn record_open(&self, pid: u32, raw_fd: i64, path: &str, content: &[u8]) {
142155
let mut guard = self.lock();
143-
let before = set_content(&mut guard, path, content);
144-
let key = fd_key(pid, raw_fd, &Str::from(path));
145-
guard.open.insert(key, before);
156+
let key = Str::from(path);
157+
let before = if !content.is_empty() {
158+
// Non-truncating open or a pre-existing file: the read reflects the
159+
// real current content (including any external modification).
160+
set_content(&mut guard, path, content)
161+
} else if guard.flavor.contains_key(&key) {
162+
// Empty read but we already track this path: a truncating open
163+
// emptied the file before we could read it. Keep what we last
164+
// recorded (the previous write's `after`) as the `before`.
165+
serialize_frontiers(&guard.doc.state_frontiers())
166+
} else {
167+
// Empty read, never seen before: a genuinely new or empty file.
168+
set_content(&mut guard, path, content)
169+
};
170+
guard.open.insert(fd_key(pid, raw_fd, &key), before);
146171
}
147172

148173
/// Record the post-write snapshot taken just before `path` is closed on

crates/worldline/tests/capture.rs

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,50 @@ async fn captures_writes_with_before_and_after() {
9292
);
9393
}
9494

95+
/// A truncating rewrite (`File::create`/`writeFileSync` — `O_TRUNC`) empties the
96+
/// file at open, before the open-time snapshot runs. The write's `before` must
97+
/// still be the file's prior content (the previously-recorded write), not the
98+
/// empty post-truncation read. Uses the test binary (no Node), so it runs in the
99+
/// default `cargo test` on every platform.
100+
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
101+
async fn truncating_rewrite_keeps_prior_content_as_before() {
102+
let dir = tempfile::tempdir().unwrap();
103+
let dir_arg = dir.path().to_str().unwrap().to_owned();
104+
105+
let cmd = command_for_fn!(dir_arg, |dir: String| {
106+
use std::io::Write as _;
107+
let a = std::path::Path::new(&dir).join("a.txt");
108+
// Two truncating writes to the same file.
109+
std::fs::write(&a, b"first").unwrap();
110+
std::fs::write(&a, b"second").unwrap();
111+
let mut out = std::io::stdout();
112+
out.write_all(b"done!").unwrap();
113+
out.flush().unwrap();
114+
});
115+
116+
let cwd = AbsolutePathBuf::new(dir.path().to_path_buf()).unwrap();
117+
let ignore = IgnoreSet::new(cwd.clone(), true, &[]).unwrap();
118+
let captured = run(RunOptions { program: cmd.program, args: cmd.args, cwd, ignore })
119+
.await
120+
.expect("run worldline");
121+
assert_eq!(captured.meta.exit_code, Some(0), "child should exit cleanly");
122+
123+
let api = reconstruct(&captured);
124+
let a_writes: Vec<&ApiWrite> =
125+
api.writes.iter().filter(|w| w.path.ends_with("a.txt")).collect();
126+
assert!(
127+
a_writes.iter().any(|w| {
128+
text(&api, w.before.as_str()).as_deref() == Some("first")
129+
&& text(&api, w.after.as_str()).as_deref() == Some("second")
130+
}),
131+
"expected the truncating rewrite to show before='first' after='second', got {:?}",
132+
a_writes
133+
.iter()
134+
.map(|w| (text(&api, w.before.as_str()), text(&api, w.after.as_str())))
135+
.collect::<Vec<_>>()
136+
);
137+
}
138+
95139
/// Regression for the user-facing `worldline node` scenario: Node's libuv closes
96140
/// descriptors via `close$NOCANCEL` on macOS — a distinct libc symbol from
97141
/// `close`. If fspy doesn't interpose it, the write-close is never observed, so

0 commit comments

Comments
 (0)