From 970e9b7242dd6582d3caa05a2085cd702262f4c8 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Wed, 22 Apr 2026 07:54:34 +0000 Subject: [PATCH 01/46] perf: improve text checkout scalability --- crates/loro-internal/Cargo.toml | 4 + crates/loro-internal/benches/text_checkout.rs | 646 ++++++++++++++++++ .../src/container/richtext/richtext_state.rs | 95 +++ .../src/container/richtext/style_range_map.rs | 10 + .../src/container/richtext/tracker.rs | 91 ++- .../container/richtext/tracker/crdt_rope.rs | 258 +++++-- crates/loro-internal/src/diff_calc.rs | 159 ++++- crates/loro-internal/src/diff_calc/counter.rs | 4 +- crates/loro-internal/src/diff_calc/tree.rs | 4 +- crates/loro-internal/src/diff_calc/unknown.rs | 4 +- crates/loro-internal/src/loro.rs | 362 +++++++++- crates/loro-internal/src/oplog.rs | 26 +- .../loro-internal/src/state/richtext_state.rs | 125 ++-- crates/loro-internal/src/version.rs | 85 +++ 14 files changed, 1701 insertions(+), 172 deletions(-) create mode 100644 crates/loro-internal/benches/text_checkout.rs diff --git a/crates/loro-internal/Cargo.toml b/crates/loro-internal/Cargo.toml index 4504e0aa2..11a3e306d 100644 --- a/crates/loro-internal/Cargo.toml +++ b/crates/loro-internal/Cargo.toml @@ -101,6 +101,10 @@ jsonpath = [] name = "text_r" harness = false +[[bench]] +name = "text_checkout" +harness = false + [[bench]] name = "list" harness = false diff --git a/crates/loro-internal/benches/text_checkout.rs b/crates/loro-internal/benches/text_checkout.rs new file mode 100644 index 000000000..06fe8dfd9 --- /dev/null +++ b/crates/loro-internal/benches/text_checkout.rs @@ -0,0 +1,646 @@ +use criterion::{criterion_group, criterion_main, Criterion}; + +#[cfg(feature = "test_utils")] +mod text_checkout { + use std::{hint::black_box, sync::Arc, time::Duration}; + + use criterion::{measurement::WallTime, BenchmarkGroup, BenchmarkId, Criterion}; + use loro_internal::{ + cursor::PosType, + id::PeerID, + loro::{CheckoutProfile, ExportMode, TextStateProfile}, + 
version::Frontiers, + LoroDoc, Subscription, + }; + use rand::{rngs::StdRng, Rng, SeedableRng}; + + const TEXT: &str = "fn checkout_profile() { let value = document.version(); }\n"; + + #[derive(Debug, Clone, Copy)] + struct FixtureStats { + scenario: &'static str, + peer_count: usize, + change_count: usize, + base_len: usize, + version_count: usize, + subscribed: bool, + } + + struct CheckoutFixture { + doc: LoroDoc, + frontiers: Vec, + stats: FixtureStats, + _subscription: Option, + } + + #[derive(Debug, Default)] + struct ProfileTotals { + samples: u64, + total: Duration, + frontier_prepare: Duration, + frontiers_to_vv: Duration, + diff_calc: Duration, + state_apply: Duration, + emit_events: Duration, + richtext_tracker_checkout: Duration, + richtext_tracker_diff: Duration, + richtext_delta_build: Duration, + richtext_insert_future_scan: Duration, + causal_vv_materialize: Duration, + max_frontiers_width: usize, + max_vv_width: usize, + max_causal_vv_width: usize, + max_diff_container_count: usize, + richtext_tracker_checkout_count: u64, + richtext_tracker_diff_count: u64, + richtext_delta_build_count: u64, + richtext_insert_future_scan_count: u64, + richtext_insert_future_scan_visited: u64, + richtext_insert_future_scan_max_visited: usize, + causal_vv_materialize_count: u64, + recording_event_samples: u64, + forward_diff_calculator_samples: u64, + } + + impl ProfileTotals { + fn add(&mut self, profile: CheckoutProfile) { + self.samples += 1; + self.total += profile.total; + self.frontier_prepare += profile.frontier_prepare; + self.frontiers_to_vv += profile.frontiers_to_vv; + self.diff_calc += profile.diff_calc; + self.state_apply += profile.state_apply; + self.emit_events += profile.emit_events; + self.richtext_tracker_checkout += profile.richtext_tracker_checkout; + self.richtext_tracker_diff += profile.richtext_tracker_diff; + self.richtext_delta_build += profile.richtext_delta_build; + self.richtext_insert_future_scan += profile.richtext_insert_future_scan; 
+ self.causal_vv_materialize += profile.causal_vv_materialize; + self.max_frontiers_width = self + .max_frontiers_width + .max(profile.from_frontiers_len) + .max(profile.to_frontiers_len); + self.max_vv_width = self + .max_vv_width + .max(profile.from_vv_len) + .max(profile.to_vv_len); + self.max_causal_vv_width = self.max_causal_vv_width.max(profile.max_causal_vv_width); + self.max_diff_container_count = self + .max_diff_container_count + .max(profile.diff_container_count); + self.richtext_tracker_checkout_count += profile.richtext_tracker_checkout_count; + self.richtext_tracker_diff_count += profile.richtext_tracker_diff_count; + self.richtext_delta_build_count += profile.richtext_delta_build_count; + self.richtext_insert_future_scan_count += profile.richtext_insert_future_scan_count; + self.richtext_insert_future_scan_visited += profile.richtext_insert_future_scan_visited; + self.richtext_insert_future_scan_max_visited = self + .richtext_insert_future_scan_max_visited + .max(profile.richtext_insert_future_scan_max_visited); + self.causal_vv_materialize_count += profile.causal_vv_materialize_count; + if profile.recording_events { + self.recording_event_samples += 1; + } + if profile.forward_diff_calculator { + self.forward_diff_calculator_samples += 1; + } + } + } + + pub fn text_checkout(c: &mut Criterion) { + let peer_count = env_usize("LORO_TEXT_CHECKOUT_PEERS", 1000).max(1); + let base_len = env_usize("LORO_TEXT_CHECKOUT_BASE_LEN", 8192).max(1); + let sequential_changes = env_usize("LORO_TEXT_CHECKOUT_CHANGES", peer_count.max(1000)); + + let mut group = c.benchmark_group("text checkout"); + group.sample_size(10); + + bench_fixture( + &mut group, + "plain/random-peer-checkout", + build_concurrent_plain(peer_count, base_len, false, false), + ); + bench_fixture( + &mut group, + "plain/same-position-peer-checkout", + build_concurrent_plain(peer_count, base_len, true, false), + ); + bench_fixture( + &mut group, + "plain/random-peer-checkout/subscribed", + 
build_concurrent_plain(peer_count, base_len, false, true), + ); + bench_fixture( + &mut group, + "plain/wide-causal-peer-checkout", + build_wide_causal_plain(peer_count, base_len, false), + ); + bench_fixture( + &mut group, + "rich/overlap-mark-peer-checkout", + build_concurrent_rich_marks(peer_count, base_len, false), + ); + bench_fixture( + &mut group, + "rich/overlap-mark-peer-checkout/subscribed", + build_concurrent_rich_marks(peer_count, base_len, true), + ); + bench_fixture( + &mut group, + "rich/unmark-style-peer-checkout", + build_concurrent_rich_unmarks(peer_count, base_len, false), + ); + bench_fixture( + &mut group, + "code/sequential-one-op-txn", + build_code_like_history(sequential_changes, base_len, 1, false), + ); + bench_fixture( + &mut group, + "code/sequential-eight-op-txn", + build_code_like_history((sequential_changes / 8).max(1), base_len, 8, false), + ); + bench_checkout_to_latest_fixture( + &mut group, + "code/checkout-to-latest-linear", + build_code_like_history(sequential_changes, base_len, 1, false), + ); + + group.finish(); + } + + fn bench_fixture( + group: &mut BenchmarkGroup<'_, WallTime>, + name: &str, + fixture: CheckoutFixture, + ) { + let CheckoutFixture { + doc, + frontiers, + stats, + _subscription, + } = fixture; + let mut totals = ProfileTotals::default(); + let mut rng = StdRng::seed_from_u64(0x74ea_7c0d); + let mut last_frontier_idx = usize::MAX; + + group.bench_with_input( + BenchmarkId::new(name, stats.version_count), + &frontiers, + |b, frontiers| { + b.iter_custom(|iters| { + let start = std::time::Instant::now(); + for _ in 0..iters { + let mut frontier_idx = rng.gen_range(0..frontiers.len()); + if frontiers.len() > 1 && frontier_idx == last_frontier_idx { + frontier_idx = (frontier_idx + 1) % frontiers.len(); + } + last_frontier_idx = frontier_idx; + let frontier = &frontiers[frontier_idx]; + let profile = doc.checkout_with_profile(frontier).unwrap(); + totals.add(profile); + black_box(profile); + } + + start.elapsed() 
+ }); + }, + ); + + let state_profile = doc.text_state_profile("text"); + maybe_report_profile(name, stats, &totals, state_profile); + } + + fn bench_checkout_to_latest_fixture( + group: &mut BenchmarkGroup<'_, WallTime>, + name: &str, + fixture: CheckoutFixture, + ) { + let CheckoutFixture { + doc, + frontiers, + stats, + _subscription, + } = fixture; + let old_frontier_idx = if frontiers.len() > 2 { + frontiers.len() / 2 + } else { + 0 + }; + let old_frontier = frontiers[old_frontier_idx].clone(); + let latest_frontier = frontiers.last().unwrap().clone(); + let mut totals = ProfileTotals::default(); + + group.bench_with_input( + BenchmarkId::new(name, stats.version_count), + &latest_frontier, + |b, latest_frontier| { + b.iter_custom(|iters| { + let mut measured = Duration::ZERO; + for _ in 0..iters { + doc.checkout(&old_frontier).unwrap(); + let start = std::time::Instant::now(); + let profile = doc.checkout_with_profile(latest_frontier).unwrap(); + measured += start.elapsed(); + totals.add(profile); + black_box(profile); + } + + measured + }); + }, + ); + + let state_profile = doc.text_state_profile("text"); + maybe_report_profile(name, stats, &totals, state_profile); + } + + fn build_concurrent_plain( + peer_count: usize, + base_len: usize, + same_position: bool, + subscribed: bool, + ) -> CheckoutFixture { + let (snapshot, base_vv) = build_base_snapshot(base_len); + let doc = LoroDoc::new_auto_commit(); + doc.import(&snapshot).unwrap(); + let mut frontiers = Vec::with_capacity(peer_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(if same_position { 1 } else { 2 }); + + for peer in 0..peer_count { + let peer_doc = doc_from_snapshot(&snapshot, peer as PeerID + 2); + let text = peer_doc.get_text("text"); + let pos = if same_position { + 0 + } else { + rng.gen_range(0..=base_len) + }; + text.insert(pos, "x", PosType::Unicode).unwrap(); + peer_doc.commit_then_renew(); + let update = 
peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: if same_position { + "plain same-position concurrent inserts" + } else { + "plain random concurrent inserts" + }, + peer_count, + change_count: peer_count, + base_len, + version_count: peer_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_wide_causal_plain( + peer_count: usize, + base_len: usize, + subscribed: bool, + ) -> CheckoutFixture { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + let base = repeated_text(base_len); + text.insert(0, &base, PosType::Unicode).unwrap(); + doc.commit_then_renew(); + let mut frontiers = Vec::with_capacity(peer_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(6); + let mut len = base_len; + + for peer in 0..peer_count { + let snapshot = doc.export(ExportMode::snapshot()).unwrap(); + let base_vv = doc.oplog_vv(); + let peer_doc = doc_from_snapshot(&snapshot, peer as PeerID + 2); + let text = peer_doc.get_text("text"); + let pos = rng.gen_range(0..=len); + text.insert(pos, "x", PosType::Unicode).unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + len += 1; + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: "plain sequential multi-peer edits with wide causal VV", + peer_count, + change_count: peer_count, + base_len, + version_count: peer_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_concurrent_rich_marks( + peer_count: usize, + base_len: usize, + subscribed: bool, + ) -> CheckoutFixture { + let (snapshot, base_vv) = build_base_snapshot(base_len); + let doc = LoroDoc::new_auto_commit(); + doc.import(&snapshot).unwrap(); + let mut 
frontiers = Vec::with_capacity(peer_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(3); + let keys = ["bold", "italic", "comment"]; + + for peer in 0..peer_count { + let peer_doc = doc_from_snapshot(&snapshot, peer as PeerID + 2); + let text = peer_doc.get_text("text"); + let start = rng.gen_range(0..base_len); + let end = (start + rng.gen_range(1..=32)).min(base_len); + text.mark( + start, + end, + keys[peer % keys.len()], + true.into(), + PosType::Unicode, + ) + .unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: "rich text overlapping concurrent marks", + peer_count, + change_count: peer_count, + base_len, + version_count: peer_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_concurrent_rich_unmarks( + peer_count: usize, + base_len: usize, + subscribed: bool, + ) -> CheckoutFixture { + let (snapshot, base_vv) = build_styled_base_snapshot(base_len); + let doc = LoroDoc::new_auto_commit(); + doc.import(&snapshot).unwrap(); + let mut frontiers = Vec::with_capacity(peer_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(5); + + for peer in 0..peer_count { + let peer_doc = doc_from_snapshot(&snapshot, peer as PeerID + 2); + let text = peer_doc.get_text("text"); + let start = rng.gen_range(0..base_len); + let end = (start + rng.gen_range(1..=32)).min(base_len).max(start + 1); + text.unmark(start, end, "bold", PosType::Unicode).unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: "rich text concurrent style deletion", + peer_count, + change_count: 
peer_count, + base_len, + version_count: peer_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_code_like_history( + change_count: usize, + base_len: usize, + ops_per_commit: usize, + subscribed: bool, + ) -> CheckoutFixture { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + let base = repeated_text(base_len); + text.insert(0, &base, PosType::Unicode).unwrap(); + doc.commit_then_renew(); + let mut frontiers = Vec::with_capacity(change_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(4 + ops_per_commit as u64); + let mut len = base_len; + + for change in 0..change_count { + for op in 0..ops_per_commit { + if len > 0 && (change + op) % 5 == 0 { + let pos = rng.gen_range(0..len); + text.delete(pos, 1, PosType::Unicode).unwrap(); + len -= 1; + } else { + let token = if op % 2 == 0 { "\nlet x = 1;" } else { ";" }; + let pos = rng.gen_range(0..=len); + text.insert(pos, token, PosType::Unicode).unwrap(); + len += token.chars().count(); + } + } + doc.commit_then_renew(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: if ops_per_commit == 1 { + "code-like sequential one-op transactions" + } else { + "code-like sequential multi-op transactions" + }, + peer_count: 1, + change_count, + base_len, + version_count: change_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_base_snapshot(base_len: usize) -> (Vec, loro_internal::VersionVector) { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + let base = repeated_text(base_len); + text.insert(0, &base, PosType::Unicode).unwrap(); + doc.commit_then_renew(); + (doc.export(ExportMode::snapshot()).unwrap(), doc.oplog_vv()) + } + + fn build_styled_base_snapshot(base_len: usize) -> (Vec, loro_internal::VersionVector) { + let doc = LoroDoc::new_auto_commit(); + 
doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + let base = repeated_text(base_len); + text.insert(0, &base, PosType::Unicode).unwrap(); + text.mark(0, base_len, "bold", true.into(), PosType::Unicode) + .unwrap(); + doc.commit_then_renew(); + (doc.export(ExportMode::snapshot()).unwrap(), doc.oplog_vv()) + } + + fn doc_from_snapshot(snapshot: &[u8], peer: PeerID) -> LoroDoc { + let doc = LoroDoc::new_auto_commit(); + doc.import(snapshot).unwrap(); + doc.set_peer_id(peer).unwrap(); + doc + } + + fn attach_subscription( + doc: LoroDoc, + frontiers: Vec, + stats: FixtureStats, + subscribed: bool, + ) -> CheckoutFixture { + let subscription = subscribed.then(|| { + doc.subscribe_root(Arc::new(|event| { + black_box(event); + })) + }); + + CheckoutFixture { + doc, + frontiers, + stats, + _subscription: subscription, + } + } + + fn repeated_text(len: usize) -> String { + let mut out = String::with_capacity(len); + while out.len() < len { + out.push_str(TEXT); + } + out.truncate(len); + out + } + + fn env_usize(name: &str, default: usize) -> usize { + std::env::var(name) + .ok() + .and_then(|value| value.parse().ok()) + .unwrap_or(default) + } + + fn maybe_report_profile( + name: &str, + stats: FixtureStats, + totals: &ProfileTotals, + state_profile: Option, + ) { + if std::env::var_os("LORO_TEXT_CHECKOUT_PROFILE").is_none() || totals.samples == 0 { + return; + } + + let samples = totals.samples as u32; + let state_profile = state_profile.unwrap_or_default(); + let avg_future_scan_visited = if totals.richtext_insert_future_scan_count == 0 { + 0 + } else { + totals.richtext_insert_future_scan_visited / totals.richtext_insert_future_scan_count + }; + eprintln!( + concat!( + "[text-checkout-profile] {name}: scenario={scenario}, peers={peers}, ", + "changes={changes}, base_len={base_len}, versions={versions}, ", + "subscribed={subscribed}, samples={samples}, avg_total={avg_total:?}, ", + "avg_frontier_prepare={avg_frontier_prepare:?}, ", + 
"avg_frontiers_to_vv={avg_frontiers_to_vv:?}, avg_diff_calc={avg_diff_calc:?}, ", + "avg_causal_vv_materialize={avg_causal_vv_materialize:?}, ", + "causal_vv_materialize_calls={causal_vv_materialize_calls}, ", + "max_causal_vv_width={max_causal_vv_width}, ", + "avg_state_apply={avg_state_apply:?}, avg_emit_events={avg_emit_events:?}, ", + "avg_richtext_tracker_checkout={avg_richtext_tracker_checkout:?}, ", + "avg_richtext_tracker_diff={avg_richtext_tracker_diff:?}, ", + "avg_richtext_delta_build={avg_richtext_delta_build:?}, ", + "avg_richtext_insert_future_scan={avg_richtext_insert_future_scan:?}, ", + "richtext_tracker_checkout_calls={richtext_tracker_checkout_calls}, ", + "richtext_tracker_diff_calls={richtext_tracker_diff_calls}, ", + "richtext_delta_build_calls={richtext_delta_build_calls}, ", + "richtext_insert_future_scan_calls={richtext_insert_future_scan_calls}, ", + "avg_future_scan_visited={avg_future_scan_visited}, ", + "max_future_scan_visited={max_future_scan_visited}, ", + "max_frontiers_width={max_frontiers_width}, max_vv_width={max_vv_width}, ", + "max_diff_containers={max_diff_containers}, recording_event_samples={recording_event_samples}, ", + "forward_diff_calculator_samples={forward_diff_calculator_samples}, ", + "richtext_tree_nodes={richtext_tree_nodes}, richtext_chunks={richtext_chunks}, ", + "text_chunks={text_chunks}, style_anchors={style_anchors}, ", + "style_range_tree_nodes={style_range_tree_nodes}, style_range_chunks={style_range_chunks}" + ), + name = name, + scenario = stats.scenario, + peers = stats.peer_count, + changes = stats.change_count, + base_len = stats.base_len, + versions = stats.version_count, + subscribed = stats.subscribed, + samples = totals.samples, + avg_total = totals.total / samples, + avg_frontier_prepare = totals.frontier_prepare / samples, + avg_frontiers_to_vv = totals.frontiers_to_vv / samples, + avg_diff_calc = totals.diff_calc / samples, + avg_causal_vv_materialize = totals.causal_vv_materialize / samples, + 
causal_vv_materialize_calls = totals.causal_vv_materialize_count, + max_causal_vv_width = totals.max_causal_vv_width, + avg_state_apply = totals.state_apply / samples, + avg_emit_events = totals.emit_events / samples, + avg_richtext_tracker_checkout = totals.richtext_tracker_checkout / samples, + avg_richtext_tracker_diff = totals.richtext_tracker_diff / samples, + avg_richtext_delta_build = totals.richtext_delta_build / samples, + avg_richtext_insert_future_scan = totals.richtext_insert_future_scan / samples, + richtext_tracker_checkout_calls = totals.richtext_tracker_checkout_count, + richtext_tracker_diff_calls = totals.richtext_tracker_diff_count, + richtext_delta_build_calls = totals.richtext_delta_build_count, + richtext_insert_future_scan_calls = totals.richtext_insert_future_scan_count, + avg_future_scan_visited = avg_future_scan_visited, + max_future_scan_visited = totals.richtext_insert_future_scan_max_visited, + max_frontiers_width = totals.max_frontiers_width, + max_vv_width = totals.max_vv_width, + max_diff_containers = totals.max_diff_container_count, + recording_event_samples = totals.recording_event_samples, + forward_diff_calculator_samples = totals.forward_diff_calculator_samples, + richtext_tree_nodes = state_profile.richtext_tree_node_count, + richtext_chunks = state_profile.richtext_chunk_count, + text_chunks = state_profile.text_chunk_count, + style_anchors = state_profile.style_anchor_count, + style_range_tree_nodes = state_profile.style_range_tree_node_count, + style_range_chunks = state_profile.style_range_chunk_count, + ); + } +} + +pub fn dumb(_c: &mut Criterion) {} + +#[cfg(feature = "test_utils")] +criterion_group!(benches, text_checkout::text_checkout); +#[cfg(not(feature = "test_utils"))] +criterion_group!(benches, dumb); +criterion_main!(benches); diff --git a/crates/loro-internal/src/container/richtext/richtext_state.rs b/crates/loro-internal/src/container/richtext/richtext_state.rs index d0e198df3..550bebeaf 100644 --- 
a/crates/loro-internal/src/container/richtext/richtext_state.rs +++ b/crates/loro-internal/src/container/richtext/richtext_state.rs @@ -1321,6 +1321,40 @@ impl RichtextState { } } + #[cfg(feature = "test_utils")] + pub(crate) fn debug_counts(&self) -> (usize, usize, usize, usize, usize, usize) { + let mut chunk_count = 0; + let mut text_chunk_count = 0; + let mut style_anchor_count = 0; + for chunk in self.tree.iter() { + chunk_count += 1; + match chunk { + RichtextStateChunk::Text(_) => text_chunk_count += 1, + RichtextStateChunk::Style { .. } => style_anchor_count += 1, + } + } + + let style_range_tree_node_count = self + .style_ranges + .as_ref() + .map(|x| x.debug_node_len()) + .unwrap_or(0); + let style_range_chunk_count = self + .style_ranges + .as_ref() + .map(|x| x.debug_chunk_len()) + .unwrap_or(0); + + ( + self.tree.node_len(), + chunk_count, + text_chunk_count, + style_anchor_count, + style_range_tree_node_count, + style_range_chunk_count, + ) + } + pub(crate) fn get_entity_index_for_text_insert( &mut self, pos: usize, @@ -1480,6 +1514,29 @@ impl RichtextState { result } + /// Plain-text insertion path for internal diff application when no style/event data is needed. + pub(crate) fn insert_text_chunk_at_entity_index( + &mut self, + entity_index: usize, + text: TextChunk, + ) { + self.check_cache(); + { + debug_assert!(self.style_ranges.as_ref().map_or(true, |x| !x.has_style())); + let elem = RichtextStateChunk::Text(text); + self.clear_cache(); + match self.tree.query::(&entity_index) { + Some(result) => { + self.tree.insert_by_path(result.cursor, elem); + } + None => { + self.tree.push(elem); + } + } + } + self.check_cache(); + } + /// This is used to accept changes from DiffCalculator. /// /// Return (event_index, styles) @@ -2295,6 +2352,44 @@ impl RichtextState { result } + /// Plain-text deletion path for internal diff application when no style/event data is needed. 
+ #[instrument(skip(self))] + pub(crate) fn drain_plain_text_by_entity_index(&mut self, pos: usize, len: usize) { + if len == 0 { + return; + } + + assert!( + pos + len <= self.len_entity(), + "pos: {}, len: {}, self.len(): {}", + pos, + len, + &self.len_entity(), + ); + debug_assert!(self.style_ranges.as_ref().map_or(true, |x| !x.has_style())); + + self.clear_cache(); + let range = pos..pos + len; + let start = self.tree.query::(&range.start); + let start_cursor = start.unwrap().cursor(); + let elem = self.tree.get_elem(start_cursor.leaf).unwrap(); + if elem.rle_len() >= start_cursor.offset + len { + self.tree.update_leaf(start_cursor.leaf, |elem| match elem { + RichtextStateChunk::Text(text) => { + let (next, _) = text.delete_by_entity_index(start_cursor.offset, len); + (true, next.map(RichtextStateChunk::Text), None) + } + RichtextStateChunk::Style { .. } => { + *elem = RichtextStateChunk::Text(TextChunk::new_empty()); + (true, None, None) + } + }); + } else { + let end = self.tree.query::(&range.end); + for _ in generic_btree::iter::Drain::new(&mut self.tree, start, end) {} + } + } + pub fn entity_index_to_event_index(&self, index: usize) -> usize { if index == 0 { // the tree maybe empty diff --git a/crates/loro-internal/src/container/richtext/style_range_map.rs b/crates/loro-internal/src/container/richtext/style_range_map.rs index f7487d4db..69b572b23 100644 --- a/crates/loro-internal/src/container/richtext/style_range_map.rs +++ b/crates/loro-internal/src/container/richtext/style_range_map.rs @@ -134,6 +134,16 @@ impl StyleRangeMap { } } + #[cfg(feature = "test_utils")] + pub(super) fn debug_node_len(&self) -> usize { + self.tree.node_len() + } + + #[cfg(feature = "test_utils")] + pub(super) fn debug_chunk_len(&self) -> usize { + self.tree.iter().count() + } + pub fn annotate( &mut self, range: Range, diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index 70cab4f18..0358121f9 100644 
--- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -6,9 +6,10 @@ use generic_btree::{ }; use loro_common::{Counter, HasId, HasIdSpan, IdFull, IdSpan, Lamport, PeerID, ID}; use rle::HasLength as _; +use smallvec::SmallVec; use tracing::instrument; -use crate::{cursor::AbsolutePosition, VersionVector}; +use crate::{cursor::AbsolutePosition, version::CausalVersion, VersionVector}; use self::{crdt_rope::CrdtRope, id_to_cursor::IdToCursor}; @@ -25,6 +26,7 @@ pub(crate) use crdt_rope::CrdtRopeDelta; pub(crate) struct Tracker { applied_vv: VersionVector, current_vv: VersionVector, + current_frontier_hint: Option, rope: CrdtRope, id_to_cursor: IdToCursor, } @@ -43,6 +45,7 @@ impl Tracker { id_to_cursor: IdToCursor::default(), applied_vv: Default::default(), current_vv: Default::default(), + current_frontier_hint: None, }; let result = this.rope.tree.push(FugueSpan { @@ -68,6 +71,7 @@ impl Tracker { id_to_cursor: IdToCursor::default(), applied_vv: Default::default(), current_vv: Default::default(), + current_frontier_hint: None, } } @@ -137,6 +141,7 @@ impl Tracker { let end_id = op_id.inc(content.len() as Counter); self.current_vv.extend_to_include_end_id(end_id.id()); self.applied_vv.extend_to_include_end_id(end_id.id()); + self.current_frontier_hint = Some(ID::new(end_id.peer, end_id.counter - 1)); } fn update_insert_by_split(&mut self, split: &[LeafIndex]) { @@ -229,6 +234,7 @@ impl Tracker { let end_id = op_id.inc(len as Counter); self.current_vv.extend_to_include_end_id(end_id); self.applied_vv.extend_to_include_end_id(end_id); + self.current_frontier_hint = Some(ID::new(end_id.peer, end_id.counter - 1)); } fn skip_applied( @@ -324,6 +330,7 @@ impl Tracker { let end_id = op_id.inc(1); self.current_vv.extend_to_include_end_id(end_id.id()); self.applied_vv.extend_to_include_end_id(end_id.id()); + self.current_frontier_hint = Some(end_id.id().inc(-1)); } #[inline] @@ -331,16 +338,81 @@ impl Tracker 
{ self._checkout(vv, false); } + #[inline] + pub(crate) fn checkout_causal(&mut self, vv: CausalVersion<'_>) { + self._checkout_causal(vv, false); + } + fn _checkout(&mut self, vv: &VersionVector, on_diff_status: bool) { // tracing::info!("Checkout to {:?} from {:?}", vv, self.current_vv); + let current_vv = std::mem::take(&mut self.current_vv); + let retreat: SmallVec<[IdSpan; 4]> = current_vv.sub_iter(vv).collect(); + let forward: SmallVec<[IdSpan; 4]> = vv.sub_iter(¤t_vv).collect(); + self._checkout_spans(current_vv, retreat, forward, on_diff_status, None); + } + + fn _checkout_causal(&mut self, vv: CausalVersion<'_>, on_diff_status: bool) { + if !on_diff_status + && vv + .single_frontier() + .is_some_and(|frontier| self.current_frontier_hint == Some(frontier)) + { + return; + } + + let current_vv = std::mem::take(&mut self.current_vv); + let mut retreat: SmallVec<[IdSpan; 4]> = SmallVec::new(); + for (&peer, &counter) in current_vv.iter() { + let target_end = vv.end_for_peer(peer); + if counter > target_end { + retreat.push(IdSpan::new(peer, target_end, counter)); + } + } + + let mut forward: SmallVec<[IdSpan; 4]> = SmallVec::new(); + for (&peer, &base_end) in vv.base().iter() { + let target_end = if peer == vv.peer() { + base_end.max(vv.peer_end()) + } else { + base_end + }; + let current_end = current_vv.get(&peer).copied().unwrap_or(0); + if target_end > current_end { + forward.push(IdSpan::new(peer, current_end, target_end)); + } + } + + if !vv.base().contains_key(&vv.peer()) { + let target_end = vv.peer_end(); + let current_end = current_vv.get(&vv.peer()).copied().unwrap_or(0); + if target_end > current_end { + forward.push(IdSpan::new(vv.peer(), current_end, target_end)); + } + } + + self._checkout_spans( + current_vv, + retreat, + forward, + on_diff_status, + vv.single_frontier(), + ); + } + + fn _checkout_spans( + &mut self, + mut current_vv: VersionVector, + retreat: SmallVec<[IdSpan; 4]>, + forward: SmallVec<[IdSpan; 4]>, + on_diff_status: bool, + 
frontier_hint: Option, + ) { if on_diff_status { self.rope.clear_diff_status(); } - let current_vv = std::mem::take(&mut self.current_vv); - let (retreat, forward) = current_vv.diff_iter(vv); let mut updates = Vec::new(); - for span in retreat { + for &span in &retreat { for c in self.id_to_cursor.iter(span) { match c { id_to_cursor::IterCursor::Insert { leaf, id_span } => { @@ -427,12 +499,19 @@ impl Tracker { } } - for span in forward { + for &span in &forward { self.forward(span, &mut updates); } if !on_diff_status { - self.current_vv = vv.clone(); + for span in retreat { + current_vv.set_end(ID::new(span.peer, span.counter.start)); + } + for span in forward { + current_vv.set_end(ID::new(span.peer, span.counter.end)); + } + self.current_vv = current_vv; + self.current_frontier_hint = frontier_hint; } else { self.current_vv = current_vv; } diff --git a/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs b/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs index 6c3c901c5..27f5340c2 100644 --- a/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs +++ b/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs @@ -136,84 +136,111 @@ impl CrdtRope { let mut insert_pos = start.cursor; if !in_between.is_empty() { + #[cfg(feature = "test_utils")] + let future_scan_start = std::time::Instant::now(); + #[cfg(feature = "test_utils")] + let future_scan_visited = in_between.len(); // find insert pos - let mut scanning = false; - let mut visited: SmallVec<[IdSpan; 4]> = Default::default(); - for (other_leaf, other_elem) in in_between.iter() { - // tracing::info!("Visiting {}", &other_elem.id); - let other_origin_left = other_elem.origin_left; - if other_origin_left != content.origin_left - && other_origin_left - .map(|left| visited.iter().all(|x| !x.contains_id(left.to_id()))) - .unwrap_or(true) - { - // The other_elem's origin_left must be at the left side of content's origin_left. 
- // So the content must be at the left side of other_elem. - - // tracing::info!("Break because the node's origin_left is at the left side of new_elem's origin left"); - break; + if in_between.iter().all(|(_, other_elem)| { + other_elem.origin_left == content.origin_left + && other_elem.origin_right == content.origin_right + }) { + debug_assert!(in_between + .windows(2) + .all(|window| window[0].1.id.peer <= window[1].1.id.peer)); + let insert_index = in_between + .partition_point(|(_, other_elem)| other_elem.id.peer <= content.id.peer); + if insert_index > 0 { + let (other_leaf, other_elem) = in_between[insert_index - 1]; + insert_pos = Cursor { + leaf: other_leaf, + offset: other_elem.rle_len(), + }; } + } else { + let mut scanning = false; + let mut visited: SmallVec<[IdSpan; 4]> = Default::default(); + for (other_leaf, other_elem) in in_between.iter() { + // tracing::info!("Visiting {}", &other_elem.id); + let other_origin_left = other_elem.origin_left; + if other_origin_left != content.origin_left + && other_origin_left + .map(|left| visited.iter().all(|x| !x.contains_id(left.to_id()))) + .unwrap_or(true) + { + // The other_elem's origin_left must be at the left side of content's origin_left. + // So the content must be at the left side of other_elem. 
+ + // tracing::info!("Break because the node's origin_left is at the left side of new_elem's origin left"); + break; + } - visited.push(IdSpan::new( - other_elem.id.peer, - other_elem.id.counter, - other_elem.id.counter + other_elem.rle_len() as Counter, - )); - - if content.origin_left == other_origin_left { - if other_elem.origin_right == content.origin_right { - // tracing::info!("Same right parent"); - // Same right parent - if other_elem.id.peer > content.id.peer { - // tracing::info!("Break on larger peer"); - break; + visited.push(IdSpan::new( + other_elem.id.peer, + other_elem.id.counter, + other_elem.id.counter + other_elem.rle_len() as Counter, + )); + + if content.origin_left == other_origin_left { + if other_elem.origin_right == content.origin_right { + // tracing::info!("Same right parent"); + // Same right parent + if other_elem.id.peer > content.id.peer { + // tracing::info!("Break on larger peer"); + break; + } else { + scanning = false; + } } else { - scanning = false; - } - } else { - // tracing::info!("Different right parent"); - // Different right parent, we need to compare the right parents' position - - let other_parent_right_idx = - if let Some(other_origin_right) = other_elem.origin_right { - let elem_idx = find_elem(other_origin_right.to_id()); - let elem = self.tree.get_elem(elem_idx).unwrap(); - // It must be the start of the elem - assert_eq!(elem.id.id(), other_origin_right.to_id()); - if elem.origin_left == content.origin_left { - Some(elem_idx) + // tracing::info!("Different right parent"); + // Different right parent, we need to compare the right parents' position + + let other_parent_right_idx = + if let Some(other_origin_right) = other_elem.origin_right { + let elem_idx = find_elem(other_origin_right.to_id()); + let elem = self.tree.get_elem(elem_idx).unwrap(); + // It must be the start of the elem + assert_eq!(elem.id.id(), other_origin_right.to_id()); + if elem.origin_left == content.origin_left { + Some(elem_idx) + } else { + 
None + } } else { None - } - } else { - None - }; + }; - match self.cmp_pos(other_parent_right_idx, parent_right_leaf) { - Ordering::Less => { - // tracing::info!("Less"); - scanning = true; - } - Ordering::Equal if other_elem.id.peer > content.id.peer => { - // tracing::info!("Break on eq"); - break; - } - _ => { - // tracing::info!("Scanning"); - scanning = false; + match self.cmp_pos(other_parent_right_idx, parent_right_leaf) { + Ordering::Less => { + // tracing::info!("Less"); + scanning = true; + } + Ordering::Equal if other_elem.id.peer > content.id.peer => { + // tracing::info!("Break on eq"); + break; + } + _ => { + // tracing::info!("Scanning"); + scanning = false; + } } } } - } - if !scanning { - insert_pos = Cursor { - leaf: *other_leaf, - offset: other_elem.rle_len(), - }; - // tracing::info!("updating insert pos {:?}", &insert_pos); + if !scanning { + insert_pos = Cursor { + leaf: *other_leaf, + offset: other_elem.rle_len(), + }; + // tracing::info!("updating insert pos {:?}", &insert_pos); + } } } + #[cfg(feature = "test_utils")] + crate::diff_calc::profiling::record_richtext_insert_future_scan( + future_scan_start.elapsed(), + future_scan_visited, + ); } // tracing::info!("Inserting at {:?}", insert_pos); @@ -717,6 +744,38 @@ mod test { span } + fn leaf_of(rope: &CrdtRope, id: ID) -> LeafIndex { + for iter in rope.tree.iter_range(..) { + if iter.elem.id_span().contains_id(id) { + return iter.cursor().leaf; + } + } + + panic!("cannot find leaf for {id:?}") + } + + fn leaf_lookup(rope: &CrdtRope) -> Vec<(IdSpan, LeafIndex)> { + rope.tree + .iter_range(..) 
+ .map(|iter| (iter.elem.id_span(), iter.cursor().leaf)) + .collect() + } + + fn lookup_leaf(lookup: &[(IdSpan, LeafIndex)], id: ID) -> LeafIndex { + lookup + .iter() + .find_map(|(span, leaf)| span.contains_id(id).then_some(*leaf)) + .unwrap_or_else(|| panic!("cannot find leaf for {id:?}")) + } + + fn future_peers(rope: &CrdtRope) -> Vec { + rope.tree + .iter() + .filter(|span| span.status.future) + .map(|span| span.id.peer) + .collect() + } + #[test] fn len_test() { let mut rope = CrdtRope::new(); @@ -852,6 +911,75 @@ mod test { assert_eq!(split.len(), 0); } + #[test] + fn same_parent_future_spans_keep_peer_order() { + let mut rope = CrdtRope::new(); + rope.insert(0, span(0, 0..10), |_| panic!()); + for peer in [5, 3, 7, 4] { + rope.insert(5, future_span(peer, peer * 10..peer * 10 + 1), |_| panic!()); + } + + assert_eq!(future_peers(&rope), vec![3, 4, 5, 7]); + for span in rope.tree.iter().filter(|span| span.status.future) { + assert_eq!(span.origin_left, Some(CompactId::new(0, 4))); + assert_eq!(span.origin_right, Some(CompactId::new(0, 5))); + } + } + + #[test] + fn same_parent_future_spans_keep_order_after_retreat_forward() { + let mut rope = CrdtRope::new(); + rope.insert(0, span(0, 0..10), |_| panic!()); + rope.insert(5, future_span(5, 50..51), |_| panic!()); + rope.insert(5, future_span(3, 30..31), |_| panic!()); + assert_eq!(future_peers(&rope), vec![3, 5]); + + let leaf = leaf_of(&rope, ID::new(3, 0)); + rope.update( + vec![LeafUpdate { + leaf, + id_span: IdSpan::new(3, 0, 1), + set_future: Some(false), + delete_times_diff: 0, + }], + false, + ); + assert_eq!(future_peers(&rope), vec![5]); + + let leaf = leaf_of(&rope, ID::new(3, 0)); + rope.update( + vec![LeafUpdate { + leaf, + id_span: IdSpan::new(3, 0, 1), + set_future: Some(true), + delete_times_diff: 0, + }], + false, + ); + rope.insert(5, future_span(4, 40..41), |_| panic!()); + + assert_eq!(future_peers(&rope), vec![3, 4, 5]); + } + + #[test] + fn 
mixed_right_parent_future_spans_fall_back_to_general_ordering() { + let mut rope = CrdtRope::new(); + rope.insert(0, span(0, 0..10), |_| panic!()); + rope.insert(5, future_span(2, 20..21), |_| panic!()); + rope.insert(5, span(9, 90..91), |_| panic!()); + + let lookup = leaf_lookup(&rope); + let inserted = rope.insert(5, future_span(3, 30..31), |id| lookup_leaf(&lookup, id)); + assert_eq!(inserted.content.origin_left, Some(CompactId::new(0, 4))); + assert_eq!(inserted.content.origin_right, Some(CompactId::new(9, 0))); + assert_eq!(future_peers(&rope), vec![2, 3]); + + rope.delete(ID::new(10, 0), 5, 1, false, &mut |_| {}); + let lookup = leaf_lookup(&rope); + rope.insert(5, future_span(4, 40..41), |id| lookup_leaf(&lookup, id)); + assert_eq!(future_peers(&rope), vec![2, 3, 4]); + } + #[test] fn checkout() { let mut rope = CrdtRope::new(); diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index 71bb4c2c5..2337e91da 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -36,7 +36,7 @@ use crate::{ event::{DiffVariant, InternalDiff}, op::{InnerContent, RichOp, SliceRange, SliceWithId}, span::{HasId, HasLamport}, - version::Frontiers, + version::{CausalVersion, Frontiers}, InternalString, VersionVector, }; @@ -46,6 +46,91 @@ use self::unknown::UnknownDiffCalculator; use super::{event::InternalContainerDiff, oplog::OpLog}; +#[cfg(feature = "test_utils")] +pub(crate) mod profiling { + use std::{cell::RefCell, time::Duration}; + + #[derive(Debug, Clone, Copy, Default)] + pub(crate) struct DiffCalcProfile { + pub richtext_tracker_checkout: Duration, + pub richtext_tracker_diff: Duration, + pub richtext_delta_build: Duration, + pub richtext_insert_future_scan: Duration, + pub causal_vv_materialize: Duration, + pub richtext_tracker_checkout_count: u64, + pub richtext_tracker_diff_count: u64, + pub richtext_delta_build_count: u64, + pub richtext_insert_future_scan_count: u64, + pub 
richtext_insert_future_scan_visited: u64, + pub richtext_insert_future_scan_max_visited: usize, + pub causal_vv_materialize_count: u64, + pub max_causal_vv_width: usize, + } + + thread_local! { + static PROFILE: RefCell> = RefCell::new(None); + } + + pub(crate) fn begin() { + PROFILE.with(|profile| { + *profile.borrow_mut() = Some(DiffCalcProfile::default()); + }); + } + + pub(crate) fn finish() -> DiffCalcProfile { + PROFILE.with(|profile| profile.borrow_mut().take().unwrap_or_default()) + } + + pub(crate) fn record_richtext_tracker_checkout(duration: Duration) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_tracker_checkout += duration; + profile.richtext_tracker_checkout_count += 1; + } + }); + } + + pub(crate) fn record_richtext_tracker_diff(duration: Duration) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_tracker_diff += duration; + profile.richtext_tracker_diff_count += 1; + } + }); + } + + pub(crate) fn record_richtext_delta_build(duration: Duration) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_delta_build += duration; + profile.richtext_delta_build_count += 1; + } + }); + } + + pub(crate) fn record_richtext_insert_future_scan(duration: Duration, visited: usize) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_insert_future_scan += duration; + profile.richtext_insert_future_scan_count += 1; + profile.richtext_insert_future_scan_visited += visited as u64; + profile.richtext_insert_future_scan_max_visited = + profile.richtext_insert_future_scan_max_visited.max(visited); + } + }); + } + + pub(crate) fn record_causal_vv_materialize(duration: Duration, width: usize) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.causal_vv_materialize += duration; + profile.causal_vv_materialize_count += 
1; + profile.max_causal_vv_width = profile.max_causal_vv_width.max(width); + } + }); + } +} + /// Calculate the diff between two versions. given [OpLog][super::oplog::OpLog] /// and [AppState][super::state::AppState]. /// @@ -172,7 +257,7 @@ impl DiffCalculator { let affected_set = { loro_common::debug!("LCA: {:?} mode={:?}", &lca, diff_mode); let mut started_set = FxHashSet::default(); - for (change, (start_counter, end_counter), vv) in iter { + for (change, (start_counter, end_counter), base_vv, base_frontiers) in iter { let iter_start = change .ops .binary_search_by(|op| op.ctr_last().cmp(&start_counter)) @@ -205,8 +290,14 @@ impl DiffCalculator { op = stack_sliced_op.as_ref().unwrap(); } - let vv = &mut vv.borrow_mut(); - vv.extend_to_include_end_id(ID::new(change.peer(), op.counter)); + let base_peer_end = base_vv.get(&change.peer()).copied().unwrap_or(0); + let single_frontier = if op.counter > base_peer_end { + Some(ID::new(change.peer(), op.counter - 1)) + } else { + base_frontiers.as_single() + }; + let causal_vv = + CausalVersion::new(&base_vv, change.peer(), op.counter, single_frontier); let container = op.container; let depth = oplog.arena.get_depth(container); let (old_depth, calculator) = self.get_or_create_calc(container, depth); @@ -228,7 +319,7 @@ impl DiffCalculator { calculator.apply_change( oplog, RichOp::new_by_change(&change, op), - Some(vv), + Some(causal_vv), ); visited.insert(container); } @@ -389,12 +480,7 @@ impl DiffCalculator { #[enum_dispatch] pub(crate) trait DiffCalculatorTrait { fn start_tracking(&mut self, oplog: &OpLog, vv: &crate::VersionVector, mode: DiffMode); - fn apply_change( - &mut self, - oplog: &OpLog, - op: crate::op::RichOp, - vv: Option<&crate::VersionVector>, - ); + fn apply_change(&mut self, oplog: &OpLog, op: crate::op::RichOp, vv: Option>); fn calculate_diff( &mut self, idx: ContainerIdx, @@ -451,7 +537,7 @@ impl DiffCalculatorTrait for MapDiffCalculator { &mut self, _oplog: &crate::OpLog, op: crate::op::RichOp, - 
_vv: Option<&crate::VersionVector>, + _vv: Option>, ) { if matches!(self.current_mode, DiffMode::Checkout) { // We need to use history cache anyway @@ -594,10 +680,10 @@ impl DiffCalculatorTrait for ListDiffCalculator { &mut self, _oplog: &OpLog, op: crate::op::RichOp, - vv: Option<&crate::VersionVector>, + vv: Option>, ) { if let Some(vv) = vv { - self.tracker.checkout(vv); + self.tracker.checkout_causal(vv); } match &op.op().content { @@ -795,6 +881,30 @@ impl RichtextDiffCalculator { } } +#[cfg(feature = "test_utils")] +fn richtext_tracker_checkout(tracker: &mut RichtextTracker, vv: &VersionVector) { + let start = std::time::Instant::now(); + tracker.checkout(vv); + profiling::record_richtext_tracker_checkout(start.elapsed()); +} + +#[cfg(feature = "test_utils")] +fn richtext_tracker_checkout_causal(tracker: &mut RichtextTracker, vv: CausalVersion<'_>) { + let start = std::time::Instant::now(); + tracker.checkout_causal(vv); + profiling::record_richtext_tracker_checkout(start.elapsed()); +} + +#[cfg(not(feature = "test_utils"))] +fn richtext_tracker_checkout(tracker: &mut RichtextTracker, vv: &VersionVector) { + tracker.checkout(vv); +} + +#[cfg(not(feature = "test_utils"))] +fn richtext_tracker_checkout_causal(tracker: &mut RichtextTracker, vv: CausalVersion<'_>) { + tracker.checkout_causal(vv); +} + impl DiffCalculatorTrait for RichtextDiffCalculator { fn start_tracking( &mut self, @@ -828,7 +938,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { *start_vv = vv.clone(); } - tracker.checkout(vv); + richtext_tracker_checkout(tracker, vv); } RichtextCalcMode::Linear { .. 
} => {} } @@ -838,7 +948,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { &mut self, oplog: &super::oplog::OpLog, op: crate::op::RichOp, - vv: Option<&crate::VersionVector>, + vv: Option>, ) { match &mut *self.mode { RichtextCalcMode::Linear { @@ -941,7 +1051,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { start_vv: _, } => { if let Some(vv) = vv { - tracker.checkout(vv); + richtext_tracker_checkout_causal(tracker, vv); } match &op.raw_op().content { crate::op::InnerContent::List(l) => match l { @@ -1078,7 +1188,14 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { tracker, styles, .. } => { let mut delta = DeltaRope::new(); - for item in tracker.diff(info.from_vv, info.to_vv) { + #[cfg(feature = "test_utils")] + let tracker_diff_start = std::time::Instant::now(); + let diff_iter = tracker.diff(info.from_vv, info.to_vv); + #[cfg(feature = "test_utils")] + profiling::record_richtext_tracker_diff(tracker_diff_start.elapsed()); + #[cfg(feature = "test_utils")] + let delta_build_start = std::time::Instant::now(); + for item in diff_iter { match item { CrdtRopeDelta::Retain(len) => { delta.push_retain(len, ()); @@ -1167,6 +1284,8 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { } } } + #[cfg(feature = "test_utils")] + profiling::record_richtext_delta_build(delta_build_start.elapsed()); (InternalDiff::RichtextRaw(delta), DiffMode::Checkout) } @@ -1215,7 +1334,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { &mut self, oplog: &OpLog, op: crate::op::RichOp, - vv: Option<&crate::VersionVector>, + vv: Option>, ) { let InnerContent::List(l) = &op.raw_op().content else { unreachable!() @@ -1301,7 +1420,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { // Apply change on the list items let this = &mut self.list; if let Some(vv) = vv { - this.tracker.checkout(vv); + this.tracker.checkout_causal(vv); } let real_op = op.op(); diff --git a/crates/loro-internal/src/diff_calc/counter.rs 
b/crates/loro-internal/src/diff_calc/counter.rs index bf0663b54..9c1627c91 100644 --- a/crates/loro-internal/src/diff_calc/counter.rs +++ b/crates/loro-internal/src/diff_calc/counter.rs @@ -2,7 +2,7 @@ use std::collections::BTreeMap; use loro_common::{ContainerID, ID}; -use crate::{container::idx::ContainerIdx, event::InternalDiff, OpLog}; +use crate::{container::idx::ContainerIdx, event::InternalDiff, version::CausalVersion, OpLog}; use super::{DiffCalcVersionInfo, DiffCalculatorTrait, DiffMode}; @@ -26,7 +26,7 @@ impl DiffCalculatorTrait for CounterDiffCalculator { &mut self, _oplog: &OpLog, op: crate::op::RichOp, - _vv: Option<&crate::VersionVector>, + _vv: Option>, ) { let id = op.id(); self.ops.insert( diff --git a/crates/loro-internal/src/diff_calc/tree.rs b/crates/loro-internal/src/diff_calc/tree.rs index 5dc93eb52..b5523613c 100644 --- a/crates/loro-internal/src/diff_calc/tree.rs +++ b/crates/loro-internal/src/diff_calc/tree.rs @@ -11,7 +11,7 @@ use crate::{ delta::{TreeDelta, TreeDeltaItem, TreeInternalDiff}, event::InternalDiff, state::TreeParentId, - version::Frontiers, + version::{CausalVersion, Frontiers}, OpLog, VersionVector, }; @@ -52,7 +52,7 @@ impl DiffCalculatorTrait for TreeDiffCalculator { &mut self, _oplog: &OpLog, op: crate::op::RichOp, - _vv: Option<&crate::VersionVector>, + _vv: Option>, ) { match &mut self.mode { TreeDiffCalculatorMode::Crdt => {} diff --git a/crates/loro-internal/src/diff_calc/unknown.rs b/crates/loro-internal/src/diff_calc/unknown.rs index 25a6fbff3..0532b4caa 100644 --- a/crates/loro-internal/src/diff_calc/unknown.rs +++ b/crates/loro-internal/src/diff_calc/unknown.rs @@ -1,6 +1,6 @@ use loro_common::ContainerID; -use crate::{container::idx::ContainerIdx, event::InternalDiff, OpLog}; +use crate::{container::idx::ContainerIdx, event::InternalDiff, version::CausalVersion, OpLog}; use super::{DiffCalcVersionInfo, DiffCalculatorTrait, DiffMode}; @@ -14,7 +14,7 @@ impl DiffCalculatorTrait for UnknownDiffCalculator { &mut 
self, _oplog: &OpLog, _op: crate::op::RichOp, - _vv: Option<&crate::VersionVector>, + _vv: Option>, ) { } diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index 2da16a37a..9e4e88b79 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -61,6 +61,48 @@ use std::{ }; use tracing::{debug_span, info_span, instrument, warn}; +#[cfg(feature = "test_utils")] +#[derive(Debug, Clone, Copy, Default)] +pub struct CheckoutProfile { + pub total: std::time::Duration, + pub frontier_prepare: std::time::Duration, + pub frontiers_to_vv: std::time::Duration, + pub diff_calc: std::time::Duration, + pub state_apply: std::time::Duration, + pub emit_events: std::time::Duration, + pub richtext_tracker_checkout: std::time::Duration, + pub richtext_tracker_diff: std::time::Duration, + pub richtext_delta_build: std::time::Duration, + pub richtext_insert_future_scan: std::time::Duration, + pub causal_vv_materialize: std::time::Duration, + pub diff_container_count: usize, + pub from_frontiers_len: usize, + pub to_frontiers_len: usize, + pub from_vv_len: usize, + pub to_vv_len: usize, + pub richtext_tracker_checkout_count: u64, + pub richtext_tracker_diff_count: u64, + pub richtext_delta_build_count: u64, + pub richtext_insert_future_scan_count: u64, + pub richtext_insert_future_scan_visited: u64, + pub richtext_insert_future_scan_max_visited: usize, + pub causal_vv_materialize_count: u64, + pub max_causal_vv_width: usize, + pub recording_events: bool, + pub forward_diff_calculator: bool, +} + +#[cfg(feature = "test_utils")] +#[derive(Debug, Clone, Copy, Default)] +pub struct TextStateProfile { + pub richtext_tree_node_count: usize, + pub richtext_chunk_count: usize, + pub text_chunk_count: usize, + pub style_anchor_count: usize, + pub style_range_tree_node_count: usize, + pub style_range_chunk_count: usize, +} + impl Default for LoroDoc { fn default() -> Self { Self::new() @@ -1440,6 +1482,66 @@ impl LoroDoc { result } + 
#[cfg(feature = "test_utils")] + pub fn checkout_with_profile(&self, frontiers: &Frontiers) -> LoroResult { + let total_start = std::time::Instant::now(); + let was_detached = self.is_detached(); + let (options, guard) = self.implicit_commit_then_stop(); + let mut result = self._checkout_without_emitting_profile(frontiers, true, true); + if let Ok(profile) = result.as_mut() { + let emit_start = std::time::Instant::now(); + self.emit_events(); + profile.emit_events = emit_start.elapsed(); + } + drop(guard); + if self.config.detached_editing() { + if result.is_ok() { + self.renew_peer_id(); + } + self.renew_txn_if_auto_commit(options); + } else if result.is_err() { + if !was_detached { + self.renew_txn_if_auto_commit(options); + } + } else if !self.is_detached() { + self.renew_txn_if_auto_commit(options); + } + + if let Ok(profile) = result.as_mut() { + profile.total = total_start.elapsed(); + } + + result + } + + #[cfg(feature = "test_utils")] + pub fn text_state_profile(&self, name: &str) -> Option { + let id = ContainerID::new_root(name, ContainerType::Text); + let idx = self.arena.id_to_idx(&id)?; + let mut state = self.state.lock(); + let ( + richtext_tree_node_count, + richtext_chunk_count, + text_chunk_count, + style_anchor_count, + style_range_tree_node_count, + style_range_chunk_count, + ) = state.with_state_mut(idx, |state| { + state + .as_richtext_state_mut() + .map(|state| state.debug_counts()) + })?; + + Some(TextStateProfile { + richtext_tree_node_count, + richtext_chunk_count, + text_chunk_count, + style_anchor_count, + style_range_tree_node_count, + style_range_chunk_count, + }) + } + /// NOTE: The caller of this method should ensure the txn is locked and set to None #[instrument(level = "info", skip(self))] pub(crate) fn _checkout_without_emitting( @@ -1483,7 +1585,6 @@ impl LoroDoc { } let mut state = self.state.lock(); - let mut calc = self.diff_calculator.lock(); for i in frontiers.iter() { if !oplog.dag.contains(i) { return 
Err(LoroError::FrontiersNotFound(i)); @@ -1506,8 +1607,14 @@ impl LoroDoc { }; self.set_detached(true); - let (diff, diff_mode) = - calc.calc_diff_internal(&oplog, &before, &state.frontiers, after, &frontiers, None); + let use_forward_diff_calculator = should_use_forward_diff_calculator(&before, after); + let (diff, diff_mode) = if use_forward_diff_calculator { + let mut calc = DiffCalculator::new(false); + calc.calc_diff_internal(&oplog, &before, &state.frontiers, after, &frontiers, None) + } else { + let mut calc = self.diff_calculator.lock(); + calc.calc_diff_internal(&oplog, &before, &state.frontiers, after, &frontiers, None) + }; state.apply_diff( InternalDocDiff { origin: "checkout".into(), @@ -1521,6 +1628,127 @@ impl LoroDoc { Ok(()) } + #[cfg(feature = "test_utils")] + fn _checkout_without_emitting_profile( + &self, + frontiers: &Frontiers, + to_shrink_frontiers: bool, + _to_commit_then_renew: bool, + ) -> Result { + let mut profile = CheckoutProfile::default(); + let prepare_start = std::time::Instant::now(); + if !self.txn.is_locked() { + return Err(LoroError::TransactionError( + "checkout requires the transaction mutex to be held" + .to_string() + .into_boxed_str(), + )); + } + let from_frontiers = self.state_frontiers(); + profile.from_frontiers_len = from_frontiers.len(); + profile.to_frontiers_len = frontiers.len(); + loro_common::info!( + "checkout from={:?} to={:?} cur_vv={:?}", + from_frontiers, + frontiers, + self.oplog_vv() + ); + + if &from_frontiers == frontiers { + profile.frontier_prepare = prepare_start.elapsed(); + return Ok(profile); + } + + let oplog = self.oplog.lock(); + if oplog.dag.is_before_shallow_root(frontiers) { + return Err(LoroError::SwitchToVersionBeforeShallowRoot); + } + + let frontiers = if to_shrink_frontiers { + shrink_frontiers(frontiers, &oplog.dag).map_err(LoroError::FrontiersNotFound)? 
+ } else { + frontiers.clone() + }; + profile.to_frontiers_len = frontiers.len(); + + if from_frontiers == frontiers { + profile.frontier_prepare = prepare_start.elapsed(); + return Ok(profile); + } + + let mut state = self.state.lock(); + for i in frontiers.iter() { + if !oplog.dag.contains(i) { + return Err(LoroError::FrontiersNotFound(i)); + } + } + profile.frontier_prepare = prepare_start.elapsed(); + + let vv_start = std::time::Instant::now(); + let before = oplog.dag.frontiers_to_vv(&state.frontiers).ok_or_else(|| { + LoroError::NotFoundError( + format!( + "Cannot find the current state version {:?}", + state.frontiers + ) + .into_boxed_str(), + ) + })?; + let Some(after) = &oplog.dag.frontiers_to_vv(&frontiers) else { + return Err(LoroError::NotFoundError( + format!("Cannot find the specified version {:?}", frontiers).into_boxed_str(), + )); + }; + profile.frontiers_to_vv = vv_start.elapsed(); + profile.from_vv_len = before.len(); + profile.to_vv_len = after.len(); + profile.recording_events = state.is_recording(); + + self.set_detached(true); + let diff_start = std::time::Instant::now(); + crate::diff_calc::profiling::begin(); + profile.forward_diff_calculator = should_use_forward_diff_calculator(&before, after); + let (diff, diff_mode) = if profile.forward_diff_calculator { + let mut calc = DiffCalculator::new(false); + calc.calc_diff_internal(&oplog, &before, &state.frontiers, after, &frontiers, None) + } else { + let mut calc = self.diff_calculator.lock(); + calc.calc_diff_internal(&oplog, &before, &state.frontiers, after, &frontiers, None) + }; + let diff_profile = crate::diff_calc::profiling::finish(); + profile.diff_calc = diff_start.elapsed(); + profile.richtext_tracker_checkout = diff_profile.richtext_tracker_checkout; + profile.richtext_tracker_diff = diff_profile.richtext_tracker_diff; + profile.richtext_delta_build = diff_profile.richtext_delta_build; + profile.richtext_insert_future_scan = diff_profile.richtext_insert_future_scan; + 
profile.causal_vv_materialize = diff_profile.causal_vv_materialize; + profile.richtext_tracker_checkout_count = diff_profile.richtext_tracker_checkout_count; + profile.richtext_tracker_diff_count = diff_profile.richtext_tracker_diff_count; + profile.richtext_delta_build_count = diff_profile.richtext_delta_build_count; + profile.richtext_insert_future_scan_count = diff_profile.richtext_insert_future_scan_count; + profile.richtext_insert_future_scan_visited = + diff_profile.richtext_insert_future_scan_visited; + profile.richtext_insert_future_scan_max_visited = + diff_profile.richtext_insert_future_scan_max_visited; + profile.causal_vv_materialize_count = diff_profile.causal_vv_materialize_count; + profile.max_causal_vv_width = diff_profile.max_causal_vv_width; + profile.diff_container_count = diff.len(); + + let apply_start = std::time::Instant::now(); + state.apply_diff( + InternalDocDiff { + origin: "checkout".into(), + diff: Cow::Owned(diff), + by: EventTriggerKind::Checkout, + new_version: Cow::Owned(frontiers.clone()), + }, + diff_mode, + )?; + profile.state_apply = apply_start.elapsed(); + + Ok(profile) + } + #[inline] pub fn vv_to_frontiers(&self, vv: &VersionVector) -> Frontiers { self.oplog.lock().dag.vv_to_frontiers(vv) @@ -2096,6 +2324,10 @@ fn find_last_delete_op(oplog: &OpLog, id: ID, idx: ContainerIdx) -> Option { None } +fn should_use_forward_diff_calculator(before: &VersionVector, after: &VersionVector) -> bool { + matches!(before.partial_cmp(after), Some(Ordering::Less)) +} + #[derive(Debug)] pub struct CommitWhenDrop<'a> { doc: &'a LoroDoc, @@ -2192,7 +2424,10 @@ impl Default for CommitOptions { mod test { use std::panic::AssertUnwindSafe; - use crate::{cursor::PosType, loro::ExportMode, version::Frontiers, LoroDoc, ToJson}; + use crate::{ + cursor::PosType, handler::HandlerTrait, loro::ExportMode, version::Frontiers, LoroDoc, + ToJson, + }; use loro_common::ID; #[test] @@ -2257,6 +2492,125 @@ mod test { } } + #[test] + fn 
text_checkout_wide_causal_multi_peer() { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + text.insert(0, "base", PosType::Unicode).unwrap(); + doc.commit_then_renew(); + + let mut frontiers = vec![doc.oplog_frontiers()]; + let mut expected = vec!["base".to_string()]; + let mut len = 4; + for peer in 0..32 { + let snapshot = doc.export(ExportMode::snapshot()).unwrap(); + let base_vv = doc.oplog_vv(); + let peer_doc = LoroDoc::new_auto_commit(); + peer_doc.import(&snapshot).unwrap(); + peer_doc.set_peer_id(peer + 2).unwrap(); + let peer_text = peer_doc.get_text("text"); + peer_text.insert(len, "x", PosType::Unicode).unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + len += 1; + frontiers.push(doc.oplog_frontiers()); + expected.push(format!("base{}", "x".repeat(peer as usize + 1))); + } + + for idx in (0..frontiers.len()).rev() { + doc.checkout(&frontiers[idx]).unwrap(); + assert_eq!( + text.get_value().as_string().unwrap().as_str(), + expected[idx] + ); + } + + for idx in 0..frontiers.len() { + doc.checkout(&frontiers[idx]).unwrap(); + assert_eq!( + text.get_value().as_string().unwrap().as_str(), + expected[idx] + ); + } + } + + #[test] + fn checkout_same_deps_same_position_frontiers_text_consistent() { + let base_doc = LoroDoc::new_auto_commit(); + base_doc.set_peer_id(1).unwrap(); + let base_text = base_doc.get_text("text"); + base_text.insert(0, "base", PosType::Unicode).unwrap(); + base_doc.commit_then_renew(); + let snapshot = base_doc.export(ExportMode::snapshot()).unwrap(); + let base_vv = base_doc.oplog_vv(); + let base_frontiers = base_doc.oplog_frontiers(); + + let doc = LoroDoc::new_auto_commit(); + doc.import(&snapshot).unwrap(); + let text = doc.get_text("text"); + for peer in 0..32 { + let peer_doc = LoroDoc::new_auto_commit(); + peer_doc.import(&snapshot).unwrap(); + peer_doc.set_peer_id(peer + 
2).unwrap(); + let peer_text = peer_doc.get_text("text"); + peer_text.insert(0, "x", PosType::Unicode).unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + } + + let latest_frontiers = doc.oplog_frontiers(); + assert_eq!(latest_frontiers.len(), 32); + let expected = text.get_value().as_string().unwrap().to_string(); + + doc.checkout(&base_frontiers).unwrap(); + assert_eq!(text.get_value().as_string().unwrap().as_str(), "base"); + + doc.checkout(&latest_frontiers).unwrap(); + assert_eq!( + text.get_value().as_string().unwrap().as_str(), + expected.as_str() + ); + doc.check_state_diff_calc_consistency_slow(); + } + + #[test] + fn checkout_to_latest_linear_text_state_consistent() { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + text.insert(0, "base", PosType::Unicode).unwrap(); + doc.commit_then_renew(); + + let mut frontiers = vec![doc.oplog_frontiers()]; + let mut expected = vec!["base".to_string()]; + for _ in 0..24 { + let pos = text.get_value().as_string().unwrap().chars().count(); + text.insert(pos, "x", PosType::Unicode).unwrap(); + doc.commit_then_renew(); + frontiers.push(doc.oplog_frontiers()); + expected.push(format!("base{}", "x".repeat(expected.len()))); + } + + let old_idx = 7; + doc.checkout(&frontiers[old_idx]).unwrap(); + assert!(doc.is_detached()); + assert_eq!( + text.get_value().as_string().unwrap().as_str(), + expected[old_idx] + ); + + doc.checkout_to_latest(); + assert!(!doc.is_detached()); + assert_eq!( + text.get_value().as_string().unwrap().as_str(), + expected.last().unwrap() + ); + doc.check_state_diff_calc_consistency_slow(); + } + #[test] fn import_batch_err_181() { let a = LoroDoc::new_auto_commit(); diff --git a/crates/loro-internal/src/oplog.rs b/crates/loro-internal/src/oplog.rs index f9d0e86a4..9888cb354 100644 --- a/crates/loro-internal/src/oplog.rs +++ 
b/crates/loro-internal/src/oplog.rs @@ -5,9 +5,7 @@ mod pending_changes; use crate::sync::Mutex; use bytes::Bytes; use std::borrow::Cow; -use std::cell::RefCell; use std::cmp::Ordering; -use std::rc::Rc; use tracing::trace_span; use self::change_store::iter::MergedChangeIter; @@ -392,7 +390,7 @@ impl OpLog { /// iterates over all changes between LCA(common ancestors) to the merged version of (`from` and `to`) causally /// - /// Tht iterator will include a version vector when the change is applied + /// The iterator includes the causal base version and frontiers before each change is applied. /// /// returns: (common_ancestor_vv, iterator) /// @@ -414,7 +412,8 @@ impl OpLog { Item = ( BlockChangeRef, (Counter, Counter), - Rc>, + ImVersionVector, + Frontiers, ), > + '_, ) { @@ -433,17 +432,20 @@ impl OpLog { let mut iter = self.dag.iter_causal(common_ancestors, diff); let mut node = iter.next(); let mut cur_cnt = 0; - let vv = Rc::new(RefCell::new(VersionVector::default())); ( common_ancestors_vv.clone(), diff_mode, std::iter::from_fn(move || { if let Some(inner) = &node { - let mut inner_vv = vv.borrow_mut(); - // FIXME: PERF: it looks slow for large vv, like 10000+ entries - inner_vv.clear(); - self.dag.ensure_vv_for(&inner.data); - inner_vv.extend_to_include_vv(inner.data.vv.get().unwrap().iter()); + #[cfg(feature = "test_utils")] + let vv_prepare_start = std::time::Instant::now(); + let base_vv = self.dag.ensure_vv_for(&inner.data); + #[cfg(feature = "test_utils")] + crate::diff_calc::profiling::record_causal_vv_materialize( + vv_prepare_start.elapsed(), + base_vv.len(), + ); + let base_frontiers = inner.data.deps.clone(); let peer = inner.data.peer; let cnt = inner .data @@ -461,9 +463,7 @@ impl OpLog { cur_cnt = 0; } - inner_vv.extend_to_include_end_id(change.id); - - Some((change, (cnt, dag_node_end), vv.clone())) + Some((change, (cnt, dag_node_end), base_vv, base_frontiers)) } else { None } diff --git a/crates/loro-internal/src/state/richtext_state.rs 
b/crates/loro-internal/src/state/richtext_state.rs index 32f969376..e305ab0e3 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -147,6 +147,11 @@ impl RichtextState { } } + #[cfg(feature = "test_utils")] + pub(crate) fn debug_counts(&mut self) -> (usize, usize, usize, usize, usize, usize) { + self.state.get_mut().debug_counts() + } + fn get_style_start( &mut self, style_starts: &mut FxHashMap, Pos>, @@ -582,57 +587,54 @@ impl ContainerState for RichtextState { // Rebuilding avoids repeated BTree queries and mutations when the delta is very "choppy" // (many small edit spans), but it allocates and clones chunks, so it can be slower for // small deltas. Use a cheap cost model to enable it only when it's likely beneficial. - let should_fast_apply = { - #[inline] - fn ilog2_ceil(x: usize) -> usize { - debug_assert!(x > 0); - (usize::BITS - (x - 1).leading_zeros()) as usize - } + #[inline] + fn ilog2_ceil(x: usize) -> usize { + debug_assert!(x > 0); + (usize::BITS - (x - 1).leading_zeros()) as usize + } - let state = self.state.get_mut(); - if state.has_styles() { - false - } else { - // `edit_actions` approximates how many BTree mutations the incremental path will do: - // each Replace with delete>0 becomes a drain, and each Replace with value>0 becomes an insert. - let mut edit_actions: usize = 0; - let mut is_plain_text_delta = true; - for span in richtext.iter() { - match span { - loro_delta::DeltaItem::Retain { .. } => {} - loro_delta::DeltaItem::Replace { value, delete, .. } => { - if *delete > 0 { - edit_actions += 1; - } - if value.rle_len() > 0 { - if !matches!(value, RichtextStateChunk::Text(_)) { - is_plain_text_delta = false; - break; - } - edit_actions += 1; - } + // `edit_actions` approximates how many BTree mutations the incremental path will do: + // each Replace with delete>0 becomes a drain, and each Replace with value>0 becomes an insert. 
+ let mut edit_actions: usize = 0; + let mut is_plain_text_delta = true; + for span in richtext.iter() { + match span { + loro_delta::DeltaItem::Retain { .. } => {} + loro_delta::DeltaItem::Replace { value, delete, .. } => { + if *delete > 0 { + edit_actions += 1; + } + if value.rle_len() > 0 { + if !matches!(value, RichtextStateChunk::Text(_)) { + is_plain_text_delta = false; + break; } + edit_actions += 1; } } - - if !is_plain_text_delta || edit_actions == 0 { - false - } else { - let content_nodes = state.content_node_len().max(1); - let log_n = ilog2_ceil(content_nodes + 1).max(1); - let incremental_score = edit_actions.saturating_mul(log_n); - let rebuild_score = content_nodes.saturating_add(edit_actions); - - let old_len = richtext.old_len().max(1); - let avg_action_span = old_len / edit_actions; - // A very rough proxy for "choppiness": many edit actions with small average span. - // The thresholds are intentionally conservative to avoid rebuilding for small or - // localized deltas. - let is_choppy = edit_actions >= 256 && avg_action_span <= 32; - - is_choppy && incremental_score >= rebuild_score.saturating_mul(4) - } } + } + + let state_has_styles = self.state.get_mut().has_styles(); + let use_plain_text_no_event_path = + !state_has_styles && is_plain_text_delta && edit_actions > 0; + let should_fast_apply = if use_plain_text_no_event_path { + let state = self.state.get_mut(); + let content_nodes = state.content_node_len().max(1); + let log_n = ilog2_ceil(content_nodes + 1).max(1); + let incremental_score = edit_actions.saturating_mul(log_n); + let rebuild_score = content_nodes.saturating_add(edit_actions); + + let old_len = richtext.old_len().max(1); + let avg_action_span = old_len / edit_actions; + // A very rough proxy for "choppiness": many edit actions with small average span. + // The thresholds are intentionally conservative to avoid rebuilding for small or + // localized deltas. 
+ let is_choppy = edit_actions >= 256 && avg_action_span <= 32; + + is_choppy && incremental_score >= rebuild_score.saturating_mul(4) + } else { + false }; if should_fast_apply { @@ -730,18 +732,30 @@ impl ContainerState for RichtextState { loro_delta::DeltaItem::Replace { value, delete, .. } => { if *delete > 0 { // Deletions - self.state - .get_mut() - .drain_by_entity_index(entity_index, *delete, None); + if use_plain_text_no_event_path { + self.state + .get_mut() + .drain_plain_text_by_entity_index(entity_index, *delete); + } else { + self.state + .get_mut() + .drain_by_entity_index(entity_index, *delete, None); + } } if value.rle_len() > 0 { // Insertions match value { RichtextStateChunk::Text(s) => { - self.state.get_mut().insert_elem_at_entity_index( - entity_index, - RichtextStateChunk::Text(s.clone()), - ); + if use_plain_text_no_event_path { + self.state + .get_mut() + .insert_text_chunk_at_entity_index(entity_index, s.clone()); + } else { + self.state.get_mut().insert_elem_at_entity_index( + entity_index, + RichtextStateChunk::Text(s.clone()), + ); + } } RichtextStateChunk::Style { style, anchor_type } => { self.state.get_mut().insert_elem_at_entity_index( @@ -937,11 +951,6 @@ impl RichtextState { } } - #[inline] - pub(crate) fn has_styles(&mut self) -> bool { - self.state.get_mut().has_styles() - } - pub(crate) fn has_style_key_in_entity_range( &mut self, range: Range, diff --git a/crates/loro-internal/src/version.rs b/crates/loro-internal/src/version.rs index fa3a42f45..4598400e5 100644 --- a/crates/loro-internal/src/version.rs +++ b/crates/loro-internal/src/version.rs @@ -2,6 +2,7 @@ mod frontiers; pub use frontiers::Frontiers; use crate::{ + dag::Dag, id::{Counter, ID}, oplog::AppDag, span::{CounterSpan, IdSpan}, @@ -159,6 +160,66 @@ impl VersionRange { #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct ImVersionVector(im::HashMap); +/// A lightweight causal version used while replaying changes in causal order. 
+/// +/// It represents `base` plus the current peer advanced to at least `peer_end`. +/// This avoids rebuilding a full mutable [VersionVector] for every replayed +/// DAG node/op in checkout diff calculation. +#[derive(Clone, Copy, Debug)] +pub(crate) struct CausalVersion<'a> { + base: &'a ImVersionVector, + peer: PeerID, + peer_end: Counter, + single_frontier: Option, +} + +impl<'a> CausalVersion<'a> { + #[inline] + pub(crate) fn new( + base: &'a ImVersionVector, + peer: PeerID, + peer_end: Counter, + single_frontier: Option, + ) -> Self { + Self { + base, + peer, + peer_end, + single_frontier, + } + } + + #[inline] + pub(crate) fn base(&self) -> &'a ImVersionVector { + self.base + } + + #[inline] + pub(crate) fn peer(&self) -> PeerID { + self.peer + } + + #[inline] + pub(crate) fn peer_end(&self) -> Counter { + self.peer_end + } + + #[inline] + pub(crate) fn single_frontier(&self) -> Option { + self.single_frontier + } + + #[inline] + pub(crate) fn end_for_peer(&self, peer: PeerID) -> Counter { + let base_end = self.base.get(&peer).copied().unwrap_or(0); + if peer == self.peer { + base_end.max(self.peer_end) + } else { + base_end + } + } +} + impl ImVersionVector { pub fn new() -> Self { Self(Default::default()) @@ -886,6 +947,30 @@ pub fn shrink_frontiers(last_ids: &Frontiers, dag: &AppDag) -> Result 1 { + let first_id = last_ids[0].id(); + let Some(first_node) = dag.get(first_id) else { + return Err(first_id); + }; + let first_deps = first_node.deps.clone(); + let mut all_share_deps = true; + for id in &last_ids[1..] 
{ + let frontier = id.id(); + let Some(node) = dag.get(frontier) else { + return Err(frontier); + }; + if node.deps != first_deps { + all_share_deps = false; + break; + } + } + + if all_share_deps { + last_ids.sort_by_key(|x| x.lamport); + return Ok(last_ids.into_iter().rev().map(|x| x.id()).collect()); + } + } + let mut frontiers = Vec::new(); // Iterate from the greatest lamport to the smallest last_ids.sort_by_key(|x| x.lamport); From e4a9181db5eb6a652fe126aab7afdf986dec1919 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Wed, 22 Apr 2026 07:55:50 +0000 Subject: [PATCH 02/46] docs: add text checkout performance plan --- TMP_PLAN.md | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 TMP_PLAN.md diff --git a/TMP_PLAN.md b/TMP_PLAN.md new file mode 100644 index 000000000..6678f96bb --- /dev/null +++ b/TMP_PLAN.md @@ -0,0 +1,196 @@ +# Text Checkout Performance Plan + +本计划用于追踪 text checkout 性能优化。范围限定为前五项,不考虑为 text insert 缓存或编码 origin anchors、container/frontier text checkpoints 等长期缓存方案,因为这类缓存容易遗漏 underwater 数据和隐藏状态。 + +## 目标场景 + +- 多协作者异步编辑,peer 数量最多上千。 +- 类 Obsidian/代码的 plain text:大文档、局部编辑、长历史、频繁 checkout。 +- 类 Notion 的 rich text:样式范围、并发 mark、订阅事件转换。 +- 高冲突场景:大量 peer 在同一位置或相邻位置插入。 +- detached/checkout-to-latest 往返、离线分支合并后切换版本。 + +## 阶段 1: 建立 Text Checkout 专用 Benchmark + +- [x] 新增 benchmark 覆盖 text checkout,而不是只覆盖 apply/import。 +- [x] 场景 A:1000 peer 随机位置小编辑,随机 checkout 到历史 frontiers。 +- [x] 场景 B:1000 peer 同一位置或相邻位置插入,验证 future sibling 扫描成本。 +- [x] 场景 A2:1000 peer 顺序多 peer 编辑,causal VV 宽度增长到 1000,验证 per-node wide VV 成本。 +- [x] 场景 C:plain code/markdown,大文档、长事务和 one-op-one-txn 两种历史形态。 +- [x] 场景 D:rich text,样式 start/end、重叠 mark、删除样式范围。 +- [x] 场景 E:有订阅与无订阅各跑一组,拆出 event conversion 成本。 +- [x] 在 benchmark 中分段计时:`frontiers_to_vv`、`diff_calc`、`RichtextTracker::checkout/diff`、`RichtextState::apply_diff`、event conversion。 + - 当前输出 
`frontier_prepare`、`frontiers_to_vv`、`diff_calc`、`richtext_tracker_checkout`、`richtext_tracker_diff`、`richtext_delta_build`、`richtext_insert_future_scan`、`state.apply_diff`、`emit_events`。 +- [x] 给 benchmark 输出保留关键规模参数:peer 数、change 数、text 长度、text rope node 数、style node 数、diff item 数。 + - 当前输出 peer/change/text/version、VV/frontier 宽度、diff container 数、richtext tree node/chunk、text chunk、style anchor、style range tree node/chunk。 + +验收标准: + +- [x] 能稳定复现当前 text checkout 的主要热点。 +- [x] 能区分 VV 宽度、causal 切换、CRDT rope 插入扫描、state apply、event conversion 的占比。 + +## 阶段 2: 降低 Per-change VersionVector 成本 + +当前疑点: + +- `OpLog::iter_from_lca_causally` 每个 DAG node 都构造/清空/扩展完整 `VersionVector`。 +- 1000 peer 时,即便每个 change 很小,也会有 O(changes * peers) 的成本。 + +计划: + +- [x] 将 `iter_from_lca_causally` 输出的 per-node VV 从完整复制改成轻量上下文。 +- [x] 优先尝试用 `(base ImVersionVector, current peer end counter)` 或等价 view 表达当前因果版本。 +- [x] 为 `RichtextTracker::checkout` 增加直接消费 retreat/forward spans 的内部接口,避免为了 diff 两个 VV 再扫描所有 peer。 +- [x] 保持 public API 不变,所有改动限制在 internal diff calc/tracker 路径。 +- [x] 加回归测试覆盖多 peer、并发分支、checkout 前后状态一致。 + +验收标准: + +- [x] 1000 peer 小 change 场景中,`diff_calc` 时间随 peer 数增长明显降低。 +- [x] 现有 checkout/import/fuzz 相关轻量测试通过。 + +## 阶段 3: 可比版本走 Forward/Linear Fast Path + +当前疑点: + +- persist `DiffCalculator` 会把 diff mode 强制为 `Checkout`,导致可比版本也走更慢、更通用的 CRDT checkout 路径。 +- 对 `from < to` 或 checkout-to-latest,很多时候可以使用更便宜的 forward/linear/import-greater 逻辑。 + +计划: + +- [x] 明确区分目标:真实历史 checkout 与单调前进 checkout-to-latest。 +- [x] 在安全条件满足时,让 text diff 保持 `Linear` 或 `ImportGreaterUpdates` 路径。 +- [x] 如果复用 persistent richtext tracker 会破坏缓存状态,则选择失效 tracker 或延迟重建,而不是强制所有路径进入 `Checkout`。 +- [x] 覆盖 detached 状态、checkout-to-latest、多容器 revive、订阅事件。 + +验收标准: + +- [x] checkout-to-latest 在可比版本场景中避开 CRDT tracker 的双 checkout。 +- [x] 不改变 public checkout 语义和事件语义。 + +## 阶段 4: 优化 Plain Text Apply Diff 和 Event Conversion + +当前疑点: + +- `RichtextState::apply_diff` 已有 plain text choppy rebuild fast path,但 no-style/no-event 
情况还可以更直接。 +- 有订阅时 `apply_diff_and_convert` 会生成 external text delta,style/event index 转换会放大成本。 + +计划: + +- [x] 拆出 no-style/no-event 的 plain text apply fast path。 +- [x] 为 `drain_by_entity_index` 增加不需要 event index 和 affected style range 的内部路径。 +- [x] 优化单 leaf 删除与插入,避免重复 query 和 cursor conversion。 +- [ ] 对有订阅场景,减少 `style_delta.compose` 次数,能批量 compose 时批量处理。 +- [x] 保持内部不变量:无效外部输入返回 `Err`,内部状态不一致继续 fail-fast。 + +验收标准: + +- [x] plain text 无订阅 checkout apply 成本下降。 +- [x] 有订阅场景外部 event delta 保持一致。 +- [x] rich text 样式事件测试不回退。 + +## 阶段 5: 优化同位置高并发插入扫描 + +当前疑点: + +- `CrdtRope::insert` 在当前位置向右扫描 future spans,以确定并发插入顺序。 +- 多 peer 同一位置插入时,future sibling 扫描可能接近二次行为。 + +计划: + +- [x] 用 benchmark 场景 B 先确认瓶颈规模和触发条件。 +- [x] 研究为相同 `(origin_left, origin_right)` 或同 active position 的 future group 建辅助索引。 +- [x] 确认暂不引入需要随着 leaf split、future/active 状态变化维护的持久索引,先用局部 fast path 避免错误顺序风险。 +- [x] 先实现最小内部索引、局部缓存或局部 fast path,只覆盖同一位置冲突热点。 +- [x] 加测试覆盖 peer id 排序、不同 right parent、future spans、delete/retreat/forward 后再次插入。 + +验收标准: + +- [ ] 同位置 1000 peer 插入 checkout/import 成本从接近二次趋势降到接近 `N log N` 或更好。 +- [ ] Fugue ordering 与现有测试/fuzz 结果一致。 + +## 执行顺序 + +1. 先做阶段 1,避免没有基线就改热点。 +2. 再做阶段 2,因为 VV 宽度是多协作者场景最确定的通用成本。 +3. 接着做阶段 3,优化 checkout-to-latest 和单调前进版本切换。 +4. 然后做阶段 4,降低 state apply 和事件转换成本。 +5. 
最后做阶段 5,它对高冲突文本最关键,但实现风险最高。 + +## 每阶段记录 + +每完成一个阶段,在这里补充: + +- commit 或 patch 范围: +- benchmark 命令: +- before/after 数据: +- 发现的新瓶颈: +- 是否需要调整下一阶段: + +### 阶段 1 记录 + +- patch 范围:`loro.rs` 增加 `test_utils` only `CheckoutProfile`/`checkout_with_profile`;新增 `benches/text_checkout.rs`;`Cargo.toml` 注册 bench。 +- benchmark 命令:`LORO_TEXT_CHECKOUT_PROFILE=1 cargo bench -p loro-internal --features test_utils --bench text_checkout`。 +- 参数:`LORO_TEXT_CHECKOUT_PEERS`、`LORO_TEXT_CHECKOUT_BASE_LEN`、`LORO_TEXT_CHECKOUT_CHANGES` 可覆盖默认规模。 +- 验证命令:`cargo check -p loro-internal --features test_utils --bench text_checkout`;small smoke:`LORO_TEXT_CHECKOUT_PROFILE=1 LORO_TEXT_CHECKOUT_PEERS=8 LORO_TEXT_CHECKOUT_BASE_LEN=128 LORO_TEXT_CHECKOUT_CHANGES=16 cargo bench -p loro-internal --features test_utils --bench text_checkout -- --warm-up-time 0.1 --measurement-time 0.1 --sample-size 10`。 +- 增量补充:rich text subscribed mark 场景、rich text unmark/style deletion 场景、wide-causal sequential multi-peer 场景、richtext/style range BTree node/chunk 统计、RichtextTracker checkout/diff/delta build 分段。 +- before/after 数据:阶段 2 已记录 wide-causal 数据;阶段 1 作为基准与埋点保留。 +- 发现的新瓶颈:wide-causal 场景显示 `RichtextTracker::checkout` 的 causal target 扫描比 per-node VV materialization 更重。 +- 是否需要调整下一阶段:rich text 删除样式范围和 rope/style node 数已补;阶段 2 已增加 causal view 与单 frontier fast path。 + +### 阶段 2 记录 + +- 前置 profile:在 `iter_from_lca_causally` 的 per-node VV materialization 位置记录 `avg_causal_vv_materialize`、`causal_vv_materialize_calls`、`max_causal_vv_width`。 +- 目的:先把 `clear + extend_to_include_vv` 的 O(node * peer) 成本从 `diff_calc` 中拆出来,再做轻量 VV/view 优化。 +- 首个优化:`RichtextTracker::_checkout` 不再 clone 目标 `VersionVector` 到 `current_vv`,改为复用 diff 出来的 retreat/forward spans 增量更新当前 VV。这个不解决 `iter_from_lca_causally` per-node materialization,但先移除 tracker checkout 内部的 O(peer) clone。 +- 第二个优化:`iter_from_lca_causally` 不再为每个 replayed change 清空并扩展完整 `VersionVector`,改为返回 O(1) clone 的 `ImVersionVector` 基底和 DAG deps frontiers;`DiffCalculator` 构造 
`CausalVersion(base, peer_end, single_frontier_hint)` 传给 text/list tracker。 +- 第三个优化:`RichtextTracker::checkout_causal` 直接从轻量 causal view 计算 spans;同时维护 `current_frontier_hint`,当 replay target 正好是刚应用过的单 frontier 时跳过 checkout span 扫描。这个覆盖线性/顺序多人编辑和同一事务连续 op;分叉、多 frontier、历史跳转仍走完整 causal checkout。 +- 新增回归测试:`loro::test::text_checkout_wide_causal_multi_peer`,覆盖 32 peer 顺序编辑后前后 checkout。 +- 验证命令:同阶段 1 的 `cargo check` 与 small smoke bench;`cargo check -p loro-internal`;`cargo test -p loro-internal tracker:: --features test_utils`;`cargo test -p loro-internal richtext --features test_utils`;`cargo test -p loro-internal checkout --features test_utils`;`cargo test -p loro-internal import --features test_utils`。 +- 100 peer profile smoke:`plain/random-peer-checkout` 平均约 645us,`richtext_tracker_checkout` 平均约 51us,`max_frontiers_width=100`,`max_vv_width=101`。 +- 100 peer wide-causal smoke:`plain/wide-causal-peer-checkout` 平均约 244us,`max_causal_vv_width=100`,`max_vv_width=101`。 +- 1000 peer wide-causal before fast hint:平均约 5.13ms,`avg_diff_calc=4.90ms`,`avg_richtext_tracker_checkout=3.47ms`,`max_causal_vv_width=1000`。 +- 1000 peer wide-causal after fast hint:平均约 1.61ms,`avg_diff_calc=1.39ms`,`avg_richtext_tracker_checkout=37.6us`,`max_causal_vv_width=1000`。 +- 轻量 fuzz 验证:`cargo test -p fuzz random_fuzz_1s -- --nocapture`,2-site/5-site 的 6 个 1 秒 arbtest 随机用例通过。 +- 未运行:libFuzzer targets;如继续合并前需要再决定是否跑 `cargo fuzz run all` 或 `crates/fuzz/fuzz` 的相关目标。 + +### 阶段 3 记录 + +- patch 范围:`LoroDoc::_checkout_without_emitting` 和 profile 版本在 `before < after` 时使用临时 `DiffCalculator::new(false)`,保留 `find_common_ancestor` 推导出的 `Linear` / `ImportGreaterUpdates`;历史/并发 checkout 继续使用持久 `diff_calculator` 的 `Checkout` 路径。 +- 缓存策略:forward checkout 不复用持久 richtext tracker,避免把持久 tracker 切到 `Linear` mode 或污染历史 checkout cache;后续历史 checkout 若需要 tracker,会按现有 `all_vv` 检查重建。 +- benchmark 增量:新增 `code/checkout-to-latest-linear`,每次先 checkout 到旧版本,再只计量 checkout 回 latest 的耗时;profile 输出 
`forward_diff_calculator_samples`。 +- smoke 命令:`LORO_TEXT_CHECKOUT_PROFILE=1 LORO_TEXT_CHECKOUT_PEERS=50 LORO_TEXT_CHECKOUT_BASE_LEN=1024 LORO_TEXT_CHECKOUT_CHANGES=128 cargo bench -p loro-internal --features test_utils --bench text_checkout -- code/checkout-to-latest-linear --warm-up-time 0.05 --measurement-time 0.1 --sample-size 10`。 +- smoke 数据:平均约 65us,`avg_diff_calc=44.7us`,`richtext_tracker_checkout_calls=0`,`richtext_tracker_diff_calls=0`,`forward_diff_calculator_samples=640`。 +- 新增回归测试:`loro::test::checkout_to_latest_linear_text_state_consistent`,覆盖 detached 旧版本 -> checkout_to_latest,验证文本内容、attached 状态和 `check_state_diff_calc_consistency_slow`。 +- 验证命令:`cargo check -p loro-internal --features test_utils --bench text_checkout`;`cargo check -p loro-internal`;`cargo test -p loro-internal checkout --features test_utils`;`cargo test -p loro-internal richtext --features test_utils`;`cargo test -p loro-internal import --features test_utils`。 + +### 阶段 4 记录 + +- patch 范围:`InnerState` 增加 plain text 专用 `insert_text_chunk_at_entity_index` 和 `drain_plain_text_by_entity_index`;`RichtextState::apply_diff` 在无 style、plain text delta、无 event conversion 的路径上绕过 style range/event index 维护。 +- 实现边界:仅当当前 state 没有 style、delta value 全是 text、且存在 edit action 时启用;rich text style anchor/range 继续走原通用路径。 +- choppy rebuild:沿用原先 plain text rebuild 思路,但与 no-style 判定共用一次 delta 扫描;小 delta 仍走增量 apply,避免为局部编辑重建全文。 +- 回滚过的尝试:最初在 direct insert 中维护 cursor cache,`checkout-to-latest-linear` smoke 反而从约 65us 退化到约 99us;改为 direct entity query + clear cache 后恢复。 +- smoke 命令:`LORO_TEXT_CHECKOUT_PROFILE=1 LORO_TEXT_CHECKOUT_PEERS=50 LORO_TEXT_CHECKOUT_BASE_LEN=1024 LORO_TEXT_CHECKOUT_CHANGES=128 cargo bench -p loro-internal --features test_utils --bench text_checkout -- code/checkout-to-latest-linear --warm-up-time 0.05 --measurement-time 0.1 --sample-size 10`。 +- smoke 数据:阶段 3 基准平均约 65.4us、`avg_state_apply=19.2us`;阶段 4 最终平均约 65.3us、`avg_state_apply=18.7us`。这个场景中主要收益很小,说明 forward diff 已经是主优化;但 no-style 
apply 路径现在避免了 style/event 相关维护成本。 +- 验证命令:`cargo check -p loro-internal --features test_utils --bench text_checkout`;`cargo test -p loro-internal checkout --features test_utils`;`cargo test -p loro-internal richtext --features test_utils`;`cargo test -p loro-internal import --features test_utils`;`cargo check -p loro-internal`。 +- 未完成:`style_delta.compose` 批量化还没做;这只影响有订阅/rich event conversion 的后续阶段 4 子项。 +- 轻量 fuzz 验证:`cargo test -p fuzz random_fuzz_1s -- --nocapture` 通过。 +- 未运行:libFuzzer targets;如合并前需要覆盖 checkout/import/state replay 的长时间模糊测试,还需要单独安排。 + +### 阶段 5 记录 + +- 首轮定位:`plain/same-position-peer-checkout` 在 300 peer 下先暴露的最大热点不是 rope 插入扫描,而是宽 frontier 的重复 `shrink_frontiers`。before:平均约 4.93ms,`avg_frontier_prepare=3.04ms`,`avg_diff_calc=1.77ms`。 +- frontier 优化:`shrink_frontiers` 增加 same-deps fast path。去重后的 frontier DAG nodes 如果共享同一 deps,则它们互相并发,直接按原 lamport 降序返回,不做 ancestor walk;这不是长期缓存,不依赖 underwater 数据。 +- 300 peer same-position after frontier fast path:平均约 1.78ms,`avg_frontier_prepare=37.8us`,`avg_diff_calc=1.65ms`。 +- 1000 peer same-position after frontier fast path:平均约 16.6ms,`avg_frontier_prepare=240us`,`avg_frontiers_to_vv=450us`,`avg_diff_calc=15.85ms`。剩余主成本回到 replay/diff_calc。 +- profile 增量:新增 `richtext_insert_future_scan`、scan calls、avg/max visited,用来隔离 `CrdtRope::insert` 内同 active position 的 future sibling 扫描。 +- future scan 定位:1000 peer same-position 下,加入 profile 后平均约 20.56ms,`avg_richtext_insert_future_scan=1.83ms`,`richtext_insert_future_scan_calls=9674`,`avg_future_scan_visited=383`,`max_future_scan_visited=999`。 +- future scan 优化:当 `in_between` 全部和待插入 span 具有相同 `origin_left/origin_right` 时,跳过通用 visited/right-parent 比较逻辑,直接按 peer 排序用 `partition_point` 找插入点;混合 right-parent 继续走原路径,并用 debug assert 固定同父 fast path 的 peer 有序前提。 +- 1000 peer same-position after same-parent fast path:平均约 15.85ms,`avg_richtext_insert_future_scan=575us`,`avg_future_scan_visited=383`,`max_future_scan_visited=999`。 +- 
新增回归测试:`loro::test::checkout_same_deps_same_position_frontiers_text_consistent`,覆盖 32 peer 从同一 base 同位置插入后,用宽 frontiers checkout 到 base 再回 latest,并检查状态/diff consistency。 +- 新增低层回归测试:`same_parent_future_spans_keep_peer_order`、`same_parent_future_spans_keep_order_after_retreat_forward`、`mixed_right_parent_future_spans_fall_back_to_general_ordering`,覆盖 peer id 排序、不同 right parent、future spans、delete/retreat/forward 后再次插入。 +- 验证命令:`cargo test -p loro-internal richtext --features test_utils`;`cargo test -p loro-internal checkout --features test_utils`;`cargo test -p loro-internal checkout_same_deps_same_position_frontiers_text_consistent --features test_utils`。 +- 验证补充:`cargo test -p loro-internal crdt_rope::test --features test_utils`。 +- 未完成:还没有实现随 leaf split/future-active 状态维护的真正 sibling index;当前是低风险 fast path,因此不能把同位置 1000 peer 的扫描复杂度标为已经降到 `N log N`。 +- 轻量 fuzz 验证:`cargo test -p fuzz random_fuzz_1s -- --nocapture` 通过。 +- 未运行:libFuzzer targets;Fugue ordering 合并前应优先跑相关 `cargo fuzz` 目标。 From ed54924560fe7fc5df2a9abc5f2a9f70bf18c0ff Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Wed, 22 Apr 2026 08:04:06 +0000 Subject: [PATCH 03/46] docs: remove temporary text checkout plan --- TMP_PLAN.md | 196 ---------------------------------------------------- 1 file changed, 196 deletions(-) delete mode 100644 TMP_PLAN.md diff --git a/TMP_PLAN.md b/TMP_PLAN.md deleted file mode 100644 index 6678f96bb..000000000 --- a/TMP_PLAN.md +++ /dev/null @@ -1,196 +0,0 @@ -# Text Checkout Performance Plan - -本计划用于追踪 text checkout 性能优化。范围限定为前五项,不考虑为 text insert 缓存或编码 origin anchors、container/frontier text checkpoints 等长期缓存方案,因为这类缓存容易遗漏 underwater 数据和隐藏状态。 - -## 目标场景 - -- 多协作者异步编辑,peer 数量最多上千。 -- 类 Obsidian/代码的 plain text:大文档、局部编辑、长历史、频繁 checkout。 -- 类 Notion 的 rich text:样式范围、并发 mark、订阅事件转换。 -- 高冲突场景:大量 peer 在同一位置或相邻位置插入。 -- detached/checkout-to-latest 往返、离线分支合并后切换版本。 - -## 阶段 1: 建立 Text Checkout 专用 Benchmark - -- [x] 新增 benchmark 覆盖 text checkout,而不是只覆盖 apply/import。 -- [x] 场景 A:1000 peer 
随机位置小编辑,随机 checkout 到历史 frontiers。 -- [x] 场景 B:1000 peer 同一位置或相邻位置插入,验证 future sibling 扫描成本。 -- [x] 场景 A2:1000 peer 顺序多 peer 编辑,causal VV 宽度增长到 1000,验证 per-node wide VV 成本。 -- [x] 场景 C:plain code/markdown,大文档、长事务和 one-op-one-txn 两种历史形态。 -- [x] 场景 D:rich text,样式 start/end、重叠 mark、删除样式范围。 -- [x] 场景 E:有订阅与无订阅各跑一组,拆出 event conversion 成本。 -- [x] 在 benchmark 中分段计时:`frontiers_to_vv`、`diff_calc`、`RichtextTracker::checkout/diff`、`RichtextState::apply_diff`、event conversion。 - - 当前输出 `frontier_prepare`、`frontiers_to_vv`、`diff_calc`、`richtext_tracker_checkout`、`richtext_tracker_diff`、`richtext_delta_build`、`richtext_insert_future_scan`、`state.apply_diff`、`emit_events`。 -- [x] 给 benchmark 输出保留关键规模参数:peer 数、change 数、text 长度、text rope node 数、style node 数、diff item 数。 - - 当前输出 peer/change/text/version、VV/frontier 宽度、diff container 数、richtext tree node/chunk、text chunk、style anchor、style range tree node/chunk。 - -验收标准: - -- [x] 能稳定复现当前 text checkout 的主要热点。 -- [x] 能区分 VV 宽度、causal 切换、CRDT rope 插入扫描、state apply、event conversion 的占比。 - -## 阶段 2: 降低 Per-change VersionVector 成本 - -当前疑点: - -- `OpLog::iter_from_lca_causally` 每个 DAG node 都构造/清空/扩展完整 `VersionVector`。 -- 1000 peer 时,即便每个 change 很小,也会有 O(changes * peers) 的成本。 - -计划: - -- [x] 将 `iter_from_lca_causally` 输出的 per-node VV 从完整复制改成轻量上下文。 -- [x] 优先尝试用 `(base ImVersionVector, current peer end counter)` 或等价 view 表达当前因果版本。 -- [x] 为 `RichtextTracker::checkout` 增加直接消费 retreat/forward spans 的内部接口,避免为了 diff 两个 VV 再扫描所有 peer。 -- [x] 保持 public API 不变,所有改动限制在 internal diff calc/tracker 路径。 -- [x] 加回归测试覆盖多 peer、并发分支、checkout 前后状态一致。 - -验收标准: - -- [x] 1000 peer 小 change 场景中,`diff_calc` 时间随 peer 数增长明显降低。 -- [x] 现有 checkout/import/fuzz 相关轻量测试通过。 - -## 阶段 3: 可比版本走 Forward/Linear Fast Path - -当前疑点: - -- persist `DiffCalculator` 会把 diff mode 强制为 `Checkout`,导致可比版本也走更慢、更通用的 CRDT checkout 路径。 -- 对 `from < to` 或 checkout-to-latest,很多时候可以使用更便宜的 forward/linear/import-greater 逻辑。 - -计划: - -- [x] 明确区分目标:真实历史 checkout 与单调前进 checkout-to-latest。 -- [x] 
在安全条件满足时,让 text diff 保持 `Linear` 或 `ImportGreaterUpdates` 路径。 -- [x] 如果复用 persistent richtext tracker 会破坏缓存状态,则选择失效 tracker 或延迟重建,而不是强制所有路径进入 `Checkout`。 -- [x] 覆盖 detached 状态、checkout-to-latest、多容器 revive、订阅事件。 - -验收标准: - -- [x] checkout-to-latest 在可比版本场景中避开 CRDT tracker 的双 checkout。 -- [x] 不改变 public checkout 语义和事件语义。 - -## 阶段 4: 优化 Plain Text Apply Diff 和 Event Conversion - -当前疑点: - -- `RichtextState::apply_diff` 已有 plain text choppy rebuild fast path,但 no-style/no-event 情况还可以更直接。 -- 有订阅时 `apply_diff_and_convert` 会生成 external text delta,style/event index 转换会放大成本。 - -计划: - -- [x] 拆出 no-style/no-event 的 plain text apply fast path。 -- [x] 为 `drain_by_entity_index` 增加不需要 event index 和 affected style range 的内部路径。 -- [x] 优化单 leaf 删除与插入,避免重复 query 和 cursor conversion。 -- [ ] 对有订阅场景,减少 `style_delta.compose` 次数,能批量 compose 时批量处理。 -- [x] 保持内部不变量:无效外部输入返回 `Err`,内部状态不一致继续 fail-fast。 - -验收标准: - -- [x] plain text 无订阅 checkout apply 成本下降。 -- [x] 有订阅场景外部 event delta 保持一致。 -- [x] rich text 样式事件测试不回退。 - -## 阶段 5: 优化同位置高并发插入扫描 - -当前疑点: - -- `CrdtRope::insert` 在当前位置向右扫描 future spans,以确定并发插入顺序。 -- 多 peer 同一位置插入时,future sibling 扫描可能接近二次行为。 - -计划: - -- [x] 用 benchmark 场景 B 先确认瓶颈规模和触发条件。 -- [x] 研究为相同 `(origin_left, origin_right)` 或同 active position 的 future group 建辅助索引。 -- [x] 确认暂不引入需要随着 leaf split、future/active 状态变化维护的持久索引,先用局部 fast path 避免错误顺序风险。 -- [x] 先实现最小内部索引、局部缓存或局部 fast path,只覆盖同一位置冲突热点。 -- [x] 加测试覆盖 peer id 排序、不同 right parent、future spans、delete/retreat/forward 后再次插入。 - -验收标准: - -- [ ] 同位置 1000 peer 插入 checkout/import 成本从接近二次趋势降到接近 `N log N` 或更好。 -- [ ] Fugue ordering 与现有测试/fuzz 结果一致。 - -## 执行顺序 - -1. 先做阶段 1,避免没有基线就改热点。 -2. 再做阶段 2,因为 VV 宽度是多协作者场景最确定的通用成本。 -3. 接着做阶段 3,优化 checkout-to-latest 和单调前进版本切换。 -4. 然后做阶段 4,降低 state apply 和事件转换成本。 -5. 
最后做阶段 5,它对高冲突文本最关键,但实现风险最高。 - -## 每阶段记录 - -每完成一个阶段,在这里补充: - -- commit 或 patch 范围: -- benchmark 命令: -- before/after 数据: -- 发现的新瓶颈: -- 是否需要调整下一阶段: - -### 阶段 1 记录 - -- patch 范围:`loro.rs` 增加 `test_utils` only `CheckoutProfile`/`checkout_with_profile`;新增 `benches/text_checkout.rs`;`Cargo.toml` 注册 bench。 -- benchmark 命令:`LORO_TEXT_CHECKOUT_PROFILE=1 cargo bench -p loro-internal --features test_utils --bench text_checkout`。 -- 参数:`LORO_TEXT_CHECKOUT_PEERS`、`LORO_TEXT_CHECKOUT_BASE_LEN`、`LORO_TEXT_CHECKOUT_CHANGES` 可覆盖默认规模。 -- 验证命令:`cargo check -p loro-internal --features test_utils --bench text_checkout`;small smoke:`LORO_TEXT_CHECKOUT_PROFILE=1 LORO_TEXT_CHECKOUT_PEERS=8 LORO_TEXT_CHECKOUT_BASE_LEN=128 LORO_TEXT_CHECKOUT_CHANGES=16 cargo bench -p loro-internal --features test_utils --bench text_checkout -- --warm-up-time 0.1 --measurement-time 0.1 --sample-size 10`。 -- 增量补充:rich text subscribed mark 场景、rich text unmark/style deletion 场景、wide-causal sequential multi-peer 场景、richtext/style range BTree node/chunk 统计、RichtextTracker checkout/diff/delta build 分段。 -- before/after 数据:阶段 2 已记录 wide-causal 数据;阶段 1 作为基准与埋点保留。 -- 发现的新瓶颈:wide-causal 场景显示 `RichtextTracker::checkout` 的 causal target 扫描比 per-node VV materialization 更重。 -- 是否需要调整下一阶段:rich text 删除样式范围和 rope/style node 数已补;阶段 2 已增加 causal view 与单 frontier fast path。 - -### 阶段 2 记录 - -- 前置 profile:在 `iter_from_lca_causally` 的 per-node VV materialization 位置记录 `avg_causal_vv_materialize`、`causal_vv_materialize_calls`、`max_causal_vv_width`。 -- 目的:先把 `clear + extend_to_include_vv` 的 O(node * peer) 成本从 `diff_calc` 中拆出来,再做轻量 VV/view 优化。 -- 首个优化:`RichtextTracker::_checkout` 不再 clone 目标 `VersionVector` 到 `current_vv`,改为复用 diff 出来的 retreat/forward spans 增量更新当前 VV。这个不解决 `iter_from_lca_causally` per-node materialization,但先移除 tracker checkout 内部的 O(peer) clone。 -- 第二个优化:`iter_from_lca_causally` 不再为每个 replayed change 清空并扩展完整 `VersionVector`,改为返回 O(1) clone 的 `ImVersionVector` 基底和 DAG deps frontiers;`DiffCalculator` 构造 
`CausalVersion(base, peer_end, single_frontier_hint)` 传给 text/list tracker。 -- 第三个优化:`RichtextTracker::checkout_causal` 直接从轻量 causal view 计算 spans;同时维护 `current_frontier_hint`,当 replay target 正好是刚应用过的单 frontier 时跳过 checkout span 扫描。这个覆盖线性/顺序多人编辑和同一事务连续 op;分叉、多 frontier、历史跳转仍走完整 causal checkout。 -- 新增回归测试:`loro::test::text_checkout_wide_causal_multi_peer`,覆盖 32 peer 顺序编辑后前后 checkout。 -- 验证命令:同阶段 1 的 `cargo check` 与 small smoke bench;`cargo check -p loro-internal`;`cargo test -p loro-internal tracker:: --features test_utils`;`cargo test -p loro-internal richtext --features test_utils`;`cargo test -p loro-internal checkout --features test_utils`;`cargo test -p loro-internal import --features test_utils`。 -- 100 peer profile smoke:`plain/random-peer-checkout` 平均约 645us,`richtext_tracker_checkout` 平均约 51us,`max_frontiers_width=100`,`max_vv_width=101`。 -- 100 peer wide-causal smoke:`plain/wide-causal-peer-checkout` 平均约 244us,`max_causal_vv_width=100`,`max_vv_width=101`。 -- 1000 peer wide-causal before fast hint:平均约 5.13ms,`avg_diff_calc=4.90ms`,`avg_richtext_tracker_checkout=3.47ms`,`max_causal_vv_width=1000`。 -- 1000 peer wide-causal after fast hint:平均约 1.61ms,`avg_diff_calc=1.39ms`,`avg_richtext_tracker_checkout=37.6us`,`max_causal_vv_width=1000`。 -- 轻量 fuzz 验证:`cargo test -p fuzz random_fuzz_1s -- --nocapture`,2-site/5-site 的 6 个 1 秒 arbtest 随机用例通过。 -- 未运行:libFuzzer targets;如继续合并前需要再决定是否跑 `cargo fuzz run all` 或 `crates/fuzz/fuzz` 的相关目标。 - -### 阶段 3 记录 - -- patch 范围:`LoroDoc::_checkout_without_emitting` 和 profile 版本在 `before < after` 时使用临时 `DiffCalculator::new(false)`,保留 `find_common_ancestor` 推导出的 `Linear` / `ImportGreaterUpdates`;历史/并发 checkout 继续使用持久 `diff_calculator` 的 `Checkout` 路径。 -- 缓存策略:forward checkout 不复用持久 richtext tracker,避免把持久 tracker 切到 `Linear` mode 或污染历史 checkout cache;后续历史 checkout 若需要 tracker,会按现有 `all_vv` 检查重建。 -- benchmark 增量:新增 `code/checkout-to-latest-linear`,每次先 checkout 到旧版本,再只计量 checkout 回 latest 的耗时;profile 输出 
`forward_diff_calculator_samples`。 -- smoke 命令:`LORO_TEXT_CHECKOUT_PROFILE=1 LORO_TEXT_CHECKOUT_PEERS=50 LORO_TEXT_CHECKOUT_BASE_LEN=1024 LORO_TEXT_CHECKOUT_CHANGES=128 cargo bench -p loro-internal --features test_utils --bench text_checkout -- code/checkout-to-latest-linear --warm-up-time 0.05 --measurement-time 0.1 --sample-size 10`。 -- smoke 数据:平均约 65us,`avg_diff_calc=44.7us`,`richtext_tracker_checkout_calls=0`,`richtext_tracker_diff_calls=0`,`forward_diff_calculator_samples=640`。 -- 新增回归测试:`loro::test::checkout_to_latest_linear_text_state_consistent`,覆盖 detached 旧版本 -> checkout_to_latest,验证文本内容、attached 状态和 `check_state_diff_calc_consistency_slow`。 -- 验证命令:`cargo check -p loro-internal --features test_utils --bench text_checkout`;`cargo check -p loro-internal`;`cargo test -p loro-internal checkout --features test_utils`;`cargo test -p loro-internal richtext --features test_utils`;`cargo test -p loro-internal import --features test_utils`。 - -### 阶段 4 记录 - -- patch 范围:`InnerState` 增加 plain text 专用 `insert_text_chunk_at_entity_index` 和 `drain_plain_text_by_entity_index`;`RichtextState::apply_diff` 在无 style、plain text delta、无 event conversion 的路径上绕过 style range/event index 维护。 -- 实现边界:仅当当前 state 没有 style、delta value 全是 text、且存在 edit action 时启用;rich text style anchor/range 继续走原通用路径。 -- choppy rebuild:沿用原先 plain text rebuild 思路,但与 no-style 判定共用一次 delta 扫描;小 delta 仍走增量 apply,避免为局部编辑重建全文。 -- 回滚过的尝试:最初在 direct insert 中维护 cursor cache,`checkout-to-latest-linear` smoke 反而从约 65us 退化到约 99us;改为 direct entity query + clear cache 后恢复。 -- smoke 命令:`LORO_TEXT_CHECKOUT_PROFILE=1 LORO_TEXT_CHECKOUT_PEERS=50 LORO_TEXT_CHECKOUT_BASE_LEN=1024 LORO_TEXT_CHECKOUT_CHANGES=128 cargo bench -p loro-internal --features test_utils --bench text_checkout -- code/checkout-to-latest-linear --warm-up-time 0.05 --measurement-time 0.1 --sample-size 10`。 -- smoke 数据:阶段 3 基准平均约 65.4us、`avg_state_apply=19.2us`;阶段 4 最终平均约 65.3us、`avg_state_apply=18.7us`。这个场景中主要收益很小,说明 forward diff 已经是主优化;但 no-style 
apply 路径现在避免了 style/event 相关维护成本。 -- 验证命令:`cargo check -p loro-internal --features test_utils --bench text_checkout`;`cargo test -p loro-internal checkout --features test_utils`;`cargo test -p loro-internal richtext --features test_utils`;`cargo test -p loro-internal import --features test_utils`;`cargo check -p loro-internal`。 -- 未完成:`style_delta.compose` 批量化还没做;这只影响有订阅/rich event conversion 的后续阶段 4 子项。 -- 轻量 fuzz 验证:`cargo test -p fuzz random_fuzz_1s -- --nocapture` 通过。 -- 未运行:libFuzzer targets;如合并前需要覆盖 checkout/import/state replay 的长时间模糊测试,还需要单独安排。 - -### 阶段 5 记录 - -- 首轮定位:`plain/same-position-peer-checkout` 在 300 peer 下先暴露的最大热点不是 rope 插入扫描,而是宽 frontier 的重复 `shrink_frontiers`。before:平均约 4.93ms,`avg_frontier_prepare=3.04ms`,`avg_diff_calc=1.77ms`。 -- frontier 优化:`shrink_frontiers` 增加 same-deps fast path。去重后的 frontier DAG nodes 如果共享同一 deps,则它们互相并发,直接按原 lamport 降序返回,不做 ancestor walk;这不是长期缓存,不依赖 underwater 数据。 -- 300 peer same-position after frontier fast path:平均约 1.78ms,`avg_frontier_prepare=37.8us`,`avg_diff_calc=1.65ms`。 -- 1000 peer same-position after frontier fast path:平均约 16.6ms,`avg_frontier_prepare=240us`,`avg_frontiers_to_vv=450us`,`avg_diff_calc=15.85ms`。剩余主成本回到 replay/diff_calc。 -- profile 增量:新增 `richtext_insert_future_scan`、scan calls、avg/max visited,用来隔离 `CrdtRope::insert` 内同 active position 的 future sibling 扫描。 -- future scan 定位:1000 peer same-position 下,加入 profile 后平均约 20.56ms,`avg_richtext_insert_future_scan=1.83ms`,`richtext_insert_future_scan_calls=9674`,`avg_future_scan_visited=383`,`max_future_scan_visited=999`。 -- future scan 优化:当 `in_between` 全部和待插入 span 具有相同 `origin_left/origin_right` 时,跳过通用 visited/right-parent 比较逻辑,直接按 peer 排序用 `partition_point` 找插入点;混合 right-parent 继续走原路径,并用 debug assert 固定同父 fast path 的 peer 有序前提。 -- 1000 peer same-position after same-parent fast path:平均约 15.85ms,`avg_richtext_insert_future_scan=575us`,`avg_future_scan_visited=383`,`max_future_scan_visited=999`。 -- 
新增回归测试:`loro::test::checkout_same_deps_same_position_frontiers_text_consistent`,覆盖 32 peer 从同一 base 同位置插入后,用宽 frontiers checkout 到 base 再回 latest,并检查状态/diff consistency。 -- 新增低层回归测试:`same_parent_future_spans_keep_peer_order`、`same_parent_future_spans_keep_order_after_retreat_forward`、`mixed_right_parent_future_spans_fall_back_to_general_ordering`,覆盖 peer id 排序、不同 right parent、future spans、delete/retreat/forward 后再次插入。 -- 验证命令:`cargo test -p loro-internal richtext --features test_utils`;`cargo test -p loro-internal checkout --features test_utils`;`cargo test -p loro-internal checkout_same_deps_same_position_frontiers_text_consistent --features test_utils`。 -- 验证补充:`cargo test -p loro-internal crdt_rope::test --features test_utils`。 -- 未完成:还没有实现随 leaf split/future-active 状态维护的真正 sibling index;当前是低风险 fast path,因此不能把同位置 1000 peer 的扫描复杂度标为已经降到 `N log N`。 -- 轻量 fuzz 验证:`cargo test -p fuzz random_fuzz_1s -- --nocapture` 通过。 -- 未运行:libFuzzer targets;Fugue ordering 合并前应优先跑相关 `cargo fuzz` 目标。 From 377afd4e8aafe1e110844740400358bbebbf6b25 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Wed, 22 Apr 2026 08:19:35 +0000 Subject: [PATCH 04/46] perf: batch rich text style event deltas --- .../loro-internal/src/state/richtext_state.rs | 75 ++++++++++++++++++- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/crates/loro-internal/src/state/richtext_state.rs b/crates/loro-internal/src/state/richtext_state.rs index e305ab0e3..b90f259ee 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -1,6 +1,6 @@ use generic_btree::{rle::HasLength, rle::Sliceable as _, Cursor}; use loro_common::{ContainerID, InternalString, LoroError, LoroResult, LoroValue, ID}; -use loro_delta::DeltaRopeBuilder; +use loro_delta::{delta_trait::DeltaAttr, DeltaRopeBuilder}; use rustc_hash::{FxHashMap, FxHashSet}; use std::ops::Range; use std::sync::{Arc, Weak}; @@ -43,6 +43,66 @@ struct Pos { event_index: usize, } +fn 
flush_pending_style_delta(style_delta: &mut TextDiff, pending_delta: &mut TextDiff) { + if !pending_delta.is_empty() { + style_delta.compose(pending_delta); + *pending_delta = TextDiff::new(); + } +} + +fn try_append_retain_only_style_delta( + pending_delta: &mut TextDiff, + pending_len: &mut usize, + delta: &TextDiff, +) -> bool { + // Adjacent, non-overlapping style retains can be composed once as a batch. + // Overlapping deltas are flushed to preserve the original compose order. + let mut index = 0; + let mut first_styled_start = None; + for item in delta.iter() { + match item { + loro_delta::DeltaItem::Retain { len, attr } => { + if !attr.attr_is_empty() { + first_styled_start.get_or_insert(index); + } + index += len; + } + loro_delta::DeltaItem::Replace { .. } => return false, + } + } + + let Some(first_styled_start) = first_styled_start else { + return true; + }; + + if first_styled_start < *pending_len { + return false; + } + + index = 0; + for item in delta.iter() { + let loro_delta::DeltaItem::Retain { len, attr } = item else { + unreachable!("non-retain style deltas are rejected in the first pass") + }; + if !attr.attr_is_empty() { + if index < *pending_len { + return false; + } + + if index > *pending_len { + pending_delta.push_retain(index - *pending_len, Default::default()); + *pending_len = index; + } + + pending_delta.push_retain(*len, attr.clone()); + *pending_len += len; + } + index += len; + } + + true +} + impl RichtextState { #[inline] pub fn new(idx: ContainerIdx, config: Arc>) -> Self { @@ -568,9 +628,20 @@ impl ContainerState for RichtextState { } } + let mut pending_style_delta = TextDiff::new(); + let mut pending_style_delta_len = 0; for s in new_style_deltas { - style_delta.compose(&s); + if !try_append_retain_only_style_delta( + &mut pending_style_delta, + &mut pending_style_delta_len, + &s, + ) { + flush_pending_style_delta(&mut style_delta, &mut pending_style_delta); + pending_style_delta_len = 0; + style_delta.compose(&s); + } } + 
flush_pending_style_delta(&mut style_delta, &mut pending_style_delta); // self.check_consistency_between_content_and_style_ranges(); ans.compose(&style_delta); Diff::Text(ans) From 7d27febf73f77a10b7440d5374826863d7aacb8a Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Wed, 22 Apr 2026 13:50:25 +0000 Subject: [PATCH 05/46] fix: handle fuzzed text checkout edge cases --- .../src/container/richtext/tracker.rs | 29 +++++++++++++------ crates/loro-internal/src/diff/diff_impl.rs | 5 ++++ crates/loro-internal/src/handler.rs | 16 ++++++++-- crates/loro-wasm/src/lib.rs | 13 +++++++-- crates/loro/tests/loro_rust_test.rs | 20 ++++++++++++- 5 files changed, 68 insertions(+), 15 deletions(-) diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index 0358121f9..cd6cd7457 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -352,14 +352,6 @@ impl Tracker { } fn _checkout_causal(&mut self, vv: CausalVersion<'_>, on_diff_status: bool) { - if !on_diff_status - && vv - .single_frontier() - .is_some_and(|frontier| self.current_frontier_hint == Some(frontier)) - { - return; - } - let current_vv = std::mem::take(&mut self.current_vv); let mut retreat: SmallVec<[IdSpan; 4]> = SmallVec::new(); for (&peer, &counter) in current_vv.iter() { @@ -715,7 +707,11 @@ impl Tracker { #[cfg(test)] mod test { - use crate::{container::richtext::RichtextChunk, vv}; + use crate::{ + container::richtext::RichtextChunk, + version::{CausalVersion, ImVersionVector}, + vv, + }; use generic_btree::rle::HasLength; use super::*; @@ -735,6 +731,21 @@ mod test { assert_eq!(t.rope.len(), 4); } + #[test] + fn checkout_causal_same_frontier_hint_retreats_other_peers() { + let mut t = Tracker::new(); + t.insert(IdFull::new(2, 0, 0), 0, RichtextChunk::new_text(0..2)); + t.insert(IdFull::new(1, 0, 0), 2, RichtextChunk::new_text(2..4)); + assert_eq!(t.rope.len(), 4); + 
assert_eq!(t.current_frontier_hint, Some(ID::new(1, 1))); + + let base = ImVersionVector::new(); + t.checkout_causal(CausalVersion::new(&base, 1, 2, Some(ID::new(1, 1)))); + + assert_eq!(t.rope.len(), 2); + assert_eq!(t.current_vv, vv!(1 => 2)); + } + #[test] fn test_retreat_and_forward_delete() { let mut t = Tracker::new(); diff --git a/crates/loro-internal/src/diff/diff_impl.rs b/crates/loro-internal/src/diff/diff_impl.rs index aef44cd5b..79a7a9b07 100644 --- a/crates/loro-internal/src/diff/diff_impl.rs +++ b/crates/loro-internal/src/diff/diff_impl.rs @@ -15,6 +15,7 @@ //! The implementation of this algorithm is based on the implementation by //! Brandon Williams. use crate::change::get_sys_timestamp; +use loro_common::ContainerID; use rustc_hash::FxHashMap; use std::cmp::Ordering; use std::collections::BinaryHeap; @@ -44,6 +45,10 @@ impl Default for UpdateOptions { pub enum UpdateTimeoutError { #[error("Timeout")] Timeout, + #[error( + "The container {container} is deleted. You cannot apply the op on a deleted container." 
+ )] + ContainerDeleted { container: Box }, } /// Utility function to check if a range is empty that works on older rust versions diff --git a/crates/loro-internal/src/handler.rs b/crates/loro-internal/src/handler.rs index 24761691a..359af4d60 100644 --- a/crates/loro-internal/src/handler.rs +++ b/crates/loro-internal/src/handler.rs @@ -8,7 +8,7 @@ use crate::{ }, cursor::{Cursor, Side}, delta::{DeltaItem, Meta, StyleMeta, TreeExternalDiff}, - diff::{diff, diff_impl::UpdateTimeoutError, OperateProxy}, + diff::{diff, OperateProxy}, event::{Diff, TextDiff, TextDiffItem, TextMeta}, op::ListSlice, state::{IndexType, State, TreeParentId}, @@ -28,7 +28,7 @@ use serde::{Deserialize, Serialize}; use std::{borrow::Cow, cmp::Reverse, collections::BinaryHeap, fmt::Debug, ops::Deref, sync::Arc}; use tracing::{error, instrument}; -pub use crate::diff::diff_impl::UpdateOptions; +pub use crate::diff::diff_impl::{UpdateOptions, UpdateTimeoutError}; pub use tree::TreeHandler; mod movable_list_apply_delta; mod tree; @@ -2398,6 +2398,7 @@ impl TextHandler { } pub fn update(&self, text: &str, options: UpdateOptions) -> Result<(), UpdateTimeoutError> { + self.ensure_not_deleted_for_update()?; let old_str = self.to_string(); let new = text.chars().map(|x| x as u32).collect::>(); let old = old_str.chars().map(|x| x as u32).collect::>(); @@ -2415,6 +2416,7 @@ impl TextHandler { text: &str, options: UpdateOptions, ) -> Result<(), UpdateTimeoutError> { + self.ensure_not_deleted_for_update()?; let hook = text_update::DiffHookForLine::new(self, text); let old_lines = hook.get_old_arr().to_vec(); let new_lines = hook.get_new_arr().to_vec(); @@ -2426,6 +2428,16 @@ impl TextHandler { ) } + fn ensure_not_deleted_for_update(&self) -> Result<(), UpdateTimeoutError> { + if self.is_deleted() { + return Err(UpdateTimeoutError::ContainerDeleted { + container: Box::new(self.id()), + }); + } + + Ok(()) + } + #[allow(clippy::inherent_to_string)] pub fn to_string(&self) -> String { match &self.inner { diff 
--git a/crates/loro-wasm/src/lib.rs b/crates/loro-wasm/src/lib.rs index 437d5e2a3..3d374b851 100644 --- a/crates/loro-wasm/src/lib.rs +++ b/crates/loro-wasm/src/lib.rs @@ -20,7 +20,7 @@ use loro_internal::{ event::Index, handler::{ Handler, ListHandler, MapHandler, TextDelta, TextHandler, TreeHandler, UpdateOptions, - ValueOrHandler, + UpdateTimeoutError, ValueOrHandler, }, id::{Counter, PeerID, TreeID, ID}, loro::{CommitOptions, ExportMode}, @@ -2490,6 +2490,13 @@ fn convert_container_path_to_js_value(path: &[(ContainerID, Index)]) -> JsContai v.into() } +fn update_error_to_js(e: UpdateTimeoutError) -> JsValue { + match e { + UpdateTimeoutError::Timeout => JsError::new("Update timeout").into(), + err => JsError::new(&err.to_string()).into(), + } +} + /// The handler of a text container. It supports rich text CRDT. /// /// Learn more at https://loro.dev/docs/tutorial/text @@ -2604,7 +2611,7 @@ impl LoroText { }; self.handler .update(text, options) - .map_err(|_| JsError::new("Update timeout").into()) + .map_err(update_error_to_js) } /// Update the current text to the target text, the difference is calculated line by line. @@ -2634,7 +2641,7 @@ impl LoroText { }; self.handler .update_by_line(text, options) - .map_err(|_| JsError::new("Update timeout").into()) + .map_err(update_error_to_js) } /// Insert the string at the given index (utf-16 index). 
diff --git a/crates/loro/tests/loro_rust_test.rs b/crates/loro/tests/loro_rust_test.rs index 31182e564..3382f7d30 100644 --- a/crates/loro/tests/loro_rust_test.rs +++ b/crates/loro/tests/loro_rust_test.rs @@ -18,7 +18,7 @@ use loro::{ event::{Diff, DiffBatch, ListDiffItem}, loro_value, CommitOptions, ContainerID, ContainerTrait, ContainerType, ExportMode, Frontiers, FrontiersNotIncluded, IdSpan, Index, LoroDoc, LoroError, LoroList, LoroMap, LoroMapValue, - LoroStringValue, LoroText, LoroValue, ToJson, TreeParentId, + LoroStringValue, LoroText, LoroValue, ToJson, TreeParentId, UpdateTimeoutError, }; use loro_internal::{ encoding::EncodedBlobMode, fx_map, handler::TextDelta, id::ID, version_range, vv, LoroResult, @@ -1780,6 +1780,24 @@ fn perform_action_on_deleted_container_should_return_error() { assert!(text.is_deleted()); } +#[test] +#[parallel] +fn update_deleted_text_should_return_error() { + let doc = LoroDoc::new(); + let list = doc.get_movable_list("list"); + let text = list.push_container(LoroText::new()).unwrap(); + list.set(0, 1).unwrap(); + + assert!(matches!( + text.update("Hello", Default::default()), + Err(UpdateTimeoutError::ContainerDeleted { .. }) + )); + assert!(matches!( + text.update_by_line("Hello", Default::default()), + Err(UpdateTimeoutError::ContainerDeleted { .. 
}) + )); +} + #[test] #[parallel] fn checkout_should_reset_container_deleted_cache() { From 4ce926f9188ee783cdfd35973ea84708223454bf Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Wed, 22 Apr 2026 15:38:41 +0000 Subject: [PATCH 06/46] fix: handle shallow root frontiers in fuzzed imports --- crates/fuzz/src/container/mod.rs | 1 + crates/loro-internal/src/oplog/loro_dag.rs | 15 +++++---- .../integration_test/shallow_snapshot_test.rs | 31 +++++++++++++++++++ 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/crates/fuzz/src/container/mod.rs b/crates/fuzz/src/container/mod.rs index 0574365b3..7de74ede9 100644 --- a/crates/fuzz/src/container/mod.rs +++ b/crates/fuzz/src/container/mod.rs @@ -17,6 +17,7 @@ fn unwrap(r: LoroResult) -> Option { match r { Ok(v) => Some(v), Err(LoroError::ContainerDeleted { .. }) => None, + Err(LoroError::TreeError(LoroTreeError::CyclicMoveError)) => None, Err(LoroError::TreeError(LoroTreeError::TreeNodeDeletedOrNotExist(..))) => None, Err(e) => panic!("Error: {}", e), } diff --git a/crates/loro-internal/src/oplog/loro_dag.rs b/crates/loro-internal/src/oplog/loro_dag.rs index e18c197fe..b0453d790 100644 --- a/crates/loro-internal/src/oplog/loro_dag.rs +++ b/crates/loro-internal/src/oplog/loro_dag.rs @@ -585,15 +585,14 @@ impl AppDag { return true; } - if deps.iter().any(|x| self.shallow_since_vv.includes_id(x)) { - return true; - } + for id in deps.iter() { + if self.shallow_since_frontiers.contains(&id) { + continue; + } - if deps - .iter() - .any(|x| self.shallow_since_frontiers.contains(&x)) - { - return deps != &self.shallow_since_frontiers; + if self.shallow_since_vv.includes_id(id) { + return true; + } } false diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index d4d43b508..87eeadc94 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -40,6 +40,37 
@@ fn state_only_at_concurrent_frontiers_excludes_later_ops() -> anyhow::Result<()> Ok(()) } +#[test] +fn state_only_import_allows_frontiers_that_include_shallow_root() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_peer_id(1)?; + doc.set_change_merge_interval(0); + + let text = doc.get_text("text"); + text.insert(0, "root")?; + doc.commit(); + let shallow_root = doc.state_frontiers(); + + doc.set_peer_id(2)?; + text.insert(text.len_unicode(), " latest")?; + doc.commit(); + let latest = doc.state_frontiers(); + let expected = doc.get_deep_value(); + + let target = Frontiers::from([ + shallow_root.as_single().unwrap(), + latest.as_single().unwrap(), + ]); + let bytes = doc.export(ExportMode::state_only(Some(&target)))?; + let new_doc = LoroDoc::new(); + new_doc.import(&bytes)?; + + assert!(new_doc.is_shallow()); + assert_eq!(new_doc.shallow_since_frontiers(), shallow_root); + assert_eq!(new_doc.get_deep_value(), expected); + Ok(()) +} + #[test] fn test_gc() -> anyhow::Result<()> { let doc = LoroDoc::new(); From 87dd3333dd5703b986cb84971cb54b67e412c37d Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 04:26:54 +0000 Subject: [PATCH 07/46] fix: clear deleted cache on checkout --- crates/loro-internal/src/loro.rs | 4 ++-- crates/loro-internal/src/state.rs | 11 ++++------- crates/loro/tests/loro_rust_test.rs | 17 +++++++++++++++++ 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index 4d432d41a..be0f44160 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -2442,8 +2442,8 @@ fn find_last_delete_op(oplog: &OpLog, id: ID, idx: ContainerIdx) -> Option { if let InnerContent::List(InnerListOp::Delete(d)) = &op.content { if d.id_start.to_span(d.atom_len()).contains(id) { debug_assert!(op.counter >= change.id().counter); - let op_lamport = change.lamport - + (op.counter - change.id().counter) as loro_common::Lamport; + let 
op_lamport = + change.lamport + (op.counter - change.id().counter) as loro_common::Lamport; let key = (op_lamport, peer); if best.map_or(true, |(bk, _)| key > bk) { best = Some((key, ID::new(peer, op.counter))); diff --git a/crates/loro-internal/src/state.rs b/crates/loro-internal/src/state.rs index 545d414ac..2ba11ddfb 100644 --- a/crates/loro-internal/src/state.rs +++ b/crates/loro-internal/src/state.rs @@ -541,13 +541,10 @@ impl DocState { return Err(LoroError::internal("state apply failpoint")); } } - match diff_mode { - DiffMode::Checkout => { - self.dead_containers_cache.clear(); - } - _ => { - self.dead_containers_cache.clear_alive(); - } + if diff.by.is_checkout() || diff_mode == DiffMode::Checkout { + self.dead_containers_cache.clear(); + } else { + self.dead_containers_cache.clear_alive(); } self.pre_txn(diff.origin.clone(), diff.by); diff --git a/crates/loro/tests/loro_rust_test.rs b/crates/loro/tests/loro_rust_test.rs index 45f145c55..b104a03a2 100644 --- a/crates/loro/tests/loro_rust_test.rs +++ b/crates/loro/tests/loro_rust_test.rs @@ -1843,6 +1843,23 @@ fn checkout_should_reset_container_deleted_cache() { assert!(!text.is_deleted()); } +#[test] +#[parallel] +fn checkout_forward_should_reset_container_deleted_cache() { + let doc = LoroDoc::new(); + let list = doc.get_movable_list("list"); + let text = list.push_container(LoroText::new()).unwrap(); + doc.commit(); + let f = doc.state_frontiers(); + + doc.checkout(&Frontiers::default()).unwrap(); + // This populates the deleted-container cache. In debug builds, is_deleted() + // recomputes and repairs stale entries; release builds return from the cache. 
+ assert!(text.is_deleted()); + doc.checkout(&f).unwrap(); + assert!(!text.is_deleted()); +} + #[test] #[parallel] fn test_fork_at_target_frontiers() { From 6e1b49acbe1a4869a9fbc995d941edaeb5ff2ddc Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 11:28:27 +0000 Subject: [PATCH 08/46] fix: reject partial shallow root checkout --- .../src/encoding/shallow_snapshot.rs | 30 +++-------- crates/loro-internal/src/oplog/loro_dag.rs | 51 ++++++++++++++----- .../integration_test/shallow_snapshot_test.rs | 33 ++++++++++++ 3 files changed, 77 insertions(+), 37 deletions(-) diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index c4991bcbd..c47f09e42 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -41,26 +41,6 @@ pub(crate) fn export_shallow_snapshot_inner( start_vv.insert(id.peer, id.counter); } - #[cfg(debug_assertions)] - { - use crate::dag::Dag; - if !start_from.is_empty() { - assert!(start_from.len() == 1); - let id = start_from.as_single().unwrap(); - let node = oplog.dag.get(id).unwrap(); - if id.counter == node.cnt { - let vv = oplog.dag().frontiers_to_vv(&node.deps).unwrap(); - assert_eq!(vv, start_vv); - } else { - let vv = oplog - .dag() - .frontiers_to_vv(&Frontiers::from(id.inc(-1))) - .unwrap(); - assert_eq!(vv, start_vv); - } - } - } - loro_common::debug!( "start version vv={:?} frontiers={:?}", &start_vv, @@ -264,10 +244,12 @@ fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Fronti } i += 2; } - if next == current { - // Cannot converge further (pairwise GCAs are the nodes themselves). - // Fall back to empty frontiers, meaning export full history. - return Frontiers::default(); + if next.is_empty() || next == current { + // Cannot converge further (no non-empty GCA, or pairwise GCAs are + // the nodes themselves). 
+ // Keep the multi-frontier start so the shallow root still represents + // the complete boundary instead of falling back to full history. + break; } current = next; } diff --git a/crates/loro-internal/src/oplog/loro_dag.rs b/crates/loro-internal/src/oplog/loro_dag.rs index 86faf5ab1..51451fca0 100644 --- a/crates/loro-internal/src/oplog/loro_dag.rs +++ b/crates/loro-internal/src/oplog/loro_dag.rs @@ -635,11 +635,16 @@ impl AppDag { self.frontiers = v.frontiers; if let Some((vv, f)) = v.start_version { if !f.is_empty() { - assert!(f.len() == 1); - let id = f.as_single().unwrap(); - let node = self.get(id).unwrap(); - assert!(node.cnt == id.counter); - self.shallow_root_frontiers_deps = node.deps.clone(); + let deps: Frontiers = vv + .iter() + .filter_map(|(&peer, &counter)| { + (counter > 0).then_some(ID::new(peer, counter - 1)) + }) + .collect(); + for id in f.iter() { + self.get(id).unwrap(); + } + self.shallow_root_frontiers_deps = deps; } self.shallow_since_frontiers = f; self.shallow_since_vv = ImVersionVector::from_vv(&vv); @@ -779,17 +784,24 @@ impl AppDag { return true; } - for id in deps.iter() { - if self.shallow_since_frontiers.contains(&id) { - continue; - } + let Some(vv) = self.frontiers_to_vv(deps) else { + return deps.iter().any(|id| self.shallow_since_vv.includes_id(id)); + }; - if self.shallow_since_vv.includes_id(id) { - return true; - } + if self + .shallow_since_vv + .iter() + .any(|(&peer, &counter)| vv.get(&peer).copied().unwrap_or(0) < counter) + { + return true; } - false + // The shallow boundary can be a multi-frontier root. A target at that + // boundary must include every root frontier; a proper subset is not a + // representable state in the shallow history. 
+ self.shallow_since_frontiers + .iter() + .any(|id| !vv.includes_id(id)) } /// Travel the ancestors of the given id, and call the callback for each node @@ -1073,6 +1085,10 @@ impl AppDag { } else { let mut all_deps_processed = true; for id in top_node.deps.iter() { + if self.shallow_since_vv.includes_id(id) { + continue; + } + let node = self.get(id).expect("deps should be in the dag"); if node.vv.get().is_none() { if all_deps_processed { @@ -1089,6 +1105,15 @@ impl AppDag { } for id in top_node.deps.iter() { + if self.shallow_since_vv.includes_id(id) { + if ans_vv.is_empty() { + ans_vv = self.shallow_since_vv.clone(); + } else { + ans_vv.extend_to_include_vv(self.shallow_since_vv.iter()); + } + continue; + } + let node = self.get(id).expect("deps should be in the dag"); let dep_vv = node.vv.get().unwrap(); if ans_vv.is_empty() { diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 4502fcd97..61a3a6190 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -71,6 +71,39 @@ fn state_only_import_allows_frontiers_that_include_shallow_root() -> anyhow::Res Ok(()) } +#[test] +fn checkout_subset_of_multi_frontier_shallow_root_should_error() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_detached_editing(true); + + doc.set_peer_id(1)?; + doc.get_text("left").insert(0, "left")?; + doc.commit(); + let left = doc.state_frontiers(); + + doc.checkout(&Frontiers::default())?; + doc.set_peer_id(2)?; + doc.get_text("right").insert(0, "right")?; + doc.commit(); + let right = doc.state_frontiers(); + + let mut shallow_root = left.clone(); + shallow_root.merge_with_greater(&right); + let shallow_root = doc + .minimize_frontiers(&shallow_root) + .expect("frontiers should be reachable"); + assert_eq!(shallow_root.len(), 2); + + doc.checkout(&shallow_root)?; + let bytes = 
doc.export(ExportMode::shallow_snapshot(&shallow_root))?; + let shallow_doc = LoroDoc::new(); + shallow_doc.import(&bytes)?; + + let subset = Frontiers::from([shallow_root.iter().next().unwrap()]); + assert!(shallow_doc.checkout(&subset).is_err()); + Ok(()) +} + #[test] fn test_gc() -> anyhow::Result<()> { let doc = LoroDoc::new(); From 162ff392befeac51b3665a2f99da8be014535121 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 11:51:40 +0000 Subject: [PATCH 09/46] fix: reexport shallow root snapshots --- .../src/encoding/shallow_snapshot.rs | 4 ++ .../integration_test/shallow_snapshot_test.rs | 46 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index c47f09e42..f50e5476d 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -224,6 +224,10 @@ fn restore_export_doc_state( /// It should be the LCA of the user given version and the latest version. /// Otherwise, users cannot replay the history from the initial version till the latest version. fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { + if !oplog.shallow_since_vv().is_empty() && frontiers == oplog.shallow_since_frontiers() { + return frontiers.clone(); + } + // Find the LCA of the given frontiers by iteratively pairwise GCA. // This converges to a single frontier or empty if there is no common ancestor. 
let mut current = frontiers.clone(); diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 61a3a6190..2e75b79be 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -104,6 +104,52 @@ fn checkout_subset_of_multi_frontier_shallow_root_should_error() -> anyhow::Resu Ok(()) } +#[test] +fn reexport_multi_frontier_shallow_root_snapshot_imports() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_detached_editing(true); + + doc.set_peer_id(1)?; + doc.get_text("left").insert(0, "left")?; + doc.commit(); + let left = doc.state_frontiers(); + + doc.checkout(&Frontiers::default())?; + doc.set_peer_id(2)?; + doc.get_text("right").insert(0, "right")?; + doc.commit(); + let right = doc.state_frontiers(); + + let mut shallow_root = left.clone(); + shallow_root.merge_with_greater(&right); + let shallow_root = doc + .minimize_frontiers(&shallow_root) + .expect("frontiers should be reachable"); + doc.checkout(&shallow_root)?; + let expected = doc.get_deep_value(); + + let bytes = doc.export(ExportMode::shallow_snapshot(&shallow_root))?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + + let reexported = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + imported.export(ExportMode::shallow_snapshot(&shallow_root)) + })) { + Ok(result) => result?, + Err(_) => { + std::mem::forget(imported); + panic!("re-exporting a multi-frontier shallow root snapshot should not panic"); + } + }; + let imported_again = LoroDoc::new(); + imported_again.import(&reexported)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.shallow_since_frontiers(), shallow_root); + assert_eq!(imported_again.get_deep_value(), expected); + Ok(()) +} + #[test] fn test_gc() -> anyhow::Result<()> { let doc = LoroDoc::new(); From dccd17259630b3f4b79b49adc303fafeb2232115 Mon Sep 17 00:00:00 
2001 From: Zixuan Chen Date: Fri, 8 May 2026 12:04:54 +0000 Subject: [PATCH 10/46] fix: avoid shallow gca on reexport --- .../src/encoding/shallow_snapshot.rs | 5 +- .../integration_test/shallow_snapshot_test.rs | 156 +++++++++++++++--- 2 files changed, 137 insertions(+), 24 deletions(-) diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index f50e5476d..64418c585 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -224,7 +224,10 @@ fn restore_export_doc_state( /// It should be the LCA of the user given version and the latest version. /// Otherwise, users cannot replay the history from the initial version till the latest version. fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { - if !oplog.shallow_since_vv().is_empty() && frontiers == oplog.shallow_since_frontiers() { + if !oplog.shallow_since_vv().is_empty() { + // The target frontiers have already been checked by the caller. On a + // shallow doc, searching for a lower GCA can walk into trimmed history. + // Keep the requested boundary instead. 
return frontiers.clone(); } diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 2e75b79be..118d4fccf 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -6,6 +6,33 @@ use std::{ use super::gen_action; use loro::{cursor::CannotFindRelativePosition, ExportMode, Frontiers, LoroDoc, ID}; +fn multi_frontier_shallow_snapshot() -> anyhow::Result<(Vec, Frontiers, loro::LoroValue)> { + let doc = LoroDoc::new(); + doc.set_detached_editing(true); + + doc.set_peer_id(1)?; + doc.get_text("left").insert(0, "left")?; + doc.commit(); + let left = doc.state_frontiers(); + + doc.checkout(&Frontiers::default())?; + doc.set_peer_id(2)?; + doc.get_text("right").insert(0, "right")?; + doc.commit(); + let right = doc.state_frontiers(); + + let mut shallow_root = left.clone(); + shallow_root.merge_with_greater(&right); + let shallow_root = doc + .minimize_frontiers(&shallow_root) + .expect("frontiers should be reachable"); + doc.checkout(&shallow_root)?; + let expected = doc.get_deep_value(); + + let bytes = doc.export(ExportMode::shallow_snapshot(&shallow_root))?; + Ok((bytes, shallow_root, expected)) +} + #[test] fn state_only_at_concurrent_frontiers_excludes_later_ops() -> anyhow::Result<()> { let doc = LoroDoc::new(); @@ -106,29 +133,7 @@ fn checkout_subset_of_multi_frontier_shallow_root_should_error() -> anyhow::Resu #[test] fn reexport_multi_frontier_shallow_root_snapshot_imports() -> anyhow::Result<()> { - let doc = LoroDoc::new(); - doc.set_detached_editing(true); - - doc.set_peer_id(1)?; - doc.get_text("left").insert(0, "left")?; - doc.commit(); - let left = doc.state_frontiers(); - - doc.checkout(&Frontiers::default())?; - doc.set_peer_id(2)?; - doc.get_text("right").insert(0, "right")?; - doc.commit(); - let right = doc.state_frontiers(); - - let mut shallow_root = left.clone(); - 
shallow_root.merge_with_greater(&right); - let shallow_root = doc - .minimize_frontiers(&shallow_root) - .expect("frontiers should be reachable"); - doc.checkout(&shallow_root)?; - let expected = doc.get_deep_value(); - - let bytes = doc.export(ExportMode::shallow_snapshot(&shallow_root))?; + let (bytes, shallow_root, expected) = multi_frontier_shallow_snapshot()?; let imported = LoroDoc::new(); imported.import(&bytes)?; @@ -150,6 +155,111 @@ fn reexport_multi_frontier_shallow_root_snapshot_imports() -> anyhow::Result<()> Ok(()) } +#[test] +fn snapshot_export_preserves_multi_frontier_shallow_root() -> anyhow::Result<()> { + let (bytes, shallow_root, expected) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + + let snapshot = imported.export(ExportMode::Snapshot)?; + let imported_again = LoroDoc::new(); + imported_again.import(&snapshot)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.shallow_since_frontiers(), shallow_root); + assert_eq!(imported_again.get_deep_value(), expected); + Ok(()) +} + +#[test] +fn state_only_export_preserves_multi_frontier_shallow_root() -> anyhow::Result<()> { + let (bytes, shallow_root, expected) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + + let state_only = imported.export(ExportMode::state_only(Some(&shallow_root)))?; + let imported_again = LoroDoc::new(); + imported_again.import(&state_only)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.shallow_since_frontiers(), shallow_root); + assert_eq!(imported_again.get_deep_value(), expected); + Ok(()) +} + +#[test] +fn state_correctness_check_handles_multi_frontier_shallow_root() -> anyhow::Result<()> { + let (bytes, _, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + + imported.check_state_correctness_slow(); + Ok(()) +} + +#[test] +fn 
shallow_doc_with_multi_frontier_root_can_export_concurrent_tail() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + imported.set_detached_editing(true); + + imported.checkout(&shallow_root)?; + imported.set_peer_id(3)?; + imported.get_text("tail_a").insert(0, "a")?; + imported.get_tree("tail_tree").create(None)?; + imported.commit(); + let tail_a = imported.state_frontiers(); + + imported.checkout(&shallow_root)?; + imported.set_peer_id(4)?; + imported.get_text("tail_b").insert(0, "b")?; + imported.get_tree("tail_tree").create(None)?; + imported.commit(); + let tail_b = imported.state_frontiers(); + + let mut target = tail_a; + target.merge_with_greater(&tail_b); + let target = imported + .minimize_frontiers(&target) + .expect("tail frontiers should be reachable"); + imported.checkout(&target)?; + let expected = imported.get_deep_value(); + + imported.checkout(&shallow_root)?; + imported.checkout(&target)?; + assert_eq!(imported.get_deep_value(), expected); + + let bytes = imported.export(ExportMode::shallow_snapshot(&target))?; + let imported_again = LoroDoc::new(); + imported_again.import(&bytes)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.get_deep_value(), expected); + + let state_only = imported.export(ExportMode::state_only(Some(&target)))?; + let state_only_imported = LoroDoc::new(); + state_only_imported.import(&state_only)?; + + assert!(state_only_imported.is_shallow()); + assert_eq!(state_only_imported.get_deep_value(), expected); + + let latest_state_only = imported.export(ExportMode::state_only(None))?; + let latest_state_only_imported = LoroDoc::new(); + latest_state_only_imported.import(&latest_state_only)?; + + assert!(latest_state_only_imported.is_shallow()); + assert_eq!(latest_state_only_imported.get_deep_value(), expected); + + let snapshot = imported.export(ExportMode::Snapshot)?; + let snapshot_imported = 
LoroDoc::new(); + snapshot_imported.import(&snapshot)?; + + assert!(snapshot_imported.is_shallow()); + assert_eq!(snapshot_imported.get_deep_value(), expected); + Ok(()) +} + #[test] fn test_gc() -> anyhow::Result<()> { let doc = LoroDoc::new(); From 4ce3408c2a515fcea28f84993cd0936a17700dcc Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 12:22:58 +0000 Subject: [PATCH 11/46] fix: reject unreachable shallow frontiers --- crates/loro-internal/src/oplog/loro_dag.rs | 14 +++++++- .../integration_test/shallow_snapshot_test.rs | 35 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/crates/loro-internal/src/oplog/loro_dag.rs b/crates/loro-internal/src/oplog/loro_dag.rs index 51451fca0..a95ad47be 100644 --- a/crates/loro-internal/src/oplog/loro_dag.rs +++ b/crates/loro-internal/src/oplog/loro_dag.rs @@ -788,6 +788,14 @@ impl AppDag { return deps.iter().any(|id| self.shallow_since_vv.includes_id(id)); }; + self.vv_is_before_shallow_root(&vv) + } + + fn vv_is_before_shallow_root(&self, vv: &VersionVector) -> bool { + if self.shallow_since_vv.is_empty() { + return false; + } + if self .shallow_since_vv .iter() @@ -1186,6 +1194,10 @@ impl AppDag { vv.extend_to_include_last_id(id); } + if self.vv_is_before_shallow_root(&vv) { + return None; + } + Some(vv) } @@ -1316,7 +1328,7 @@ impl AppDag { pub fn cmp_with_frontiers(&self, other: &Frontiers) -> Ordering { if &self.frontiers == other { Ordering::Equal - } else if other.iter().all(|id| self.vv.includes_id(id)) { + } else if self.frontiers_to_vv(other).is_some() { Ordering::Greater } else { Ordering::Less diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 118d4fccf..c0690f951 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -131,6 +131,41 @@ fn 
checkout_subset_of_multi_frontier_shallow_root_should_error() -> anyhow::Resu Ok(()) } +#[test] +fn frontiers_to_vv_rejects_unrepresentable_shallow_root_versions() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let shallow_doc = LoroDoc::new(); + shallow_doc.import(&bytes)?; + + let subset = Frontiers::from([shallow_root.iter().next().unwrap()]); + assert!(shallow_doc.frontiers_to_vv(&Frontiers::default()).is_none()); + assert!(shallow_doc.frontiers_to_vv(&subset).is_none()); + assert!(shallow_doc + .cmp_frontiers(&Frontiers::default(), &shallow_root) + .is_err()); + assert!(shallow_doc.cmp_frontiers(&subset, &shallow_root).is_err()); + assert_eq!( + shallow_doc.cmp_with_frontiers(&Frontiers::default()), + std::cmp::Ordering::Less + ); + assert_eq!( + shallow_doc.cmp_with_frontiers(&subset), + std::cmp::Ordering::Less + ); + + let shallow_root_vv = shallow_doc + .frontiers_to_vv(&shallow_root) + .expect("complete shallow root should be included"); + assert_eq!(shallow_doc.vv_to_frontiers(&shallow_root_vv), shallow_root); + assert_eq!( + shallow_doc + .cmp_frontiers(&shallow_root, &shallow_root) + .expect("complete shallow root should be comparable"), + Some(std::cmp::Ordering::Equal) + ); + Ok(()) +} + #[test] fn reexport_multi_frontier_shallow_root_snapshot_imports() -> anyhow::Result<()> { let (bytes, shallow_root, expected) = multi_frontier_shallow_snapshot()?; From df2498cd9125dfe173a5052e012ef6dc24deac51 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 13:18:30 +0000 Subject: [PATCH 12/46] fix: reject shallow root dependency frontiers --- crates/loro-internal/src/oplog/loro_dag.rs | 2 +- .../integration_test/shallow_snapshot_test.rs | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/crates/loro-internal/src/oplog/loro_dag.rs b/crates/loro-internal/src/oplog/loro_dag.rs index a95ad47be..270d96972 100644 --- a/crates/loro-internal/src/oplog/loro_dag.rs +++ 
b/crates/loro-internal/src/oplog/loro_dag.rs @@ -1183,7 +1183,7 @@ impl AppDag { pub fn frontiers_to_vv(&self, frontiers: &Frontiers) -> Option { if frontiers == &self.shallow_root_frontiers_deps { let vv = VersionVector::from_im_vv(&self.shallow_since_vv); - return Some(vv); + return (!self.vv_is_before_shallow_root(&vv)).then_some(vv); } let mut vv: VersionVector = Default::default(); diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index c0690f951..51686f2a6 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -166,6 +166,38 @@ fn frontiers_to_vv_rejects_unrepresentable_shallow_root_versions() -> anyhow::Re Ok(()) } +#[test] +fn frontiers_to_vv_rejects_shallow_root_deps() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_peer_id(1)?; + doc.get_text("text").insert(0, "abcdef")?; + doc.commit(); + + let shallow_root = Frontiers::from_id(ID::new(1, 3)); + let before_root = Frontiers::from_id(ID::new(1, 2)); + let bytes = doc.export(ExportMode::shallow_snapshot(&shallow_root))?; + let shallow_doc = LoroDoc::new(); + shallow_doc.import(&bytes)?; + + assert_eq!(shallow_doc.shallow_since_frontiers(), shallow_root); + assert!(shallow_doc.checkout(&before_root).is_err()); + assert!(shallow_doc + .export(ExportMode::shallow_snapshot(&before_root)) + .is_err()); + assert!(shallow_doc + .export(ExportMode::state_only(Some(&before_root))) + .is_err()); + assert!(shallow_doc.frontiers_to_vv(&before_root).is_none()); + assert!(shallow_doc + .cmp_frontiers(&before_root, &shallow_root) + .is_err()); + assert_eq!( + shallow_doc.cmp_with_frontiers(&before_root), + std::cmp::Ordering::Less + ); + Ok(()) +} + #[test] fn reexport_multi_frontier_shallow_root_snapshot_imports() -> anyhow::Result<()> { let (bytes, shallow_root, expected) = multi_frontier_shallow_snapshot()?; From 
fdedbd707e1f45bc0bc5912174d8c310bebfed99 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 13:42:29 +0000 Subject: [PATCH 13/46] fix: guard shallow frontier utilities --- crates/loro-internal/src/loro.rs | 17 ++++++++- crates/loro-internal/src/oplog/loro_dag.rs | 4 +- crates/loro-internal/src/version.rs | 7 ++++ .../integration_test/shallow_snapshot_test.rs | 38 +++++++++++++++++++ 4 files changed, 63 insertions(+), 3 deletions(-) diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index be0f44160..fed627d26 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -13,7 +13,7 @@ use crate::{ IntoContainerId, }, cursor::{AbsolutePosition, CannotFindRelativePosition, Cursor, PosQueryResult}, - dag::{Dag, DagUtils}, + dag::Dag, diff_calc::DiffCalculator, encoding::{ self, decode_snapshot, export_fast_snapshot, export_fast_updates, @@ -2259,7 +2259,20 @@ impl LoroDoc { #[inline] pub fn find_id_spans_between(&self, from: &Frontiers, to: &Frontiers) -> VersionVectorDiff { - self.oplog().lock().dag.find_path(from, to) + let oplog = self.oplog().lock(); + let from_vv = oplog + .dag + .frontiers_to_vv(from) + .expect("from frontiers should be included by the document history"); + let to_vv = oplog + .dag + .frontiers_to_vv(to) + .expect("to frontiers should be included by the document history"); + + VersionVectorDiff { + retreat: from_vv.sub_vec(&to_vv), + forward: to_vv.sub_vec(&from_vv), + } } /// Subscribe to the first commit from a peer. 
Operations performed on the `LoroDoc` within this callback diff --git a/crates/loro-internal/src/oplog/loro_dag.rs b/crates/loro-internal/src/oplog/loro_dag.rs index 270d96972..2f97142a9 100644 --- a/crates/loro-internal/src/oplog/loro_dag.rs +++ b/crates/loro-internal/src/oplog/loro_dag.rs @@ -785,7 +785,9 @@ impl AppDag { } let Some(vv) = self.frontiers_to_vv(deps) else { - return deps.iter().any(|id| self.shallow_since_vv.includes_id(id)); + return deps.iter().any(|id| { + self.shallow_since_vv.includes_id(id) || self.shallow_since_frontiers.contains(&id) + }); }; self.vv_is_before_shallow_root(&vv) diff --git a/crates/loro-internal/src/version.rs b/crates/loro-internal/src/version.rs index 4598400e5..6b92fbfa4 100644 --- a/crates/loro-internal/src/version.rs +++ b/crates/loro-internal/src/version.rs @@ -924,6 +924,13 @@ impl VersionVector { pub fn shrink_frontiers(last_ids: &Frontiers, dag: &AppDag) -> Result { // it only keep the ids of ops that are concurrent to each other + if !last_ids.is_empty() && dag.is_before_shallow_root(last_ids) { + return Err(last_ids + .iter() + .next() + .expect("non-empty frontiers should have at least one id")); + } + if last_ids.len() <= 1 { return Ok(last_ids.clone()); } diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 51686f2a6..349d29bfe 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -144,6 +144,7 @@ fn frontiers_to_vv_rejects_unrepresentable_shallow_root_versions() -> anyhow::Re .cmp_frontiers(&Frontiers::default(), &shallow_root) .is_err()); assert!(shallow_doc.cmp_frontiers(&subset, &shallow_root).is_err()); + assert!(shallow_doc.minimize_frontiers(&subset).is_err()); assert_eq!( shallow_doc.cmp_with_frontiers(&Frontiers::default()), std::cmp::Ordering::Less @@ -191,6 +192,7 @@ fn frontiers_to_vv_rejects_shallow_root_deps() -> 
anyhow::Result<()> { assert!(shallow_doc .cmp_frontiers(&before_root, &shallow_root) .is_err()); + assert!(shallow_doc.minimize_frontiers(&before_root).is_err()); assert_eq!( shallow_doc.cmp_with_frontiers(&before_root), std::cmp::Ordering::Less @@ -297,6 +299,42 @@ fn shallow_doc_with_multi_frontier_root_can_export_concurrent_tail() -> anyhow:: imported.checkout(&target)?; assert_eq!(imported.get_deep_value(), expected); + let root_to_target = imported.find_id_spans_between(&shallow_root, &target); + assert!(root_to_target.retreat.is_empty()); + assert!(root_to_target.forward.contains_key(&3)); + assert!(root_to_target.forward.contains_key(&4)); + + let target_to_root = imported.find_id_spans_between(&target, &shallow_root); + assert!(target_to_root.forward.is_empty()); + assert!(target_to_root.retreat.contains_key(&3)); + assert!(target_to_root.retreat.contains_key(&4)); + + let tail_updates = imported.export(ExportMode::updates_in_range( + root_to_target.get_id_spans_right().collect::>(), + ))?; + let updated_from_root = LoroDoc::new(); + updated_from_root.import(&bytes)?; + updated_from_root.import(&tail_updates)?; + assert_eq!(updated_from_root.get_deep_value(), expected); + + let root_vv = imported + .frontiers_to_vv(&shallow_root) + .expect("shallow root should be included"); + let target_vv = imported + .frontiers_to_vv(&target) + .expect("target should be included"); + let tail_json = imported.export_json_updates(&root_vv, &target_vv); + let json_updated_from_root = LoroDoc::new(); + json_updated_from_root.import(&bytes)?; + json_updated_from_root.import_json_updates(tail_json)?; + assert_eq!(json_updated_from_root.get_deep_value(), expected); + + let all_tail_json = imported.export_json_updates(&Default::default(), &target_vv); + let json_all_updated_from_root = LoroDoc::new(); + json_all_updated_from_root.import(&bytes)?; + json_all_updated_from_root.import_json_updates(all_tail_json)?; + assert_eq!(json_all_updated_from_root.get_deep_value(), expected); 
+ let bytes = imported.export(ExportMode::shallow_snapshot(&target))?; let imported_again = LoroDoc::new(); imported_again.import(&bytes)?; From 5d046be6e3fe0c037e865618345472a0b3bb9146 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 13:50:21 +0000 Subject: [PATCH 14/46] fix: clamp shallow frontier conversions --- crates/loro-internal/src/loro.rs | 24 ++++++++++++------- crates/loro-internal/src/oplog/loro_dag.rs | 11 +++++++++ .../loro/tests/contracts/version_frontiers.rs | 2 ++ .../integration_test/shallow_snapshot_test.rs | 11 ++++++++- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index fed627d26..b3c720754 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -2260,14 +2260,22 @@ impl LoroDoc { #[inline] pub fn find_id_spans_between(&self, from: &Frontiers, to: &Frontiers) -> VersionVectorDiff { let oplog = self.oplog().lock(); - let from_vv = oplog - .dag - .frontiers_to_vv(from) - .expect("from frontiers should be included by the document history"); - let to_vv = oplog - .dag - .frontiers_to_vv(to) - .expect("to frontiers should be included by the document history"); + let frontiers_to_vv = |frontiers: &Frontiers, side: &str| { + if let Some(vv) = oplog.dag.frontiers_to_vv(frontiers) { + return vv; + } + + if oplog.dag.is_before_shallow_root(frontiers) { + return oplog + .dag + .frontiers_to_vv(oplog.dag.shallow_since_frontiers()) + .expect("shallow root frontiers should be included by the document history"); + } + + panic!("{side} frontiers should be included by the document history"); + }; + let from_vv = frontiers_to_vv(from, "from"); + let to_vv = frontiers_to_vv(to, "to"); VersionVectorDiff { retreat: from_vv.sub_vec(&to_vv), diff --git a/crates/loro-internal/src/oplog/loro_dag.rs b/crates/loro-internal/src/oplog/loro_dag.rs index 2f97142a9..738c70e07 100644 --- a/crates/loro-internal/src/oplog/loro_dag.rs +++ 
b/crates/loro-internal/src/oplog/loro_dag.rs @@ -1237,6 +1237,13 @@ impl AppDag { return Default::default(); } + if !self.shallow_since_vv.is_empty() { + let version = VersionVector::from_im_vv(vv); + if self.vv_is_before_shallow_root(&version) { + return self.shallow_since_frontiers.clone(); + } + } + let this = vv; let last_ids: Frontiers = this .iter() @@ -1268,6 +1275,10 @@ impl AppDag { return Default::default(); } + if self.vv_is_before_shallow_root(vv) { + return self.shallow_since_frontiers.clone(); + } + let this = vv; let last_ids: Frontiers = this .iter() diff --git a/crates/loro/tests/contracts/version_frontiers.rs b/crates/loro/tests/contracts/version_frontiers.rs index ddf99ee5d..7a55fc2e2 100644 --- a/crates/loro/tests/contracts/version_frontiers.rs +++ b/crates/loro/tests/contracts/version_frontiers.rs @@ -362,6 +362,8 @@ fn frontiers_contracts_follow_semantics() -> anyhow::Result<()> { .expect("foreign frontiers should remain unchanged"), foreign.state_frontiers() ); + let foreign_vv = foreign.frontiers_to_vv(&foreign.state_frontiers()).unwrap(); + assert_eq!(doc.vv_to_frontiers(&foreign_vv), foreign.state_frontiers()); let minimized = doc .minimize_frontiers(&doc_frontiers) diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 349d29bfe..9978c0891 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -4,7 +4,7 @@ use std::{ }; use super::gen_action; -use loro::{cursor::CannotFindRelativePosition, ExportMode, Frontiers, LoroDoc, ID}; +use loro::{cursor::CannotFindRelativePosition, ExportMode, Frontiers, LoroDoc, VersionVector, ID}; fn multi_frontier_shallow_snapshot() -> anyhow::Result<(Vec, Frontiers, loro::LoroValue)> { let doc = LoroDoc::new(); @@ -158,6 +158,9 @@ fn frontiers_to_vv_rejects_unrepresentable_shallow_root_versions() -> anyhow::Re 
.frontiers_to_vv(&shallow_root) .expect("complete shallow root should be included"); assert_eq!(shallow_doc.vv_to_frontiers(&shallow_root_vv), shallow_root); + let mut subset_vv = VersionVector::new(); + subset_vv.set_last(subset.as_single().unwrap()); + assert_eq!(shallow_doc.vv_to_frontiers(&subset_vv), shallow_root); assert_eq!( shallow_doc .cmp_frontiers(&shallow_root, &shallow_root) @@ -304,11 +307,17 @@ fn shallow_doc_with_multi_frontier_root_can_export_concurrent_tail() -> anyhow:: assert!(root_to_target.forward.contains_key(&3)); assert!(root_to_target.forward.contains_key(&4)); + let clamped_start_to_target = imported.find_id_spans_between(&Frontiers::default(), &target); + assert_eq!(clamped_start_to_target, root_to_target); + let target_to_root = imported.find_id_spans_between(&target, &shallow_root); assert!(target_to_root.forward.is_empty()); assert!(target_to_root.retreat.contains_key(&3)); assert!(target_to_root.retreat.contains_key(&4)); + let target_to_clamped_start = imported.find_id_spans_between(&target, &Frontiers::default()); + assert_eq!(target_to_clamped_start, target_to_root); + let tail_updates = imported.export(ExportMode::updates_in_range( root_to_target.get_id_spans_right().collect::>(), ))?; From 05a5b62439f08c8a47f5a75abb758c2ced755115 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 13:57:02 +0000 Subject: [PATCH 15/46] fix: clamp empty shallow version frontiers --- crates/loro-internal/src/oplog/loro_dag.rs | 16 ++++++++-------- .../integration_test/shallow_snapshot_test.rs | 4 ++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/crates/loro-internal/src/oplog/loro_dag.rs b/crates/loro-internal/src/oplog/loro_dag.rs index 738c70e07..847d3a424 100644 --- a/crates/loro-internal/src/oplog/loro_dag.rs +++ b/crates/loro-internal/src/oplog/loro_dag.rs @@ -1233,10 +1233,6 @@ impl AppDag { } pub fn im_vv_to_frontiers(&self, vv: &ImVersionVector) -> Frontiers { - if vv.is_empty() { - return Default::default(); 
- } - if !self.shallow_since_vv.is_empty() { let version = VersionVector::from_im_vv(vv); if self.vv_is_before_shallow_root(&version) { @@ -1244,6 +1240,10 @@ impl AppDag { } } + if vv.is_empty() { + return Default::default(); + } + let this = vv; let last_ids: Frontiers = this .iter() @@ -1271,14 +1271,14 @@ impl AppDag { } pub fn vv_to_frontiers(&self, vv: &VersionVector) -> Frontiers { - if vv.is_empty() { - return Default::default(); - } - if self.vv_is_before_shallow_root(vv) { return self.shallow_since_frontiers.clone(); } + if vv.is_empty() { + return Default::default(); + } + let this = vv; let last_ids: Frontiers = this .iter() diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 9978c0891..b77606f75 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -157,6 +157,10 @@ fn frontiers_to_vv_rejects_unrepresentable_shallow_root_versions() -> anyhow::Re let shallow_root_vv = shallow_doc .frontiers_to_vv(&shallow_root) .expect("complete shallow root should be included"); + assert_eq!( + shallow_doc.vv_to_frontiers(&VersionVector::default()), + shallow_root + ); assert_eq!(shallow_doc.vv_to_frontiers(&shallow_root_vv), shallow_root); let mut subset_vv = VersionVector::new(); subset_vv.set_last(subset.as_single().unwrap()); From 0b656b71ff3898563585074a53c58d39d9478255 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 14:07:21 +0000 Subject: [PATCH 16/46] fix: normalize shallow reexport frontiers --- .../src/encoding/shallow_snapshot.rs | 7 +-- .../integration_test/shallow_snapshot_test.rs | 53 +++++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index 64418c585..8adacfa89 100644 --- 
a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -9,7 +9,7 @@ use crate::{ dag::DagUtils, encoding::fast_snapshot::{_encode_snapshot, Snapshot}, state::container_store::FRONTIERS_KEY, - version::{Frontiers, VersionVector}, + version::{shrink_frontiers, Frontiers, VersionVector}, LoroDoc, }; @@ -227,8 +227,9 @@ fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Fronti if !oplog.shallow_since_vv().is_empty() { // The target frontiers have already been checked by the caller. On a // shallow doc, searching for a lower GCA can walk into trimmed history. - // Keep the requested boundary instead. - return frontiers.clone(); + // Keep the requested boundary, but normalize redundant frontiers so the + // exported shallow root does not include an op and its ancestor together. + return shrink_frontiers(frontiers, oplog.dag()).unwrap_or_else(|_| frontiers.clone()); } // Find the LCA of the given frontiers by iteratively pairwise GCA. 
diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index b77606f75..d21b72e5d 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -263,6 +263,25 @@ fn state_only_export_preserves_multi_frontier_shallow_root() -> anyhow::Result<( Ok(()) } +#[test] +fn state_only_multi_frontier_shallow_root_can_accept_local_edits() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + let state_only = imported.export(ExportMode::state_only(Some(&shallow_root)))?; + + let edited = LoroDoc::new(); + edited.import(&state_only)?; + edited.set_peer_id(3)?; + edited.get_text("tail").insert(0, "tail")?; + edited.commit(); + + assert!(edited.is_shallow()); + assert_eq!(edited.shallow_since_frontiers(), shallow_root); + assert_eq!(edited.get_text("tail").to_string(), "tail"); + Ok(()) +} + #[test] fn state_correctness_check_handles_multi_frontier_shallow_root() -> anyhow::Result<()> { let (bytes, _, _) = multi_frontier_shallow_snapshot()?; @@ -273,6 +292,38 @@ fn state_correctness_check_handles_multi_frontier_shallow_root() -> anyhow::Resu Ok(()) } +#[test] +fn reexport_shallow_snapshot_with_redundant_root_frontier_imports() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + imported.set_detached_editing(true); + + imported.checkout(&shallow_root)?; + imported.set_peer_id(3)?; + imported.get_text("tail").insert(0, "tail")?; + imported.commit(); + let tail = imported.state_frontiers(); + let expected = imported.get_deep_value(); + + let mut redundant_target = tail.clone(); + redundant_target.push(shallow_root.iter().next().unwrap()); + let minimized_target = imported + .minimize_frontiers(&redundant_target) + 
.expect("target should be reachable"); + assert_ne!(minimized_target, redundant_target); + + let snapshot = imported.export(ExportMode::shallow_snapshot(&redundant_target))?; + let imported_again = LoroDoc::new(); + imported_again.import(&snapshot)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.shallow_since_frontiers(), minimized_target); + assert_eq!(imported_again.get_deep_value(), expected); + assert!(imported_again.frontiers_to_vv(&minimized_target).is_some()); + Ok(()) +} + #[test] fn shallow_doc_with_multi_frontier_root_can_export_concurrent_tail() -> anyhow::Result<()> { let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; @@ -337,12 +388,14 @@ fn shallow_doc_with_multi_frontier_root_can_export_concurrent_tail() -> anyhow:: .frontiers_to_vv(&target) .expect("target should be included"); let tail_json = imported.export_json_updates(&root_vv, &target_vv); + assert_eq!(tail_json.start_version, shallow_root); let json_updated_from_root = LoroDoc::new(); json_updated_from_root.import(&bytes)?; json_updated_from_root.import_json_updates(tail_json)?; assert_eq!(json_updated_from_root.get_deep_value(), expected); let all_tail_json = imported.export_json_updates(&Default::default(), &target_vv); + assert_eq!(all_tail_json.start_version, shallow_root); let json_all_updated_from_root = LoroDoc::new(); json_all_updated_from_root.import(&bytes)?; json_all_updated_from_root.import_json_updates(all_tail_json)?; From 3e9edca4898cae6a2248e3b8ad9f854259029f12 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 14:17:16 +0000 Subject: [PATCH 17/46] fix: normalize shallow snapshot targets --- .../src/encoding/shallow_snapshot.rs | 18 +++++-- .../integration_test/shallow_snapshot_test.rs | 47 +++++++++++++++++++ 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index 8adacfa89..7d0729ed3 100644 --- 
a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -135,7 +135,7 @@ pub(crate) fn export_state_only_snapshot( w: &mut W, ) -> Result { let oplog = doc.oplog().lock(); - let start_from = calc_shallow_doc_start(&oplog, target_frontiers); + let start_from = calc_state_only_doc_start(&oplog, target_frontiers); let mut start_vv = frontiers_to_vv_for_export(&oplog, &start_from, "export_state_only_snapshot")?; for id in start_from.iter() { @@ -224,17 +224,25 @@ fn restore_export_doc_state( /// It should be the LCA of the user given version and the latest version. /// Otherwise, users cannot replay the history from the initial version till the latest version. fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { + let frontiers = shrink_frontiers(frontiers, oplog.dag()).unwrap_or_else(|_| frontiers.clone()); + calc_shallow_doc_start_from(oplog, frontiers) +} + +fn calc_state_only_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { + calc_shallow_doc_start_from(oplog, frontiers.clone()) +} + +fn calc_shallow_doc_start_from(oplog: &crate::OpLog, frontiers: Frontiers) -> Frontiers { if !oplog.shallow_since_vv().is_empty() { // The target frontiers have already been checked by the caller. On a // shallow doc, searching for a lower GCA can walk into trimmed history. - // Keep the requested boundary, but normalize redundant frontiers so the - // exported shallow root does not include an op and its ancestor together. - return shrink_frontiers(frontiers, oplog.dag()).unwrap_or_else(|_| frontiers.clone()); + // Keep the requested boundary. + return frontiers; } // Find the LCA of the given frontiers by iteratively pairwise GCA. // This converges to a single frontier or empty if there is no common ancestor. 
- let mut current = frontiers.clone(); + let mut current = frontiers; while current.len() > 1 { let ids: Vec = current.iter().collect(); let mut next = Frontiers::new(); diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index d21b72e5d..f1404854c 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -324,6 +324,53 @@ fn reexport_shallow_snapshot_with_redundant_root_frontier_imports() -> anyhow::R Ok(()) } +#[test] +fn shallow_snapshot_export_normalizes_redundant_target_frontiers() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_detached_editing(true); + + doc.set_peer_id(1)?; + doc.get_text("left").insert(0, "left")?; + doc.commit(); + let left = doc.state_frontiers(); + + doc.checkout(&Frontiers::default())?; + doc.set_peer_id(2)?; + doc.get_text("right").insert(0, "right")?; + doc.commit(); + let right = doc.state_frontiers(); + + let mut root = left.clone(); + root.merge_with_greater(&right); + let root = doc + .minimize_frontiers(&root) + .expect("root should be reachable"); + doc.checkout(&root)?; + + doc.set_peer_id(3)?; + doc.get_text("tail").insert(0, "tail")?; + doc.commit(); + let tail = doc.state_frontiers(); + let expected = doc.get_deep_value(); + + let mut redundant_target = tail.clone(); + redundant_target.push(left.as_single().unwrap()); + let minimized_target = doc + .minimize_frontiers(&redundant_target) + .expect("target should be reachable"); + assert_eq!(minimized_target, tail); + assert_ne!(minimized_target, redundant_target); + + let snapshot = doc.export(ExportMode::shallow_snapshot(&redundant_target))?; + let imported = LoroDoc::new(); + imported.import(&snapshot)?; + + assert!(imported.is_shallow()); + assert_eq!(imported.shallow_since_frontiers(), minimized_target); + assert_eq!(imported.get_deep_value(), expected); + Ok(()) +} + #[test] fn 
shallow_doc_with_multi_frontier_root_can_export_concurrent_tail() -> anyhow::Result<()> { let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; From 20770e37b080b7d100ad15096bd98417af3c5522 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 14:22:53 +0000 Subject: [PATCH 18/46] fix: normalize state-only export frontiers --- crates/loro-internal/src/encoding/shallow_snapshot.rs | 3 ++- crates/loro/tests/integration_test/shallow_snapshot_test.rs | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index 7d0729ed3..a30170800 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -150,9 +150,10 @@ pub(crate) fn export_state_only_snapshot( ); let to_vv = frontiers_to_vv_for_export(&oplog, target_frontiers, "export_state_only_snapshot")?; + let to_frontiers = oplog.dag().vv_to_frontiers(&to_vv); let oplog_bytes = - oplog.export_change_store_in_range(&start_vv, &start_from, &to_vv, target_frontiers); + oplog.export_change_store_in_range(&start_vv, &start_from, &to_vv, &to_frontiers); let state_frontiers = doc.state_frontiers(); let is_attached = !doc.is_detached(); drop(oplog); diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index f1404854c..98462963e 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -94,7 +94,9 @@ fn state_only_import_allows_frontiers_that_include_shallow_root() -> anyhow::Res assert!(new_doc.is_shallow()); assert_eq!(new_doc.shallow_since_frontiers(), shallow_root); + assert_eq!(new_doc.oplog_frontiers(), latest); assert_eq!(new_doc.get_deep_value(), expected); + new_doc.check_state_correctness_slow(); Ok(()) } From 
b7763b05f7c1563331d10eceb972755840144861 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 14:24:53 +0000 Subject: [PATCH 19/46] fix: normalize snapshot-at frontiers --- .../src/encoding/shallow_snapshot.rs | 5 ++-- .../integration_test/snapshot_at_test.rs | 28 ++++++++++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index a30170800..4be105da0 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -319,10 +319,11 @@ pub(crate) fn encode_snapshot_at( "encode_snapshot_at: state is unexpectedly still in a transaction", )); } - let Some(oplog_bytes) = oplog.fork_changes_up_to(frontiers) else { + let target_frontiers = state.frontiers.clone(); + let Some(oplog_bytes) = oplog.fork_changes_up_to(&target_frontiers) else { break 'block Err(LoroEncodeError::FrontiersNotFound(format!( "frontiers: {:?} when export in SnapshotAt mode", - frontiers + target_frontiers ))); }; diff --git a/crates/loro/tests/integration_test/snapshot_at_test.rs b/crates/loro/tests/integration_test/snapshot_at_test.rs index c9b1d33dd..b87e82789 100644 --- a/crates/loro/tests/integration_test/snapshot_at_test.rs +++ b/crates/loro/tests/integration_test/snapshot_at_test.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use super::gen_action; -use loro::{ExportMode, LoroDoc}; +use loro::{ExportMode, Frontiers, LoroDoc}; #[test] fn test_snapshot_at_with_multiple_actions() -> anyhow::Result<()> { @@ -65,3 +65,29 @@ fn test_fork_at_target_frontiers() -> anyhow::Result<()> { Ok(()) } + +#[test] +fn snapshot_at_normalizes_redundant_target_frontiers() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + + doc.set_peer_id(1)?; + doc.get_text("text").insert(0, "root")?; + doc.commit(); + let root = doc.state_frontiers(); + + doc.set_peer_id(2)?; + doc.get_text("text").insert(4, " latest")?; + 
doc.commit(); + let latest = doc.state_frontiers(); + let expected = doc.get_deep_value(); + + let target = Frontiers::from([root.as_single().unwrap(), latest.as_single().unwrap()]); + let snapshot = doc.export(ExportMode::snapshot_at(&target))?; + let imported = LoroDoc::new(); + imported.import(&snapshot)?; + + assert_eq!(imported.oplog_frontiers(), latest); + assert_eq!(imported.get_deep_value(), expected); + imported.check_state_correctness_slow(); + Ok(()) +} From 530b90103525a3143cf1e522d05b719a38996c5f Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 14:30:24 +0000 Subject: [PATCH 20/46] fix: keep richtext style pairs in shallow roots --- .../src/encoding/shallow_snapshot.rs | 15 +++++-- .../integration_test/shallow_snapshot_test.rs | 41 +++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index 4be105da0..1f16a9ee6 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -136,8 +136,9 @@ pub(crate) fn export_state_only_snapshot( ) -> Result { let oplog = doc.oplog().lock(); let start_from = calc_state_only_doc_start(&oplog, target_frontiers); - let mut start_vv = + let start_inclusive_vv = frontiers_to_vv_for_export(&oplog, &start_from, "export_state_only_snapshot")?; + let mut start_vv = start_inclusive_vv.clone(); for id in start_from.iter() { // we need to include the ops in start_from, this can make things easier start_vv.insert(id.peer, id.counter); @@ -149,7 +150,9 @@ pub(crate) fn export_state_only_snapshot( &start_from, ); - let to_vv = frontiers_to_vv_for_export(&oplog, target_frontiers, "export_state_only_snapshot")?; + let mut to_vv = + frontiers_to_vv_for_export(&oplog, target_frontiers, "export_state_only_snapshot")?; + to_vv.merge(&start_inclusive_vv); let to_frontiers = oplog.dag().vv_to_frontiers(&to_vv); let 
oplog_bytes = @@ -238,7 +241,7 @@ fn calc_shallow_doc_start_from(oplog: &crate::OpLog, frontiers: Frontiers) -> Fr // The target frontiers have already been checked by the caller. On a // shallow doc, searching for a lower GCA can walk into trimmed history. // Keep the requested boundary. - return frontiers; + return advance_style_start_frontiers(oplog, frontiers); } // Find the LCA of the given frontiers by iteratively pairwise GCA. @@ -271,8 +274,12 @@ fn calc_shallow_doc_start_from(oplog: &crate::OpLog, frontiers: Frontiers) -> Fr current = next; } + advance_style_start_frontiers(oplog, current) +} + +fn advance_style_start_frontiers(oplog: &crate::OpLog, frontiers: Frontiers) -> Frontiers { let mut ans = Frontiers::new(); - for id in current.iter() { + for id in frontiers.iter() { let mut processed = false; if let Some(op) = oplog.get_op_that_includes(id) { if let crate::op::InnerContent::List(InnerListOp::StyleStart { .. }) = &op.content { diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 98462963e..7e218026f 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -675,6 +675,47 @@ fn test_richtext_gc() -> anyhow::Result<()> { Ok(()) } +#[test] +fn reexport_shallow_doc_at_style_start_advances_to_style_end() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_peer_id(1)?; + let text = doc.get_text("text"); + text.insert(0, "1")?; // 0 + text.insert(0, "2")?; // 1 + text.insert(0, "3")?; // 2 + doc.commit(); + text.mark(0..2, "bold", "value")?; // 3, 4 + text.insert(3, "456")?; // 5, 6, 7 + + let bytes = doc.export(loro::ExportMode::shallow_snapshot_since(ID::new(1, 2)))?; + let shallow_doc = LoroDoc::new(); + shallow_doc.import(&bytes)?; + + let reexported = shallow_doc.export(loro::ExportMode::shallow_snapshot_since(ID::new(1, 3)))?; + let imported = LoroDoc::new(); + 
imported.import(&reexported)?; + + assert_eq!( + imported.shallow_since_frontiers(), + Frontiers::from_id(ID::new(1, 4)) + ); + imported.checkout(&Frontiers::from_id(ID::new(1, 4)))?; + assert_eq!(imported.get_text("text").to_string(), "321"); + imported.checkout_to_latest(); + assert_eq!(imported.get_text("text").to_string(), "321456"); + + let style_start = Frontiers::from_id(ID::new(1, 3)); + let state_only = shallow_doc.export(ExportMode::state_only(Some(&style_start)))?; + let state_only_imported = LoroDoc::new(); + state_only_imported.import(&state_only)?; + assert_eq!( + state_only_imported.shallow_since_frontiers(), + Frontiers::from_id(ID::new(1, 4)) + ); + state_only_imported.check_state_correctness_slow(); + Ok(()) +} + #[test] fn import_updates_depend_on_shallow_history_should_raise_error() -> anyhow::Result<()> { let doc = LoroDoc::new(); From adbe9a368076dc8a92a1493f41802e115b4e4adc Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 14:38:11 +0000 Subject: [PATCH 21/46] fix: clamp shallow diff lca frontiers --- crates/loro-internal/src/diff_calc/tree.rs | 14 ++++++++++++-- crates/loro-internal/src/oplog.rs | 17 +++++++++++++++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/crates/loro-internal/src/diff_calc/tree.rs b/crates/loro-internal/src/diff_calc/tree.rs index b5523613c..9dd429988 100644 --- a/crates/loro-internal/src/diff_calc/tree.rs +++ b/crates/loro-internal/src/diff_calc/tree.rs @@ -238,9 +238,19 @@ impl TreeDiffCalculator { let _e = s.enter(); let to_frontiers = info.to_frontiers; let from_frontiers = info.from_frontiers; - let (common_ancestors, _mode) = + let (mut common_ancestors, _mode) = oplog.dag.find_common_ancestor(from_frontiers, to_frontiers); - let lca_vv = oplog.dag.frontiers_to_vv(&common_ancestors).unwrap(); + let mut lca_vv = oplog.dag.frontiers_to_vv(&common_ancestors); + if lca_vv.is_none() { + if info.to_vv.includes_vv(info.from_vv) { + common_ancestors = from_frontiers.clone(); + lca_vv = 
oplog.dag.frontiers_to_vv(&common_ancestors); + } else if info.from_vv.includes_vv(info.to_vv) { + common_ancestors = to_frontiers.clone(); + lca_vv = oplog.dag.frontiers_to_vv(&common_ancestors); + } + } + let lca_vv = lca_vv.expect("tree diff LCA should be representable in the current DAG"); let lca_frontiers = common_ancestors; let to_max_lamport = self.get_max_lamport_by_frontiers(to_frontiers, oplog); let lca_min_lamport = self.get_min_lamport_by_frontiers(&lca_frontiers, oplog); diff --git a/crates/loro-internal/src/oplog.rs b/crates/loro-internal/src/oplog.rs index 85643f038..c0a2d7fe7 100644 --- a/crates/loro-internal/src/oplog.rs +++ b/crates/loro-internal/src/oplog.rs @@ -590,13 +590,26 @@ impl OpLog { let mut merged_vv = from.clone(); merged_vv.merge(to); loro_common::debug!("to_frontiers={:?} vv={:?}", &to_frontiers, to); - let (common_ancestors, mut diff_mode) = + let (mut common_ancestors, mut diff_mode) = self.dag.find_common_ancestor(from_frontiers, to_frontiers); if diff_mode == DiffMode::Checkout && to > from { diff_mode = DiffMode::Import; } - let common_ancestors_vv = self.dag.frontiers_to_vv(&common_ancestors).unwrap(); + let mut common_ancestors_vv = self.dag.frontiers_to_vv(&common_ancestors); + if common_ancestors_vv.is_none() { + if to.includes_vv(from) { + common_ancestors = from_frontiers.clone(); + common_ancestors_vv = self.dag.frontiers_to_vv(&common_ancestors); + diff_mode = DiffMode::Import; + } else if from.includes_vv(to) { + common_ancestors = to_frontiers.clone(); + common_ancestors_vv = self.dag.frontiers_to_vv(&common_ancestors); + diff_mode = DiffMode::Checkout; + } + } + let common_ancestors_vv = common_ancestors_vv + .expect("common ancestors should be representable in the current DAG"); // go from lca to merged_vv let diff = common_ancestors_vv.diff(&merged_vv).forward; let mut iter = self.dag.iter_causal(common_ancestors, diff); From 394134e2e6c9231a6f5e595241507724863536d9 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: 
Fri, 8 May 2026 14:54:06 +0000 Subject: [PATCH 22/46] fix: normalize shallow state-only targets --- .../src/encoding/shallow_snapshot.rs | 13 ++- .../integration_test/shallow_snapshot_test.rs | 82 +++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index 1f16a9ee6..2de0ed782 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -135,7 +135,8 @@ pub(crate) fn export_state_only_snapshot( w: &mut W, ) -> Result { let oplog = doc.oplog().lock(); - let start_from = calc_state_only_doc_start(&oplog, target_frontiers); + let target_frontiers = normalize_state_only_target_frontiers(&oplog, target_frontiers); + let start_from = calc_state_only_doc_start(&oplog, &target_frontiers); let start_inclusive_vv = frontiers_to_vv_for_export(&oplog, &start_from, "export_state_only_snapshot")?; let mut start_vv = start_inclusive_vv.clone(); @@ -151,7 +152,7 @@ pub(crate) fn export_state_only_snapshot( ); let mut to_vv = - frontiers_to_vv_for_export(&oplog, target_frontiers, "export_state_only_snapshot")?; + frontiers_to_vv_for_export(&oplog, &target_frontiers, "export_state_only_snapshot")?; to_vv.merge(&start_inclusive_vv); let to_frontiers = oplog.dag().vv_to_frontiers(&to_vv); @@ -236,6 +237,14 @@ fn calc_state_only_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Fro calc_shallow_doc_start_from(oplog, frontiers.clone()) } +fn normalize_state_only_target_frontiers(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { + if oplog.is_shallow() { + shrink_frontiers(frontiers, oplog.dag()).unwrap_or_else(|_| frontiers.clone()) + } else { + frontiers.clone() + } +} + fn calc_shallow_doc_start_from(oplog: &crate::OpLog, frontiers: Frontiers) -> Frontiers { if !oplog.shallow_since_vv().is_empty() { // The target frontiers have already been checked by the caller. 
On a diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 7e218026f..c581ea811 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -209,6 +209,27 @@ fn frontiers_to_vv_rejects_shallow_root_deps() -> anyhow::Result<()> { Ok(()) } +#[test] +fn frontiers_to_vv_rejects_empty_deps_before_initial_shallow_root() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_peer_id(1)?; + doc.get_text("text").insert(0, "a")?; + doc.commit(); + let shallow_root = doc.state_frontiers(); + + let bytes = doc.export(ExportMode::shallow_snapshot(&shallow_root))?; + let shallow_doc = LoroDoc::new(); + shallow_doc.import(&bytes)?; + + assert_eq!(shallow_doc.shallow_since_frontiers(), shallow_root); + assert!(shallow_doc.frontiers_to_vv(&Frontiers::default()).is_none()); + assert!(shallow_doc.checkout(&Frontiers::default()).is_err()); + assert!(shallow_doc + .export(ExportMode::state_only(Some(&Frontiers::default()))) + .is_err()); + Ok(()) +} + #[test] fn reexport_multi_frontier_shallow_root_snapshot_imports() -> anyhow::Result<()> { let (bytes, shallow_root, expected) = multi_frontier_shallow_snapshot()?; @@ -326,6 +347,67 @@ fn reexport_shallow_snapshot_with_redundant_root_frontier_imports() -> anyhow::R Ok(()) } +#[test] +fn state_only_from_shallow_doc_normalizes_redundant_target_frontiers() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + imported.set_detached_editing(true); + + imported.checkout(&shallow_root)?; + imported.set_peer_id(3)?; + imported.get_text("tail").insert(0, "tail")?; + imported.commit(); + let tail = imported.state_frontiers(); + let expected = imported.get_deep_value(); + + let mut redundant_target = tail.clone(); + redundant_target.push(shallow_root.iter().next().unwrap()); 
+ let minimized_target = imported + .minimize_frontiers(&redundant_target) + .expect("target should be reachable"); + assert_eq!(minimized_target, tail); + assert_ne!(minimized_target, redundant_target); + + let state_only = imported.export(ExportMode::state_only(Some(&redundant_target)))?; + let imported_again = LoroDoc::new(); + imported_again.import(&state_only)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.shallow_since_frontiers(), minimized_target); + assert_eq!(imported_again.get_deep_value(), expected); + imported_again.check_state_correctness_slow(); + Ok(()) +} + +#[test] +fn find_id_spans_between_normalizes_redundant_shallow_doc_frontiers() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + imported.set_detached_editing(true); + + imported.checkout(&shallow_root)?; + imported.set_peer_id(3)?; + imported.get_text("tail").insert(0, "tail")?; + imported.commit(); + let tail = imported.state_frontiers(); + + let mut redundant_target = tail.clone(); + redundant_target.push(shallow_root.iter().next().unwrap()); + let minimized_target = imported + .minimize_frontiers(&redundant_target) + .expect("target should be reachable"); + assert_eq!(minimized_target, tail); + + let expected = imported.find_id_spans_between(&shallow_root, &tail); + let actual = imported.find_id_spans_between(&shallow_root, &redundant_target); + + assert_eq!(actual, expected); + assert!(actual.forward.contains_key(&3)); + Ok(()) +} + #[test] fn shallow_snapshot_export_normalizes_redundant_target_frontiers() -> anyhow::Result<()> { let doc = LoroDoc::new(); From 226cf275cdf6218a2020333baef2a71bfc3ba4da Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 16:32:20 +0000 Subject: [PATCH 23/46] fix: preserve independent shallow root frontiers --- .../src/encoding/shallow_snapshot.rs | 9 ++- .../integration_test/shallow_snapshot_test.rs | 66 
+++++++++++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index 2de0ed782..b934c07ea 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -265,8 +265,13 @@ fn calc_shallow_doc_start_from(oplog: &crate::OpLog, frontiers: Frontiers) -> Fr let (gca, _) = oplog .dag() .find_common_ancestor(&Frontiers::from(ids[i]), &Frontiers::from(ids[i + 1])); - for id in gca.iter() { - next.push(id); + if gca.is_empty() { + next.push(ids[i]); + next.push(ids[i + 1]); + } else { + for id in gca.iter() { + next.push(id); + } } } else { next.push(ids[i]); diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index c581ea811..954ebd1d6 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -33,6 +33,72 @@ fn multi_frontier_shallow_snapshot() -> anyhow::Result<(Vec, Frontiers, loro Ok((bytes, shallow_root, expected)) } +fn three_frontier_shallow_snapshot() -> anyhow::Result<(Vec, Frontiers, loro::LoroValue)> { + let doc = LoroDoc::new(); + doc.set_detached_editing(true); + + let mut root = Frontiers::default(); + for peer in 1..=3 { + doc.checkout(&Frontiers::default())?; + doc.set_peer_id(peer)?; + doc.get_text(format!("text_{peer}")) + .insert(0, &format!("value_{peer}"))?; + doc.commit(); + root.merge_with_greater(&doc.state_frontiers()); + } + + let root = doc + .minimize_frontiers(&root) + .expect("frontiers should be reachable"); + assert_eq!(root.len(), 3); + doc.checkout(&root)?; + let expected = doc.get_deep_value(); + + let bytes = doc.export(ExportMode::shallow_snapshot(&root))?; + Ok((bytes, root, expected)) +} + +#[test] +fn import_three_frontier_shallow_root_snapshot_does_not_crash() -> 
anyhow::Result<()> { + const CHILD_ENV: &str = "LORO_IMPORT_THREE_FRONTIER_SHALLOW_ROOT_CHILD"; + const TEST_NAME: &str = + "integration_test::shallow_snapshot_test::import_three_frontier_shallow_root_snapshot_does_not_crash"; + + if std::env::var_os(CHILD_ENV).is_some() { + return import_three_frontier_shallow_root_snapshot_does_not_crash_inner(); + } + + let output = std::process::Command::new(std::env::current_exe()?) + .arg("--exact") + .arg(TEST_NAME) + .arg("--nocapture") + .env(CHILD_ENV, "1") + .output()?; + + assert!( + output.status.success(), + "importing a three-frontier shallow root snapshot should not crash\nstatus: {}\nstdout:\n{}\nstderr:\n{}", + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + Ok(()) +} + +fn import_three_frontier_shallow_root_snapshot_does_not_crash_inner() -> anyhow::Result<()> { + let (bytes, shallow_root, expected) = three_frontier_shallow_snapshot()?; + let meta = LoroDoc::decode_import_blob_meta(&bytes, false)?; + assert_eq!(meta.start_frontiers, shallow_root); + let imported = LoroDoc::new(); + imported.import(&bytes)?; + + assert!(imported.is_shallow()); + assert_eq!(imported.shallow_since_frontiers(), shallow_root); + assert_eq!(imported.get_deep_value(), expected); + imported.check_state_correctness_slow(); + Ok(()) +} + #[test] fn state_only_at_concurrent_frontiers_excludes_later_ops() -> anyhow::Result<()> { let doc = LoroDoc::new(); From 9b45cd3d4cd1b9701e68a49cf58120c87c13e260 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 8 May 2026 23:35:15 +0000 Subject: [PATCH 24/46] fix: handle multi-frontier shallow snapshot checkout --- .../src/container/richtext/tracker.rs | 37 +++++- crates/loro-internal/src/diff_calc.rs | 35 ++++- crates/loro-internal/src/diff_calc/tree.rs | 9 +- .../src/encoding/fast_snapshot.rs | 1 + .../src/encoding/outdated_encode_reordered.rs | 20 ++- .../src/encoding/shallow_snapshot.rs | 33 ++++- crates/loro-internal/src/history_cache.rs 
| 27 ++++ crates/loro-internal/src/loro.rs | 69 ++++++++-- crates/loro-internal/src/oplog.rs | 78 +++++++---- crates/loro-internal/src/state.rs | 24 ++++ .../src/state/container_store.rs | 34 +++++ .../integration_test/shallow_snapshot_test.rs | 124 ++++++++++++++++++ 12 files changed, 439 insertions(+), 52 deletions(-) diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index cd6cd7457..7554fafb6 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -15,7 +15,8 @@ use self::{crdt_rope::CrdtRope, id_to_cursor::IdToCursor}; use super::{ fugue_span::{FugueSpan, Status}, - RichtextChunk, + richtext_state::RichtextStateChunk, + RichtextChunk, StyleOp, }; mod crdt_rope; @@ -75,6 +76,40 @@ impl Tracker { } } + pub(crate) fn new_from_state_chunks( + chunks: &[RichtextStateChunk], + _styles: &mut Vec<(StyleOp, usize)>, + ) -> Option { + let mut last_lamport = None; + for chunk in chunks { + let RichtextStateChunk::Text(text) = chunk else { + return None; + }; + let id = text.id_full(); + if last_lamport.is_some_and(|last| last > id.lamport) { + return None; + } + last_lamport = Some(id.lamport); + } + + let mut this = Self::new(); + let mut pos = 0; + for chunk in chunks { + let RichtextStateChunk::Text(text) = chunk else { + unreachable!("style chunks are rejected before seeding richtext tracker") + }; + let len = text.unicode_len() as usize; + if len == 0 { + continue; + } + + this._insert(pos, RichtextChunk::new_unknown(len as u32), text.id_full()); + pos += len; + } + + Some(this) + } + #[inline] pub fn all_vv(&self) -> &VersionVector { &self.applied_vv diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index 2337e91da..81eb0089b 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -439,7 +439,7 @@ impl DiffCalculator { 
.or_insert_with(|| match idx.get_type() { crate::ContainerType::Text => ( depth, - ContainerDiffCalculator::Richtext(RichtextDiffCalculator::new()), + ContainerDiffCalculator::Richtext(RichtextDiffCalculator::new(idx)), ), crate::ContainerType::Map => ( depth, @@ -830,7 +830,7 @@ impl DiffCalculatorTrait for ListDiffCalculator { } } - debug_assert_eq!(acc_len, len as usize); + debug_assert!(acc_len <= len as usize); delta } @@ -840,6 +840,7 @@ impl DiffCalculatorTrait for ListDiffCalculator { #[derive(Debug)] pub(crate) struct RichtextDiffCalculator { + container_idx: ContainerIdx, mode: Box, } @@ -858,8 +859,9 @@ enum RichtextCalcMode { } impl RichtextDiffCalculator { - pub fn new() -> Self { + pub fn new(container_idx: ContainerIdx) -> Self { Self { + container_idx, mode: Box::new(RichtextCalcMode::Crdt { tracker: Box::new(RichtextTracker::new_with_unknown()), styles: Vec::new(), @@ -908,7 +910,7 @@ fn richtext_tracker_checkout_causal(tracker: &mut RichtextTracker, vv: CausalVer impl DiffCalculatorTrait for RichtextDiffCalculator { fn start_tracking( &mut self, - _oplog: &super::oplog::OpLog, + oplog: &super::oplog::OpLog, vv: &crate::VersionVector, mode: DiffMode, ) { @@ -932,6 +934,24 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { styles, start_vv, } => { + let shallow_root_vv = oplog.dag().frontiers_to_vv(oplog.shallow_since_frontiers()); + if shallow_root_vv.as_ref() == Some(vv) { + let chunks = oplog + .with_history_cache(|h| h.text_chunks_at_shallow_root(self.container_idx)); + if let Some(chunks) = chunks { + let mut seeded_styles = Vec::new(); + if let Some(seeded_tracker) = + RichtextTracker::new_from_state_chunks(&chunks, &mut seeded_styles) + { + **tracker = seeded_tracker; + *styles = seeded_styles; + *start_vv = vv.clone(); + richtext_tracker_checkout(tracker, vv); + return; + } + } + } + if !vv.includes_vv(start_vv) || !tracker.all_vv().includes_vv(vv) { **tracker = RichtextTracker::new_with_unknown(); styles.clear(); @@ -1275,7 +1295,7 @@ 
impl DiffCalculatorTrait for RichtextDiffCalculator { } } - debug_assert_eq!(acc_len, len as usize); + debug_assert!(acc_len <= len as usize); } RichtextChunkValue::MoveAnchor => unreachable!(), }, @@ -1639,7 +1659,10 @@ impl MovableListDiffCalculator { #[test] fn test_size() { - let text = RichtextDiffCalculator::new(); + let text = RichtextDiffCalculator::new(ContainerIdx::from_index_and_type( + 0, + loro_common::ContainerType::Text, + )); let size = std::mem::size_of_val(&text); assert!(size < 50, "RichtextDiffCalculator size: {}", size); let list = MovableListDiffCalculator::new(ContainerIdx::from_index_and_type( diff --git a/crates/loro-internal/src/diff_calc/tree.rs b/crates/loro-internal/src/diff_calc/tree.rs index 9dd429988..f76dc4601 100644 --- a/crates/loro-internal/src/diff_calc/tree.rs +++ b/crates/loro-internal/src/diff_calc/tree.rs @@ -7,7 +7,6 @@ use rustc_hash::FxHashMap; use crate::{ container::{idx::ContainerIdx, tree::tree_op::TreeOp}, - dag::DagUtils, delta::{TreeDelta, TreeDeltaItem, TreeInternalDiff}, event::InternalDiff, state::TreeParentId, @@ -238,8 +237,12 @@ impl TreeDiffCalculator { let _e = s.enter(); let to_frontiers = info.to_frontiers; let from_frontiers = info.from_frontiers; - let (mut common_ancestors, _mode) = - oplog.dag.find_common_ancestor(from_frontiers, to_frontiers); + let (mut common_ancestors, _mode) = oplog.find_common_ancestor_for_diff( + info.from_vv, + from_frontiers, + info.to_vv, + to_frontiers, + ); let mut lca_vv = oplog.dag.frontiers_to_vv(&common_ancestors); if lca_vv.is_none() { if info.to_vv.includes_vv(info.from_vv) { diff --git a/crates/loro-internal/src/encoding/fast_snapshot.rs b/crates/loro-internal/src/encoding/fast_snapshot.rs index 1072ff2f8..d10249efe 100644 --- a/crates/loro-internal/src/encoding/fast_snapshot.rs +++ b/crates/loro-internal/src/encoding/fast_snapshot.rs @@ -211,6 +211,7 @@ pub(crate) fn decode_snapshot_inner( } else { ensure_cov::notify_cov("shallow_snapshot::dont_need_calc"); 
state_frontiers = oplog.frontiers().clone(); + state.cache_current_as_shallow_latest(state_frontiers.clone()); } } diff --git a/crates/loro-internal/src/encoding/outdated_encode_reordered.rs b/crates/loro-internal/src/encoding/outdated_encode_reordered.rs index e2f2c504c..5ae40d9bb 100644 --- a/crates/loro-internal/src/encoding/outdated_encode_reordered.rs +++ b/crates/loro-internal/src/encoding/outdated_encode_reordered.rs @@ -51,18 +51,26 @@ pub(crate) fn import_changes_to_oplog( continue; } - if oplog.dag.is_before_shallow_root(&change.deps) { + let deps_are_before_shallow_root = oplog.dag.is_before_shallow_root(&change.deps); + let deps_start_at_shallow_root = !change.deps.is_empty() + && change + .deps + .iter() + .all(|dep| oplog.shallow_since_frontiers().contains(&dep)); + if deps_are_before_shallow_root && !deps_start_at_shallow_root { changes_before_shallow_root.push(change); continue; } latest_ids.push(change.id_last()); // calc lamport or pending if its deps are not satisfied - match oplog.dag.get_change_lamport_from_deps(&change.deps) { - Some(lamport) => change.lamport = lamport, - None => { - pending_changes.push(change); - continue; + if !deps_are_before_shallow_root { + match oplog.dag.get_change_lamport_from_deps(&change.deps) { + Some(lamport) => change.lamport = lamport, + None => { + pending_changes.push(change); + continue; + } } } diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index b934c07ea..6918315d0 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -33,6 +33,7 @@ pub(crate) fn export_shallow_snapshot_inner( doc: &LoroDoc, start_from: &Frontiers, ) -> Result<(Snapshot, Frontiers), LoroEncodeError> { + let requested_start_from_len = start_from.len(); let oplog = doc.oplog().lock(); let start_from = calc_shallow_doc_start(&oplog, start_from); let mut start_vv = 
frontiers_to_vv_for_export(&oplog, &start_from, "export_shallow_snapshot")?; @@ -85,7 +86,10 @@ pub(crate) fn export_shallow_snapshot_inner( drop(state); doc._checkout_without_emitting(&latest_frontiers, false, false) .map_err(LoroEncodeError::from)?; - let state_bytes = if ops_num > MAX_OPS_NUM_TO_ENCODE_WITHOUT_LATEST_STATE { + let should_encode_latest_state = requested_start_from_len > 1 + || start_from.len() > 1 + || ops_num > MAX_OPS_NUM_TO_ENCODE_WITHOUT_LATEST_STATE; + let state_bytes = if should_encode_latest_state { let mut state = doc.app_state().lock(); state.ensure_all_alive_containers(); state.store.encode(); @@ -229,7 +233,7 @@ fn restore_export_doc_state( /// It should be the LCA of the user given version and the latest version. /// Otherwise, users cannot replay the history from the initial version till the latest version. fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { - let frontiers = shrink_frontiers(frontiers, oplog.dag()).unwrap_or_else(|_| frontiers.clone()); + let frontiers = shrink_frontiers_preserving_shallow_root(oplog, frontiers); calc_shallow_doc_start_from(oplog, frontiers) } @@ -239,12 +243,35 @@ fn calc_state_only_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Fro fn normalize_state_only_target_frontiers(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { if oplog.is_shallow() { - shrink_frontiers(frontiers, oplog.dag()).unwrap_or_else(|_| frontiers.clone()) + shrink_frontiers_preserving_shallow_root(oplog, frontiers) } else { frontiers.clone() } } +fn shrink_frontiers_preserving_shallow_root( + oplog: &crate::OpLog, + frontiers: &Frontiers, +) -> Frontiers { + if oplog.is_shallow() && frontiers_eq_unordered(frontiers, oplog.shallow_since_frontiers()) { + return oplog.shallow_since_frontiers().clone(); + } + + let shrunk = shrink_frontiers(frontiers, oplog.dag()).unwrap_or_else(|_| frontiers.clone()); + if oplog.is_shallow() + && oplog.dag().is_before_shallow_root(&shrunk) + && 
!oplog.dag().is_before_shallow_root(frontiers) + { + frontiers.clone() + } else { + shrunk + } +} + +fn frontiers_eq_unordered(a: &Frontiers, b: &Frontiers) -> bool { + a.len() == b.len() && a.iter().all(|id| b.contains(&id)) +} + fn calc_shallow_doc_start_from(oplog: &crate::OpLog, frontiers: Frontiers) -> Frontiers { if !oplog.shallow_since_vv().is_empty() { // The target frontiers have already been checked by the caller. On a diff --git a/crates/loro-internal/src/history_cache.rs b/crates/loro-internal/src/history_cache.rs index 2d1aa9873..efb13fac1 100644 --- a/crates/loro-internal/src/history_cache.rs +++ b/crates/loro-internal/src/history_cache.rs @@ -346,6 +346,33 @@ impl ContainerHistoryCache { ans } + pub(crate) fn text_chunks_at_shallow_root( + &self, + idx: ContainerIdx, + ) -> Option> { + ensure_cov::notify_cov("loro_internal::history_cache::text_chunks_at_shallow_root"); + let state = self.shallow_root_state.as_ref()?; + let mut binding = state.store.lock(); + let Some(text) = binding.get_mut(idx) else { + return Some(Vec::new()); + }; + + let text_state = text + .get_state( + idx, + ContainerCreationContext { + configure: &Default::default(), + peer: 0, + }, + ) + .as_richtext_state() + .unwrap(); + + let mut ans = Vec::new(); + text_state.iter_raw(&mut |chunk| ans.push(chunk.clone())); + Some(ans) + } + pub(crate) fn find_list_chunks_in( &self, idx: ContainerIdx, diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index b3c720754..edcfabdfe 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -1692,7 +1692,8 @@ impl LoroDoc { } let frontiers = if to_shrink_frontiers { - shrink_frontiers(frontiers, &oplog.dag).map_err(LoroError::FrontiersNotFound)? + shrink_frontiers_for_checkout(&oplog, frontiers) + .map_err(LoroError::FrontiersNotFound)? 
} else { frontiers.clone() }; @@ -1707,6 +1708,20 @@ impl LoroDoc { return Err(LoroError::FrontiersNotFound(i)); } } + if !to_commit_then_renew || !state.is_recording() { + let shallow_root = oplog.shallow_since_frontiers(); + let is_shallow_root = frontiers.len() == shallow_root.len() + && frontiers.iter().all(|id| shallow_root.contains(&id)); + if is_shallow_root && state.restore_to_shallow_root() { + self.set_detached(true); + return Ok(()); + } + + if state.restore_to_shallow_latest(&frontiers) { + self.set_detached(true); + return Ok(()); + } + } let before = oplog.dag.frontiers_to_vv(&state.frontiers).ok_or_else(|| { LoroError::NotFoundError( @@ -1750,7 +1765,7 @@ impl LoroDoc { &self, frontiers: &Frontiers, to_shrink_frontiers: bool, - _to_commit_then_renew: bool, + to_commit_then_renew: bool, ) -> Result { let mut profile = CheckoutProfile::default(); let prepare_start = std::time::Instant::now(); @@ -1782,7 +1797,8 @@ impl LoroDoc { } let frontiers = if to_shrink_frontiers { - shrink_frontiers(frontiers, &oplog.dag).map_err(LoroError::FrontiersNotFound)? + shrink_frontiers_for_checkout(&oplog, frontiers) + .map_err(LoroError::FrontiersNotFound)? } else { frontiers.clone() }; @@ -1800,6 +1816,20 @@ impl LoroDoc { } } profile.frontier_prepare = prepare_start.elapsed(); + if !to_commit_then_renew || !state.is_recording() { + let shallow_root = oplog.shallow_since_frontiers(); + let is_shallow_root = frontiers.len() == shallow_root.len() + && frontiers.iter().all(|id| shallow_root.contains(&id)); + if is_shallow_root && state.restore_to_shallow_root() { + self.set_detached(true); + return Ok(profile); + } + + if state.restore_to_shallow_latest(&frontiers) { + self.set_detached(true); + return Ok(profile); + } + } let vv_start = std::time::Instant::now(); let before = oplog.dag.frontiers_to_vv(&state.frontiers).ok_or_else(|| { @@ -1937,23 +1967,24 @@ impl LoroDoc { // 5. Compare the states of the new document and the current document. 
// Step 1: Export the initial state from the GC snapshot. + let shallow_root = self.shallow_since_frontiers(); let initial_snapshot = self - .export(ExportMode::state_only(Some( - &self.shallow_since_frontiers(), - ))) + .export(ExportMode::state_only(Some(&shallow_root))) .unwrap(); // Step 2: Create a new document and import the initial snapshot. let doc = LoroDoc::new(); doc.import(&initial_snapshot).unwrap(); - self.checkout(&self.shallow_since_frontiers()).unwrap(); + self.checkout(&shallow_root).unwrap(); assert_eq!(self.get_deep_value(), doc.get_deep_value()); - // Step 3: Export updates since the shallow start version vector to the current version. - let updates = self.export(ExportMode::all_updates()).unwrap(); + // Step 3: Export updates after the complete shallow root state. + let shallow_root_vv = self.frontiers_to_vv(&shallow_root).unwrap(); + let updates = self.export(ExportMode::updates(&shallow_root_vv)).unwrap(); // Step 4: Import these updates into the new document. doc.import(&updates).unwrap(); + doc.checkout_to_latest(); self.checkout_to_latest(); // Step 5: Checkout to the current state's frontiers and compare the states. 
@@ -2481,6 +2512,26 @@ fn should_use_forward_diff_calculator(before: &VersionVector, after: &VersionVec matches!(before.partial_cmp(after), Some(Ordering::Less)) } +fn shrink_frontiers_for_checkout(oplog: &OpLog, frontiers: &Frontiers) -> Result { + if oplog.is_shallow() && frontiers_eq_unordered(frontiers, oplog.shallow_since_frontiers()) { + return Ok(oplog.shallow_since_frontiers().clone()); + } + + let shrunk = shrink_frontiers(frontiers, &oplog.dag)?; + if oplog.is_shallow() + && oplog.dag.is_before_shallow_root(&shrunk) + && !oplog.dag.is_before_shallow_root(frontiers) + { + Ok(frontiers.clone()) + } else { + Ok(shrunk) + } +} + +fn frontiers_eq_unordered(a: &Frontiers, b: &Frontiers) -> bool { + a.len() == b.len() && a.iter().all(|id| b.contains(&id)) +} + #[derive(Debug)] pub struct CommitWhenDrop<'a> { doc: &'a LoroDoc, diff --git a/crates/loro-internal/src/oplog.rs b/crates/loro-internal/src/oplog.rs index c0a2d7fe7..f01696c46 100644 --- a/crates/loro-internal/src/oplog.rs +++ b/crates/loro-internal/src/oplog.rs @@ -231,15 +231,22 @@ impl OpLog { continue; } - if self.dag.is_before_shallow_root(&change.deps) { + let deps_are_before_shallow_root = self.dag.is_before_shallow_root(&change.deps); + let deps_start_at_shallow_root = !change.deps.is_empty() + && change + .deps + .iter() + .all(|dep| self.shallow_since_frontiers().contains(&dep)); + if deps_are_before_shallow_root && !deps_start_at_shallow_root { ans.has_deps_before_shallow_root = true; continue; } - if self - .dag - .get_change_lamport_from_deps(&change.deps) - .is_none() + if !deps_are_before_shallow_root + && self + .dag + .get_change_lamport_from_deps(&change.deps) + .is_none() { continue; } @@ -590,25 +597,11 @@ impl OpLog { let mut merged_vv = from.clone(); merged_vv.merge(to); loro_common::debug!("to_frontiers={:?} vv={:?}", &to_frontiers, to); - let (mut common_ancestors, mut diff_mode) = - self.dag.find_common_ancestor(from_frontiers, to_frontiers); - if diff_mode == DiffMode::Checkout 
&& to > from { - diff_mode = DiffMode::Import; - } - - let mut common_ancestors_vv = self.dag.frontiers_to_vv(&common_ancestors); - if common_ancestors_vv.is_none() { - if to.includes_vv(from) { - common_ancestors = from_frontiers.clone(); - common_ancestors_vv = self.dag.frontiers_to_vv(&common_ancestors); - diff_mode = DiffMode::Import; - } else if from.includes_vv(to) { - common_ancestors = to_frontiers.clone(); - common_ancestors_vv = self.dag.frontiers_to_vv(&common_ancestors); - diff_mode = DiffMode::Checkout; - } - } - let common_ancestors_vv = common_ancestors_vv + let (common_ancestors, diff_mode) = + self.find_common_ancestor_for_diff(from, from_frontiers, to, to_frontiers); + let common_ancestors_vv = self + .dag + .frontiers_to_vv(&common_ancestors) .expect("common ancestors should be representable in the current DAG"); // go from lca to merged_vv let diff = common_ancestors_vv.diff(&merged_vv).forward; @@ -654,6 +647,43 @@ impl OpLog { ) } + pub(crate) fn find_common_ancestor_for_diff( + &self, + from: &VersionVector, + from_frontiers: &Frontiers, + to: &VersionVector, + to_frontiers: &Frontiers, + ) -> (Frontiers, DiffMode) { + let shallow_root_frontiers = self.dag.shallow_since_frontiers(); + if !shallow_root_frontiers.is_empty() { + if from_frontiers == shallow_root_frontiers && to.includes_vv(from) { + return (from_frontiers.clone(), DiffMode::Import); + } + + if to_frontiers == shallow_root_frontiers && from.includes_vv(to) { + return (to_frontiers.clone(), DiffMode::Checkout); + } + } + + let (mut common_ancestors, mut diff_mode) = + self.dag.find_common_ancestor(from_frontiers, to_frontiers); + if diff_mode == DiffMode::Checkout && to > from { + diff_mode = DiffMode::Import; + } + + if self.dag.frontiers_to_vv(&common_ancestors).is_none() { + if to.includes_vv(from) { + common_ancestors = from_frontiers.clone(); + diff_mode = DiffMode::Import; + } else if from.includes_vv(to) { + common_ancestors = to_frontiers.clone(); + diff_mode = 
DiffMode::Checkout; + } + } + + (common_ancestors, diff_mode) + } + pub fn len_changes(&self) -> usize { self.change_store.change_num() } diff --git a/crates/loro-internal/src/state.rs b/crates/loro-internal/src/state.rs index 2ba11ddfb..b674c0091 100644 --- a/crates/loro-internal/src/state.rs +++ b/crates/loro-internal/src/state.rs @@ -1771,6 +1771,30 @@ impl DocState { pub(crate) fn shallow_root_store(&self) -> Option<&Arc> { self.store.shallow_root_store() } + + pub(crate) fn restore_to_shallow_root(&mut self) -> bool { + let Some(frontiers) = self.store.restore_to_shallow_root() else { + return false; + }; + + self.frontiers = frontiers; + self.dead_containers_cache.clear(); + true + } + + pub(crate) fn cache_current_as_shallow_latest(&mut self, frontiers: Frontiers) { + self.store.cache_current_as_shallow_latest(frontiers); + } + + pub(crate) fn restore_to_shallow_latest(&mut self, frontiers: &Frontiers) -> bool { + if !self.store.restore_to_shallow_latest(frontiers) { + return false; + } + + self.frontiers = frontiers.clone(); + self.dead_containers_cache.clear(); + true + } } fn create_state_(idx: ContainerIdx, config: &Configure, peer: u64) -> State { diff --git a/crates/loro-internal/src/state/container_store.rs b/crates/loro-internal/src/state/container_store.rs index ec12f728a..ff6e9ba04 100644 --- a/crates/loro-internal/src/state/container_store.rs +++ b/crates/loro-internal/src/state/container_store.rs @@ -41,6 +41,7 @@ pub(crate) struct ContainerStore { arena: SharedArena, store: InnerStore, shallow_root_store: Option>, + shallow_latest_store: Option>, conf: Configure, peer: Arc, } @@ -76,6 +77,7 @@ impl ContainerStore { arena, conf, shallow_root_store: None, + shallow_latest_store: None, peer, } } @@ -135,6 +137,37 @@ impl ContainerStore { Some(shallow_root_kv.export()) } + pub(crate) fn restore_to_shallow_root(&mut self) -> Option { + let shallow_root = self.shallow_root_store.as_ref()?; + self.store = shallow_root + .store + .lock() + 
.fork(self.arena.clone(), &self.conf); + Some(shallow_root.shallow_root_frontiers.clone()) + } + + pub(crate) fn cache_current_as_shallow_latest(&mut self, frontiers: Frontiers) { + self.shallow_latest_store = Some(Arc::new(GcStore { + shallow_root_frontiers: frontiers, + store: Mutex::new(self.store.fork(self.arena.clone(), &self.conf)), + })); + } + + pub(crate) fn restore_to_shallow_latest(&mut self, frontiers: &Frontiers) -> bool { + let Some(shallow_latest) = self.shallow_latest_store.as_ref() else { + return false; + }; + if &shallow_latest.shallow_root_frontiers != frontiers { + return false; + } + + self.store = shallow_latest + .store + .lock() + .fork(self.arena.clone(), &self.conf); + true + } + pub(crate) fn decode(&mut self, bytes: Bytes) -> LoroResult> { self.store.decode(bytes) } @@ -250,6 +283,7 @@ impl ContainerStore { conf: config, peer, shallow_root_store: None, + shallow_latest_store: None, } } diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 954ebd1d6..765dbbbde 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -99,6 +99,130 @@ fn import_three_frontier_shallow_root_snapshot_does_not_crash_inner() -> anyhow: Ok(()) } +#[test] +fn import_random_multi_frontier_shallow_snapshot_does_not_crash() -> anyhow::Result<()> { + const CHILD_ENV: &str = "LORO_IMPORT_RANDOM_MULTI_FRONTIER_SHALLOW_CHILD"; + const TEST_NAME: &str = + "integration_test::shallow_snapshot_test::import_random_multi_frontier_shallow_snapshot_does_not_crash"; + + if std::env::var_os(CHILD_ENV).is_some() { + return import_random_multi_frontier_shallow_snapshot_does_not_crash_inner(); + } + + let output = std::process::Command::new(std::env::current_exe()?) 
+ .arg("--exact") + .arg(TEST_NAME) + .arg("--nocapture") + .env(CHILD_ENV, "1") + .output()?; + + assert!( + output.status.success(), + "importing random multi-frontier shallow snapshots should not crash\nstatus: {}\nstdout:\n{}\nstderr:\n{}", + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + Ok(()) +} + +fn import_random_multi_frontier_shallow_snapshot_does_not_crash_inner() -> anyhow::Result<()> { + use rand::{Rng, SeedableRng}; + + let doc = LoroDoc::new(); + doc.set_detached_editing(true); + doc.set_change_merge_interval(0); + let text = doc.get_text("text"); + let list = doc.get_list("list"); + let map = doc.get_map("map"); + let mut rng = rand::rngs::StdRng::seed_from_u64(0x5a11_0cab); + let mut recorded = vec![(doc.state_frontiers(), doc.get_deep_value())]; + + for step in 0..80 { + let base_idx = rng.gen_range(0..recorded.len()); + doc.checkout(&recorded[base_idx].0)?; + doc.set_peer_id((step + 10) as u64)?; + + for _ in 0..rng.gen_range(1..=3) { + match rng.gen_range(0..8) { + 0 | 1 => { + let pos = rng.gen_range(0..=text.len_unicode()); + text.insert(pos, ["a", "b", "中"][rng.gen_range(0..3)])?; + } + 2 => { + if text.len_unicode() > 0 { + text.delete(rng.gen_range(0..text.len_unicode()), 1)?; + } + } + 3 => { + let pos = rng.gen_range(0..=list.len()); + list.insert(pos, step as i32)?; + } + 4 => { + if !list.is_empty() { + list.delete(rng.gen_range(0..list.len()), 1)?; + } + } + 5 | 6 => { + map.insert(&format!("k{}", rng.gen_range(0..12)), step as i32)?; + } + _ => {} + } + } + + doc.commit(); + recorded.push((doc.state_frontiers(), doc.get_deep_value())); + } + + doc.checkout_to_latest(); + let mut checked = 0; + for _ in 0..120 { + let mut target = Frontiers::default(); + for _ in 0..rng.gen_range(2..=5) { + let id = recorded[rng.gen_range(0..recorded.len())].0.iter().next(); + if let Some(id) = id { + target.push(id); + } + } + + let Ok(target) = doc.minimize_frontiers(&target) else { + 
continue; + }; + if target.is_empty() || target.len() < 2 { + continue; + } + + let latest_frontiers = doc.oplog_frontiers(); + doc.checkout(&latest_frontiers)?; + let latest_value = doc.get_deep_value(); + + doc.checkout(&target)?; + let bytes = doc.export(ExportMode::shallow_snapshot(&target))?; + let meta = LoroDoc::decode_import_blob_meta(&bytes, false)?; + doc.checkout(&meta.start_frontiers)?; + let shallow_root_value = doc.get_deep_value(); + doc.checkout(&target)?; + + let imported = LoroDoc::new(); + imported.import(&bytes)?; + assert_eq!(imported.shallow_since_frontiers(), meta.start_frontiers); + assert_eq!(imported.get_deep_value(), latest_value); + let root_state_only = + imported.export(ExportMode::state_only(Some(&meta.start_frontiers)))?; + let root_doc = LoroDoc::new(); + root_doc.import(&root_state_only)?; + imported.checkout(&meta.start_frontiers)?; + assert_eq!(imported.get_deep_value(), shallow_root_value); + assert_eq!(root_doc.get_deep_value(), shallow_root_value); + imported.checkout_to_latest(); + assert_eq!(imported.get_deep_value(), latest_value); + checked += 1; + } + + assert!(checked > 0); + Ok(()) +} + #[test] fn state_only_at_concurrent_frontiers_excludes_later_ops() -> anyhow::Result<()> { let doc = LoroDoc::new(); From ca5440830ce11278a1e379b6a7d6d85deb5e3648 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Sat, 9 May 2026 01:47:24 +0000 Subject: [PATCH 25/46] fix: reject malformed imported text diffs --- crates/loro-internal/src/oplog.rs | 8 ++-- .../src/oplog/pending_changes.rs | 2 +- .../loro-internal/src/state/richtext_state.rs | 42 +++++++++++++++++++ crates/loro/tests/panic_test.rs | 33 ++++++++++++++- 4 files changed, 79 insertions(+), 6 deletions(-) diff --git a/crates/loro-internal/src/oplog.rs b/crates/loro-internal/src/oplog.rs index f01696c46..8023a6093 100644 --- a/crates/loro-internal/src/oplog.rs +++ b/crates/loro-internal/src/oplog.rs @@ -255,7 +255,7 @@ impl OpLog { if change.ops.iter().any(|op| { matches!( 
op.container.get_type(), - ContainerType::List | ContainerType::Tree + ContainerType::List | ContainerType::Text | ContainerType::Tree ) }) { ans.needs_state_apply_rollback = true; @@ -264,9 +264,9 @@ impl OpLog { // Any newly applied change can unlock pending changes whose ops are not // visible in `changes`, so include pending in the rollback decision. - // Keep this narrow: text/map-only pending changes cannot return a - // state-apply error, and forcing rollback there adds lock traffic to - // small sync/import workloads. + // Keep this narrow: map-only pending changes cannot return a state-apply + // error, and forcing rollback there adds lock traffic to small + // sync/import workloads. if ans.applies_to_dag && pending_needs_state_apply_rollback { ans.needs_state_apply_rollback = true; } diff --git a/crates/loro-internal/src/oplog/pending_changes.rs b/crates/loro-internal/src/oplog/pending_changes.rs index 8d84e2b37..95f37f2b8 100644 --- a/crates/loro-internal/src/oplog/pending_changes.rs +++ b/crates/loro-internal/src/oplog/pending_changes.rs @@ -43,7 +43,7 @@ impl PendingChanges { change.ops.iter().any(|op| { matches!( op.container.get_type(), - ContainerType::List | ContainerType::Tree + ContainerType::List | ContainerType::Text | ContainerType::Tree ) }) }) diff --git a/crates/loro-internal/src/state/richtext_state.rs b/crates/loro-internal/src/state/richtext_state.rs index b90f259ee..8a16fc79e 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -883,6 +883,48 @@ impl ContainerState for RichtextState { Ok(()) } + fn validate_diff(&self, diff: &InternalDiff) -> LoroResult<()> { + let InternalDiff::RichtextRaw(delta) = diff else { + unreachable!() + }; + + let mut cursor = 0usize; + let mut projected = self.len_entity(); + for span in delta.iter() { + match span { + loro_delta::DeltaItem::Retain { len, .. 
} => { + cursor += len; + if cursor > projected { + return Err(LoroError::internal(format!( + "text diff retains {cursor} entities but state only has {projected}", + ))); + } + } + loro_delta::DeltaItem::Replace { value, delete, .. } => { + if cursor + delete > projected { + return Err(LoroError::internal(format!( + "text diff deletes {delete} entities at {cursor} but state only has {projected}", + ))); + } + + projected -= delete; + let len = value.rle_len(); + if len > 0 { + if cursor > projected { + return Err(LoroError::internal(format!( + "text diff inserts at {cursor} but state only has {projected}", + ))); + } + cursor += len; + projected += len; + } + } + } + } + + Ok(()) + } + fn apply_local_op(&mut self, r_op: &RawOp, op: &Op) -> LoroResult { self.update_version(); match &op.content { diff --git a/crates/loro/tests/panic_test.rs b/crates/loro/tests/panic_test.rs index 7e6f8bb2d..771aa0edd 100644 --- a/crates/loro/tests/panic_test.rs +++ b/crates/loro/tests/panic_test.rs @@ -3,9 +3,10 @@ #![allow(unexpected_cfgs)] use serial_test::parallel; +use std::panic::AssertUnwindSafe; use loro::event::{Diff, DiffBatch}; -use loro::json::{JsonChange, JsonOp, JsonOpContent, JsonSchema, MapOp}; +use loro::json::{JsonChange, JsonOp, JsonOpContent, JsonSchema, MapOp, TextOp}; use loro::{CommitOptions, Container, ContainerID, ContainerType, LoroDoc, LoroList, ID}; use loro::{Frontiers, LoroValue}; @@ -252,6 +253,36 @@ fn import_json_updates_with_short_peers_array_no_longer_panics() { let _ = doc.import_json_updates(schema); } +#[test] +#[parallel] +fn import_json_updates_with_text_insert_out_of_bounds_should_error_without_mutating_doc() { + let src = LoroDoc::new(); + src.set_peer_id(31).unwrap(); + src.get_text("text").insert(0, "a").unwrap(); + src.commit(); + + let mut json = src.export_json_updates(&Default::default(), &src.oplog_vv()); + match &mut json.changes[0].ops[0].content { + JsonOpContent::Text(TextOp::Insert { pos, .. 
}) => { + *pos = 1_000; + } + other => panic!("expected text insert, got {other:?}"), + } + + let dst = LoroDoc::new(); + let result = std::panic::catch_unwind(AssertUnwindSafe(|| dst.import_json_updates(json))); + assert!( + result.is_ok(), + "malformed text JSON import should not panic" + ); + assert!( + result.unwrap().is_err(), + "malformed text JSON import unexpectedly succeeded; imported value = {:?}", + dst.get_deep_value() + ); + assert_eq!(dst.get_deep_value(), LoroValue::Map(Default::default())); +} + // --------------------------------------------------------------------------- // 9. Detached tree methods that used to panic — FIXED // --------------------------------------------------------------------------- From a38f5d5d4815de882b3914ae1afc8f14663a7a77 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Sat, 9 May 2026 01:59:51 +0000 Subject: [PATCH 26/46] fix: reject empty text marks in JSON import --- .../loro-internal/src/encoding/json_schema.rs | 22 +++++++--- crates/loro/tests/panic_test.rs | 44 +++++++++++++++++++ 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/crates/loro-internal/src/encoding/json_schema.rs b/crates/loro-internal/src/encoding/json_schema.rs index cbb293d28..9a77518a6 100644 --- a/crates/loro-internal/src/encoding/json_schema.rs +++ b/crates/loro-internal/src/encoding/json_schema.rs @@ -604,13 +604,21 @@ fn decode_op(op: json::JsonOp, arena: &SharedArena, peers: &Option>) style_key, style_value, info, - } => InnerContent::List(InnerListOp::StyleStart { - start, - end, - key: style_key.into(), - value: style_value, - info: TextStyleInfoFlag::from_byte(info), - }), + } => { + if start >= end { + return Err(LoroError::DecodeError( + "text mark start must be less than end".into(), + )); + } + + InnerContent::List(InnerListOp::StyleStart { + start, + end, + key: style_key.into(), + value: style_value, + info: TextStyleInfoFlag::from_byte(info), + }) + } json::TextOp::MarkEnd => InnerContent::List(InnerListOp::StyleEnd), 
}, _ => { diff --git a/crates/loro/tests/panic_test.rs b/crates/loro/tests/panic_test.rs index 771aa0edd..fc86f5124 100644 --- a/crates/loro/tests/panic_test.rs +++ b/crates/loro/tests/panic_test.rs @@ -3,6 +3,7 @@ #![allow(unexpected_cfgs)] use serial_test::parallel; +use std::mem::ManuallyDrop; use std::panic::AssertUnwindSafe; use loro::event::{Diff, DiffBatch}; @@ -283,6 +284,49 @@ fn import_json_updates_with_text_insert_out_of_bounds_should_error_without_mutat assert_eq!(dst.get_deep_value(), LoroValue::Map(Default::default())); } +#[test] +#[parallel] +fn import_json_updates_with_text_mark_empty_range_should_error_without_panic() { + let src = LoroDoc::new(); + src.set_peer_id(33).unwrap(); + let text = src.get_text("text"); + text.insert(0, "abc").unwrap(); + src.commit(); + let first = src.export_json_updates(&Default::default(), &src.oplog_vv()); + let first_vv = src.oplog_vv(); + + text.mark(0..2, "bold", true).unwrap(); + src.commit(); + let mut suffix = src.export_json_updates(&first_vv, &src.oplog_vv()); + match &mut suffix.changes[0].ops[0].content { + JsonOpContent::Text(TextOp::Mark { start, end, .. 
}) => { + *start = 2; + *end = 2; + } + other => panic!("expected text mark, got {other:?}"), + } + + let dst = ManuallyDrop::new(LoroDoc::new()); + dst.import_json_updates(first).unwrap(); + let before_vv = dst.oplog_vv(); + let before_frontiers = dst.oplog_frontiers(); + let before_value = dst.get_deep_value(); + + let result = std::panic::catch_unwind(AssertUnwindSafe(|| dst.import_json_updates(suffix))); + assert!( + result.is_ok(), + "malformed text mark JSON import should not panic" + ); + assert!( + result.unwrap().is_err(), + "malformed text mark JSON import unexpectedly succeeded; imported value = {:?}", + dst.get_deep_value() + ); + assert_eq!(dst.oplog_vv(), before_vv); + assert_eq!(dst.oplog_frontiers(), before_frontiers); + assert_eq!(dst.get_deep_value(), before_value); +} + // --------------------------------------------------------------------------- // 9. Detached tree methods that used to panic — FIXED // --------------------------------------------------------------------------- From 37078aefdbef5c6cd0e34a9ddc30362d41af1ae1 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Sat, 9 May 2026 02:12:49 +0000 Subject: [PATCH 27/46] fix: reject unpaired text marks in JSON import --- .../loro-internal/src/encoding/json_schema.rs | 48 +++++++++++++++++++ crates/loro/tests/panic_test.rs | 42 ++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/crates/loro-internal/src/encoding/json_schema.rs b/crates/loro-internal/src/encoding/json_schema.rs index 9a77518a6..a3a4b9db9 100644 --- a/crates/loro-internal/src/encoding/json_schema.rs +++ b/crates/loro-internal/src/encoding/json_schema.rs @@ -542,6 +542,7 @@ fn decode_changes(json: JsonSchema, arena: &SharedArena) -> LoroResult = RleVec::new(); for op in json_ops { ops.push(decode_op(op, arena, &peers)?); @@ -564,6 +565,53 @@ fn decode_changes(json: JsonSchema, arena: &SharedArena) -> LoroResult LoroResult<()> { + for (i, op) in ops.iter().enumerate() { + let JsonOpContent::Text(text) = &op.content 
else { + continue; + }; + + match text { + json::TextOp::Mark { .. } => { + let Some(next) = ops.get(i + 1) else { + return Err(LoroError::DecodeError( + "text mark must be immediately followed by mark end".into(), + )); + }; + + if next.container != op.container + || !matches!(&next.content, JsonOpContent::Text(json::TextOp::MarkEnd)) + { + return Err(LoroError::DecodeError( + "text mark must be immediately followed by mark end".into(), + )); + } + } + json::TextOp::MarkEnd => { + let Some(prev) = i.checked_sub(1).and_then(|i| ops.get(i)) else { + return Err(LoroError::DecodeError( + "text mark end must immediately follow text mark".into(), + )); + }; + + if prev.container != op.container + || !matches!( + &prev.content, + JsonOpContent::Text(json::TextOp::Mark { .. }) + ) + { + return Err(LoroError::DecodeError( + "text mark end must immediately follow text mark".into(), + )); + } + } + _ => {} + } + } + + Ok(()) +} + fn decode_op(op: json::JsonOp, arena: &SharedArena, peers: &Option>) -> LoroResult { let json::JsonOp { counter, diff --git a/crates/loro/tests/panic_test.rs b/crates/loro/tests/panic_test.rs index fc86f5124..d9e70b296 100644 --- a/crates/loro/tests/panic_test.rs +++ b/crates/loro/tests/panic_test.rs @@ -327,6 +327,48 @@ fn import_json_updates_with_text_mark_empty_range_should_error_without_panic() { assert_eq!(dst.get_deep_value(), before_value); } +#[test] +#[parallel] +fn import_json_updates_with_text_mark_end_without_mark_should_error_without_panic() { + let src = LoroDoc::new(); + src.set_peer_id(34).unwrap(); + let text = src.get_text("text"); + text.insert(0, "abc").unwrap(); + src.commit(); + let first = src.export_json_updates(&Default::default(), &src.oplog_vv()); + let first_vv = src.oplog_vv(); + + text.mark(0..2, "bold", true).unwrap(); + src.commit(); + let mut suffix = src.export_json_updates(&first_vv, &src.oplog_vv()); + match &mut suffix.changes[0].ops[0].content { + content @ JsonOpContent::Text(TextOp::Mark { .. 
}) => { + *content = JsonOpContent::Text(TextOp::MarkEnd); + } + other => panic!("expected text mark, got {other:?}"), + } + + let dst = ManuallyDrop::new(LoroDoc::new()); + dst.import_json_updates(first).unwrap(); + let before_vv = dst.oplog_vv(); + let before_frontiers = dst.oplog_frontiers(); + let before_value = dst.get_deep_value(); + + let result = std::panic::catch_unwind(AssertUnwindSafe(|| dst.import_json_updates(suffix))); + assert!( + result.is_ok(), + "malformed text MarkEnd JSON import should not panic" + ); + assert!( + result.unwrap().is_err(), + "malformed text MarkEnd JSON import unexpectedly succeeded; imported value = {:?}", + dst.get_deep_value() + ); + assert_eq!(dst.oplog_vv(), before_vv); + assert_eq!(dst.oplog_frontiers(), before_frontiers); + assert_eq!(dst.get_deep_value(), before_value); +} + // --------------------------------------------------------------------------- // 9. Detached tree methods that used to panic — FIXED // --------------------------------------------------------------------------- From b0257ee74e975ee8c3b4181f9f53f853a32b4eab Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Sat, 9 May 2026 02:36:25 +0000 Subject: [PATCH 28/46] fix: canonicalize frontiers constructors --- crates/loro-internal/src/version/frontiers.rs | 48 ++----------------- .../loro/tests/contracts/version_frontiers.rs | 15 ++++++ 2 files changed, 19 insertions(+), 44 deletions(-) diff --git a/crates/loro-internal/src/version/frontiers.rs b/crates/loro-internal/src/version/frontiers.rs index 9c26268fa..2f2412a0b 100644 --- a/crates/loro-internal/src/version/frontiers.rs +++ b/crates/loro-internal/src/version/frontiers.rs @@ -335,33 +335,13 @@ impl Frontiers { } impl From<&[ID]> for Frontiers { fn from(ids: &[ID]) -> Self { - match ids.len() { - 0 => Frontiers::None, - 1 => Frontiers::ID(ids[0]), - _ => { - let mut map = InternalMap::new(); - for &id in ids { - map.insert(id); - } - Frontiers::Map(map) - } - } + ids.iter().copied().collect() } } impl 
From> for Frontiers { fn from(ids: Vec) -> Self { - match ids.len() { - 0 => Frontiers::None, - 1 => Frontiers::ID(ids[0]), - _ => { - let mut map = InternalMap::new(); - for id in ids { - map.insert(id); - } - Frontiers::Map(map) - } - } + ids.into_iter().collect() } } @@ -392,33 +372,13 @@ impl From> for Frontiers { impl From<[ID; N]> for Frontiers { fn from(value: [ID; N]) -> Self { - match N { - 0 => Frontiers::None, - 1 => Frontiers::ID(value[0]), - _ => { - let mut map = InternalMap::new(); - for id in value { - map.insert(id); - } - Frontiers::Map(map) - } - } + value.into_iter().collect() } } impl From<&Vec> for Frontiers { fn from(ids: &Vec) -> Self { - match ids.len() { - 0 => Frontiers::None, - 1 => Frontiers::ID(ids[0]), - _ => { - let mut map = InternalMap::new(); - for id in ids { - map.insert(*id); - } - Frontiers::Map(map) - } - } + ids.iter().copied().collect() } } diff --git a/crates/loro/tests/contracts/version_frontiers.rs b/crates/loro/tests/contracts/version_frontiers.rs index 7a55fc2e2..2e3f8f359 100644 --- a/crates/loro/tests/contracts/version_frontiers.rs +++ b/crates/loro/tests/contracts/version_frontiers.rs @@ -32,6 +32,21 @@ fn sorted_ids(frontiers: &Frontiers) -> Vec<(u64, i32)> { ids } +#[test] +fn frontiers_constructors_canonicalize_same_peer_ids() { + let ids = [ID::new(1, 0), ID::new(1, 1)]; + let cases = [ + ("array", Frontiers::from(ids)), + ("vec", Frontiers::from(ids.to_vec())), + ("slice", Frontiers::from(ids.as_slice())), + ]; + + for (name, frontiers) in cases { + assert_eq!(frontiers.len(), 1, "{name}"); + assert_eq!(frontiers.as_single(), Some(ID::new(1, 1)), "{name}"); + } +} + fn sorted_spans(spans: I) -> Vec<(u64, i32, i32)> where I: IntoIterator, From 3622d491645c3329bacfe275f44bf0bae472dcc8 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Sat, 9 May 2026 05:41:59 +0000 Subject: [PATCH 29/46] fix: preserve canonical state-only snapshot frontiers --- crates/fuzz/tests/test.rs | 74 +++++++++++++++++++ 
.../src/encoding/shallow_snapshot.rs | 12 ++- 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/crates/fuzz/tests/test.rs b/crates/fuzz/tests/test.rs index 41964a5d3..758fa804c 100644 --- a/crates/fuzz/tests/test.rs +++ b/crates/fuzz/tests/test.rs @@ -136,6 +136,80 @@ fn all_fuzz_state_only_before_shallow_root() { ) } +#[test] +fn all_fuzz_state_only_roundtrip_after_diff_apply_text_update() { + test_multi_sites( + 5, + vec![FuzzTarget::All], + &mut [ + Handle { + site: 196, + target: 0, + container: 151, + action: Generic(GenericAction { + value: Container(List), + bool: true, + key: 1835887981, + pos: 15359179523395251565, + length: 6845301837235606980, + prop: 4959913191460359423, + }), + }, + SyncAll, + SyncAll, + SetCommitOptions { + site: 255, + origin: 255, + msg: 93, + }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: Container(Unknown(255)), + bool: true, + key: 4294967295, + pos: 3225938275189391359, + length: 7885078839350357357, + prop: 3617008642897571181, + }), + }, + DiffApply { from: 125, to: 178 }, + SetCommitOptions { + site: 242, + origin: 242, + msg: 242, + }, + DiffApply { from: 255, to: 255 }, + SetCommitOptions { + site: 109, + origin: 109, + msg: 109, + }, + SyncAll, + ForkAt { + site: 109, + to: 1835887981, + }, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + Sync { from: 91, to: 91 }, + Sync { from: 91, to: 91 }, + SyncAll, + StateOnlyRoundTrip { site: 213 }, + SyncAll, + StateOnlyRoundTrip { site: 255 }, + ], + ) +} + #[test] fn test_local_events() { fuzz_local_events(vec![ diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index 6918315d0..de24cccf3 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -238,7 +238,17 @@ fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> 
Fronti } fn calc_state_only_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { - calc_shallow_doc_start_from(oplog, frontiers.clone()) + let shrunk = shrink_frontiers(frontiers, oplog.dag()).unwrap_or_else(|_| frontiers.clone()); + if shrunk == *frontiers { + // A canonical state-only target should become the shallow root itself. + // Lowering a concurrent target to its LCA would require replaying ops back + // to the target and can lose the exact target-state boundary. + advance_style_start_frontiers(oplog, frontiers.clone()) + } else { + // Non-canonical targets are used to spell "this shallow root plus later + // frontiers"; preserve that explicit root boundary. + calc_shallow_doc_start_from(oplog, frontiers.clone()) + } } fn normalize_state_only_target_frontiers(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { From d3d84bb61bbdec57bfa56cda9b3f90b3e61d1635 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Sat, 9 May 2026 07:16:03 +0000 Subject: [PATCH 30/46] fix: ignore cyclic tree moves in one-doc fuzz --- crates/fuzz/src/one_doc_fuzzer.rs | 12 ++++- crates/fuzz/tests/test.rs | 83 +++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/crates/fuzz/src/one_doc_fuzzer.rs b/crates/fuzz/src/one_doc_fuzzer.rs index 4f537cc2f..c13a3eb71 100644 --- a/crates/fuzz/src/one_doc_fuzzer.rs +++ b/crates/fuzz/src/one_doc_fuzzer.rs @@ -551,7 +551,11 @@ impl OneDocFuzzer { peer: before.0, counter: before.1, }; - tree.mov_before(target, before).unwrap(); + if let Err(LoroError::TreeError(e)) = + tree.mov_before(target, before) + { + tracing::warn!("move error {}", e); + } } crate::container::TreeActionInner::MoveAfter { target, after } => { let target = TreeID { @@ -562,7 +566,11 @@ impl OneDocFuzzer { peer: after.0, counter: after.1, }; - tree.mov_after(target, after).unwrap(); + if let Err(LoroError::TreeError(e)) = + tree.mov_after(target, after) + { + tracing::warn!("move error {}", e); + } } 
crate::container::TreeActionInner::Meta { meta: (k, v) } => { let meta = tree.get_meta(target).unwrap(); diff --git a/crates/fuzz/tests/test.rs b/crates/fuzz/tests/test.rs index 758fa804c..037a843ed 100644 --- a/crates/fuzz/tests/test.rs +++ b/crates/fuzz/tests/test.rs @@ -210,6 +210,89 @@ fn all_fuzz_state_only_roundtrip_after_diff_apply_text_update() { ) } +#[test] +fn all_fuzz_one_doc_ignores_cyclic_move_before_error() { + test_multi_sites_on_one_doc( + 5, + &mut [ + Handle { + site: 11, + target: 148, + container: 148, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 65296, + pos: 18446744073709551600, + length: 18446744073709551615, + prop: 11240984665823117311, + }), + }, + Query { + site: 125, + target: 125, + query_type: 119, + }, + Handle { + site: 0, + target: 125, + container: 125, + action: Generic(GenericAction { + value: I32(2105376125), + bool: true, + key: 2105376125, + pos: 9042521604759584125, + length: 9042521604759584125, + prop: 9042521604759584125, + }), + }, + ForkAt { + site: 125, + to: 2105376125, + }, + Query { + site: 155, + target: 155, + query_type: 155, + }, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + Handle { + site: 3, + target: 136, + container: 107, + action: Generic(GenericAction { + value: I32(1802075016), + bool: true, + key: 1802201963, + pos: 7740429931049413483, + length: 285424485, + prop: 29704420010754048, + }), + }, + SyncAll, + Handle { + site: 3, + target: 136, + container: 107, + action: Generic(GenericAction { + value: I32(1802070920), + bool: true, + key: 1929407339, + pos: 3026537059180544374, + length: 232891317779694337, + prop: 3124043742624574344, + }), + }, + ], + ) +} + #[test] fn test_local_events() { fuzz_local_events(vec![ From 466c97e15a12334ce970bc0f61251f6c6155f56f Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Sun, 10 May 2026 07:11:36 +0000 Subject: [PATCH 31/46] fix: preserve commit options after failed change travel --- 
crates/loro-internal/src/loro.rs | 97 ++++++++++++------------ crates/loro/tests/commit_message_test.rs | 29 ++++++- 2 files changed, 78 insertions(+), 48 deletions(-) diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index edcfabdfe..965b3dbb9 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -2358,66 +2358,69 @@ impl LoroDoc { ) -> Result<(), ChangeTravelError> { let (options, guard) = self.implicit_commit_then_stop(); drop(guard); - struct PendingNode(ChangeMeta); - impl PartialEq for PendingNode { - fn eq(&self, other: &Self) -> bool { - self.0.lamport_last() == other.0.lamport_last() && self.0.id.peer == other.0.id.peer + let ans = 'travel: { + struct PendingNode(ChangeMeta); + impl PartialEq for PendingNode { + fn eq(&self, other: &Self) -> bool { + self.0.lamport_last() == other.0.lamport_last() + && self.0.id.peer == other.0.id.peer + } } - } - impl Eq for PendingNode {} - impl PartialOrd for PendingNode { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) + impl Eq for PendingNode {} + impl PartialOrd for PendingNode { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } - } - impl Ord for PendingNode { - fn cmp(&self, other: &Self) -> Ordering { - self.0 - .lamport_last() - .cmp(&other.0.lamport_last()) - .then_with(|| self.0.id.peer.cmp(&other.0.id.peer)) + impl Ord for PendingNode { + fn cmp(&self, other: &Self) -> Ordering { + self.0 + .lamport_last() + .cmp(&other.0.lamport_last()) + .then_with(|| self.0.id.peer.cmp(&other.0.id.peer)) + } } - } - for id in ids { - let op_log = &self.oplog().lock(); - if !op_log.vv().includes_id(*id) { - return Err(ChangeTravelError::TargetIdNotFound(*id)); - } - if op_log.dag.shallow_since_vv().includes_id(*id) { - return Err(ChangeTravelError::TargetVersionNotIncluded); + for id in ids { + let op_log = &self.oplog().lock(); + if !op_log.vv().includes_id(*id) { + break 'travel 
Err(ChangeTravelError::TargetIdNotFound(*id)); + } + if op_log.dag.shallow_since_vv().includes_id(*id) { + break 'travel Err(ChangeTravelError::TargetVersionNotIncluded); + } } - } - let mut visited = FxHashSet::default(); - let mut pending: BinaryHeap = BinaryHeap::new(); - for id in ids { - pending.push(PendingNode(ChangeMeta::from_change( - &self.oplog().lock().get_change_at(*id).unwrap(), - ))); - } - while let Some(PendingNode(node)) = pending.pop() { - let deps = node.deps.clone(); - if f(node).is_break() { - break; + let mut visited = FxHashSet::default(); + let mut pending: BinaryHeap = BinaryHeap::new(); + for id in ids { + pending.push(PendingNode(ChangeMeta::from_change( + &self.oplog().lock().get_change_at(*id).unwrap(), + ))); } - - for dep in deps.iter() { - let Some(dep_node) = self.oplog().lock().get_change_at(dep) else { - continue; - }; - if visited.contains(&dep_node.id) { - continue; + while let Some(PendingNode(node)) = pending.pop() { + let deps = node.deps.clone(); + if f(node).is_break() { + break; } - visited.insert(dep_node.id); - pending.push(PendingNode(ChangeMeta::from_change(&dep_node))); + for dep in deps.iter() { + let Some(dep_node) = self.oplog().lock().get_change_at(dep) else { + continue; + }; + if visited.contains(&dep_node.id) { + continue; + } + + visited.insert(dep_node.id); + pending.push(PendingNode(ChangeMeta::from_change(&dep_node))); + } } - } - let ans = Ok(()); + Ok(()) + }; self.renew_txn_if_auto_commit(options); ans } diff --git a/crates/loro/tests/commit_message_test.rs b/crates/loro/tests/commit_message_test.rs index ecf16899b..d143baf18 100644 --- a/crates/loro/tests/commit_message_test.rs +++ b/crates/loro/tests/commit_message_test.rs @@ -1,4 +1,5 @@ -use loro::{CommitOptions, LoroDoc, VersionVector, ID}; +use loro::{ChangeTravelError, CommitOptions, LoroDoc, VersionVector, ID}; +use std::ops::ControlFlow; #[test] fn explicit_empty_commit_swallow_options() { @@ -51,6 +52,32 @@ fn 
implicit_empty_commit_preserves_options() { assert_eq!(second_change.timestamp(), 200); } +#[test] +fn failed_travel_change_ancestors_preserves_next_commit_options() { + let doc = LoroDoc::new(); + doc.set_peer_id(1).unwrap(); + + doc.set_next_commit_message("after failed travel"); + doc.set_next_commit_timestamp(42); + + let mut noop = |_| ControlFlow::Continue(()); + let err = doc + .travel_change_ancestors(&[ID::new(999, 0)], &mut noop) + .unwrap_err(); + assert!(matches!( + err, + ChangeTravelError::TargetIdNotFound(id) if id == ID::new(999, 0) + )); + + let text = doc.get_text("text"); + text.insert(0, "x").unwrap(); + doc.commit(); + + let change = doc.get_change(ID::new(1, 0)).unwrap(); + assert_eq!(change.message(), "after failed travel"); + assert_eq!(change.timestamp(), 42); +} + #[test] fn test_commit_message() { let doc = LoroDoc::new(); From 4a32d2e1d54590486990c186c9f1fdf7b379e1e0 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Thu, 21 May 2026 10:00:38 +0000 Subject: [PATCH 32/46] fix: tighten import rollback followups --- crates/bench-utils/src/json.rs | 6 +-- crates/delta/src/delta_rope.rs | 12 ++--- crates/fuzz/src/container/tree.rs | 2 +- crates/fuzz/src/one_doc_fuzzer.rs | 2 +- .../src/container/richtext/richtext_state.rs | 4 +- crates/loro-internal/src/diff_calc.rs | 2 +- .../loro-internal/src/encoding/json_schema.rs | 10 ++-- .../src/encoding/shallow_snapshot.rs | 6 +-- crates/loro-internal/src/handler.rs | 7 +-- crates/loro-internal/src/loro.rs | 21 ++++---- crates/loro-internal/src/oplog.rs | 9 ++-- .../loro-internal/src/oplog/change_store.rs | 10 ++-- .../src/oplog/change_store/iter.rs | 2 + crates/loro-internal/src/state/map_state.rs | 12 ++--- .../src/state/movable_list_state.rs | 50 +++++++++---------- crates/loro-internal/src/state/tree_state.rs | 2 +- crates/loro-internal/src/txn.rs | 2 +- crates/loro-wasm/src/convert.rs | 2 +- crates/loro-wasm/src/lib.rs | 2 +- 19 files changed, 78 insertions(+), 85 deletions(-) diff --git 
a/crates/bench-utils/src/json.rs b/crates/bench-utils/src/json.rs index c0057c76f..dc6f860fc 100644 --- a/crates/bench-utils/src/json.rs +++ b/crates/bench-utils/src/json.rs @@ -53,10 +53,8 @@ impl ActionTrait for JsonAction { fn normalize_value(value: &mut LoroValue) { match value { - LoroValue::Double(f) => { - if f.is_nan() { - *f = 0.0; - } + LoroValue::Double(f) if f.is_nan() => { + *f = 0.0; } LoroValue::List(l) => { for v in l.make_mut().iter_mut() { diff --git a/crates/delta/src/delta_rope.rs b/crates/delta/src/delta_rope.rs index 08730aacb..66e9d2683 100644 --- a/crates/delta/src/delta_rope.rs +++ b/crates/delta/src/delta_rope.rs @@ -392,13 +392,11 @@ impl PartialEq for Delta b.next_with(len).unwrap(); } } - (DeltaItem::Retain { attr, .. }, DeltaItem::Retain { attr: b_attr, .. }) => { - if *attr == *b_attr { - a.next_with(len).unwrap(); - b.next_with(len).unwrap(); - } else { - return false; - } + (DeltaItem::Retain { attr, .. }, DeltaItem::Retain { attr: b_attr, .. }) + if *attr == *b_attr => + { + a.next_with(len).unwrap(); + b.next_with(len).unwrap(); } _ => return false, } diff --git a/crates/fuzz/src/container/tree.rs b/crates/fuzz/src/container/tree.rs index 97576b6cb..639168d2b 100644 --- a/crates/fuzz/src/container/tree.rs +++ b/crates/fuzz/src/container/tree.rs @@ -362,7 +362,7 @@ impl Actionable for TreeAction { } TreeActionInner::MetaDelete { key } => { let meta = super::unwrap(tree.get_meta(target))?; - meta.delete(key); + let _ = meta.delete(key); None } TreeActionInner::MetaClear => { diff --git a/crates/fuzz/src/one_doc_fuzzer.rs b/crates/fuzz/src/one_doc_fuzzer.rs index c13a3eb71..6dc72dcc4 100644 --- a/crates/fuzz/src/one_doc_fuzzer.rs +++ b/crates/fuzz/src/one_doc_fuzzer.rs @@ -654,7 +654,7 @@ impl OneDocFuzzer { undo.clear(); } } - Action::ForkAt { site, to } => { + Action::ForkAt { site, to: _ } => { let frontiers = self.branches[*site as usize].frontiers.clone(); let _forked = self.doc.fork_at(&frontiers); } diff --git 
a/crates/loro-internal/src/container/richtext/richtext_state.rs b/crates/loro-internal/src/container/richtext/richtext_state.rs index ed72b042a..7ca4aca16 100644 --- a/crates/loro-internal/src/container/richtext/richtext_state.rs +++ b/crates/loro-internal/src/container/richtext/richtext_state.rs @@ -1523,7 +1523,7 @@ impl RichtextState { ) { self.check_cache(); { - debug_assert!(self.style_ranges.as_ref().map_or(true, |x| !x.has_style())); + debug_assert!(self.style_ranges.as_ref().is_none_or(|x| !x.has_style())); let elem = RichtextStateChunk::Text(text); self.clear_cache(); match self.tree.query::(&entity_index) { @@ -2366,7 +2366,7 @@ impl RichtextState { len, &self.len_entity(), ); - debug_assert!(self.style_ranges.as_ref().map_or(true, |x| !x.has_style())); + debug_assert!(self.style_ranges.as_ref().is_none_or(|x| !x.has_style())); self.clear_cache(); let range = pos..pos + len; diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index 81eb0089b..844046ceb 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -68,7 +68,7 @@ pub(crate) mod profiling { } thread_local! { - static PROFILE: RefCell> = RefCell::new(None); + static PROFILE: RefCell> = const { RefCell::new(None) }; } pub(crate) fn begin() { diff --git a/crates/loro-internal/src/encoding/json_schema.rs b/crates/loro-internal/src/encoding/json_schema.rs index a3a4b9db9..30d815ce0 100644 --- a/crates/loro-internal/src/encoding/json_schema.rs +++ b/crates/loro-internal/src/encoding/json_schema.rs @@ -1333,13 +1333,13 @@ pub mod json { D: Deserializer<'de>, { let deps: Vec = Deserialize::deserialize(d)?; - Ok(deps + deps .into_iter() .map(|x| { ID::try_from(x.as_str()) .map_err(|_| serde::de::Error::custom("invalid ID in deps")) }) - .collect::, _>>()?) 
+ .collect::, _>>() } } @@ -1362,7 +1362,7 @@ pub mod json { D: Deserializer<'de>, { let peers: Option> = Deserialize::deserialize(d)?; - Ok(peers + peers .map(|x| { x.into_iter() .map(|x| { @@ -1371,7 +1371,7 @@ pub mod json { }) .collect::, _>>() }) - .transpose()?) + .transpose() } } @@ -1465,7 +1465,7 @@ pub mod json { D: Deserializer<'de>, { let str: String = Deserialize::deserialize(d)?; - if str.len() % 2 != 0 { + if !str.len().is_multiple_of(2) { return Err(serde::de::Error::custom( "invalid fractional index hex length", )); diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index de24cccf3..b23d85101 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -427,10 +427,8 @@ pub(crate) fn encode_snapshot_at( } doc.app_state().lock().take_events(); - let final_result = match result { + match result { Err(err) => Err(err), Ok(()) => restore_result, - }; - - final_result + } } diff --git a/crates/loro-internal/src/handler.rs b/crates/loro-internal/src/handler.rs index 45b23e33c..3abbf5bd6 100644 --- a/crates/loro-internal/src/handler.rs +++ b/crates/loro-internal/src/handler.rs @@ -2651,10 +2651,7 @@ impl TextHandler { Some((event_index, unicode_index)) }); - match res { - Some(v) => v, - None => return None, - } + res? 
} }; @@ -3356,7 +3353,7 @@ impl MovableListHandler { Ok(d.value.pop()) } MaybeDetached::Attached(a) => { - if self.len() == 0 { + if self.is_empty() { return Ok(None); } let last = self.len() - 1; diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index 965b3dbb9..0209b8d31 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -387,14 +387,12 @@ impl LoroDoc { options = None; } } - if config.immediate_renew { - if self.can_edit() { - let mut t = self.txn().unwrap(); - if let Some(options) = options.as_ref() { - t.set_options(options.clone()); - } - *txn_guard = Some(t); + if config.immediate_renew && self.can_edit() { + let mut t = self.txn().unwrap(); + if let Some(options) = options.as_ref() { + t.set_options(options.clone()); } + *txn_guard = Some(t); } if let Some(on_commit) = on_commit { @@ -668,6 +666,7 @@ impl LoroDoc { self.import_changes_and_apply_delta_to_state_if_needed( |oplog| encoding::decode_oplog_changes(oplog, parsed), origin, + false, ) // let new_doc = LoroDoc::new(); @@ -679,6 +678,7 @@ impl LoroDoc { EncodeMode::FastUpdates => self.import_changes_and_apply_delta_to_state_if_needed( |oplog| encoding::decode_oplog_changes(oplog, parsed), origin, + false, ), EncodeMode::Auto => { unreachable!() @@ -763,6 +763,7 @@ impl LoroDoc { &self, decode_changes: impl FnOnce(&mut OpLog) -> Result, LoroError>, origin: InternalString, + force_state_apply_rollback: bool, ) -> Result { let mut oplog = self.oplog.lock(); let arena_checkpoint = oplog.arena.checkpoint_for_rollback(); @@ -803,7 +804,8 @@ impl LoroDoc { let old_vv = oplog.vv().clone(); let old_frontiers = oplog.frontiers().clone(); - let rollback_enabled = preflight.needs_state_apply_rollback; + let rollback_enabled = + force_state_apply_rollback || preflight.needs_state_apply_rollback; if rollback_enabled { oplog.begin_import_rollback_with_arena(arena_checkpoint); } @@ -877,6 +879,7 @@ impl LoroDoc { let result = 
self.import_changes_and_apply_delta_to_state_if_needed( |oplog| crate::encoding::json_schema::decode_json_changes(json, &oplog.arena), Default::default(), + true, ); self.emit_events(); result @@ -2500,7 +2503,7 @@ fn find_last_delete_op(oplog: &OpLog, id: ID, idx: ContainerIdx) -> Option { let op_lamport = change.lamport + (op.counter - change.id().counter) as loro_common::Lamport; let key = (op_lamport, peer); - if best.map_or(true, |(bk, _)| key > bk) { + if best.is_none_or(|(bk, _)| key > bk) { best = Some((key, ID::new(peer, op.counter))); } } diff --git a/crates/loro-internal/src/oplog.rs b/crates/loro-internal/src/oplog.rs index 8023a6093..874bbbfca 100644 --- a/crates/loro-internal/src/oplog.rs +++ b/crates/loro-internal/src/oplog.rs @@ -255,7 +255,7 @@ impl OpLog { if change.ops.iter().any(|op| { matches!( op.container.get_type(), - ContainerType::List | ContainerType::Text | ContainerType::Tree + ContainerType::List | ContainerType::Tree ) }) { ans.needs_state_apply_rollback = true; @@ -264,9 +264,9 @@ impl OpLog { // Any newly applied change can unlock pending changes whose ops are not // visible in `changes`, so include pending in the rollback decision. - // Keep this narrow: map-only pending changes cannot return a state-apply - // error, and forcing rollback there adds lock traffic to small - // sync/import workloads. + // Keep this narrow: text/map-only pending changes cannot return a + // state-apply error, and forcing rollback there adds lock traffic to + // small sync/import workloads. 
if ans.applies_to_dag && pending_needs_state_apply_rollback { ans.needs_state_apply_rollback = true; } @@ -722,6 +722,7 @@ impl OpLog { .flat_map(move |span| self.change_store.iter_changes(span)) } + #[allow(dead_code)] pub(crate) fn iter_changes_causally_rev<'a>( &'a self, from: &VersionVector, diff --git a/crates/loro-internal/src/oplog/change_store.rs b/crates/loro-internal/src/oplog/change_store.rs index 1b5aa8a98..cf147b644 100644 --- a/crates/loro-internal/src/oplog/change_store.rs +++ b/crates/loro-internal/src/oplog/change_store.rs @@ -427,6 +427,7 @@ impl ChangeStore { }) } + #[allow(dead_code)] pub(crate) fn get_blocks_in_range(&self, id_span: IdSpan) -> VecDeque> { let mut inner = self.inner.lock(); let start_counter = inner @@ -824,7 +825,7 @@ mod mut_inner_kv { panic!("counter should be continuous") } - if let Some(rollback) = rollback.as_deref_mut() { + if let Some(rollback) = rollback.as_mut() { rollback.record_block_before_mutation(*_id, block.clone()); } @@ -1071,7 +1072,7 @@ mod mut_inner_kv { if !new_change.ops.is_empty() { total_len += new_change.atom_len(); - self.insert_change_inner(new_change, false, false, rollback.as_deref_mut()); + self.insert_change_inner(new_change, false, false, rollback); } assert_eq!(total_len, original_len); @@ -1200,8 +1201,7 @@ mod mut_inner_kv { let mut inner = self.inner.lock(); let Some((next_back_id, next_back_bytes)) = kv .scan(Bound::Unbounded, Bound::Included(&id.to_bytes())) - .filter(|(id, _)| id.len() == 12) - .next_back() + .rfind(|(id, _)| id.len() == 12) else { return; }; @@ -1314,6 +1314,7 @@ impl ChangesBlock { }) } + #[allow(dead_code)] pub(crate) fn content(&self) -> &ChangesBlockContent { &self.content } @@ -1586,6 +1587,7 @@ impl ChangesBlockContent { } } + #[allow(dead_code)] pub(crate) fn len_changes(&self) -> usize { match self { ChangesBlockContent::Changes(changes) => changes.len(), diff --git a/crates/loro-internal/src/oplog/change_store/iter.rs 
b/crates/loro-internal/src/oplog/change_store/iter.rs index 27bba4113..153eaf0a2 100644 --- a/crates/loro-internal/src/oplog/change_store/iter.rs +++ b/crates/loro-internal/src/oplog/change_store/iter.rs @@ -1,3 +1,5 @@ +#![allow(dead_code)] + use std::{ collections::{BinaryHeap, VecDeque}, sync::Arc, diff --git a/crates/loro-internal/src/state/map_state.rs b/crates/loro-internal/src/state/map_state.rs index a8863eb27..ed9854e36 100644 --- a/crates/loro-internal/src/state/map_state.rs +++ b/crates/loro-internal/src/state/map_state.rs @@ -176,18 +176,14 @@ impl MapState { } match (&result, value_yes) { - (Some(x), true) => { - if x.value.is_none() { - self.size += 1; - } + (Some(x), true) if x.value.is_none() => { + self.size += 1; } (None, true) => { self.size += 1; } - (Some(x), false) => { - if x.value.is_some() { - self.size -= 1; - } + (Some(x), false) if x.value.is_some() => { + self.size -= 1; } _ => {} }; diff --git a/crates/loro-internal/src/state/movable_list_state.rs b/crates/loro-internal/src/state/movable_list_state.rs index 93df44855..135476ae7 100644 --- a/crates/loro-internal/src/state/movable_list_state.rs +++ b/crates/loro-internal/src/state/movable_list_state.rs @@ -1133,37 +1133,35 @@ impl ContainerState for MovableListState { match self.inner.elements().get(&elem_id).cloned() { Some(elem) => { // Update value if needed - if value_id.is_some() - && elem.value != value - && (!need_compare || elem.value_id < value_id.unwrap()) - { - maybe_moved.remove(&elem_id); - self.inner - .update_value(elem_id, value.clone(), value_id.unwrap()); - let index = self.get_index_of_elem(elem_id); - if let Some(index) = index { - event.compose( - &DeltaRopeBuilder::new() - .retain(index, Default::default()) - .delete(1) - .insert( - ArrayVec::from([ValueOrHandler::from_value( - value, doc, - )]), - ListDeltaMeta { from_move: false }, - ) - .build(), - ) + if let Some(value_id) = value_id { + if elem.value != value && (!need_compare || elem.value_id < value_id) { + 
maybe_moved.remove(&elem_id); + self.inner.update_value(elem_id, value.clone(), value_id); + let index = self.get_index_of_elem(elem_id); + if let Some(index) = index { + event.compose( + &DeltaRopeBuilder::new() + .retain(index, Default::default()) + .delete(1) + .insert( + ArrayVec::from([ValueOrHandler::from_value( + value, doc, + )]), + ListDeltaMeta { from_move: false }, + ) + .build(), + ) + } } } // Update pos if needed - if pos.is_some() - && elem.pos != pos.unwrap() - && (!need_compare || elem.pos < pos.unwrap()) - { + if let Some(pos) = pos { + if elem.pos == pos || (need_compare && elem.pos >= pos) { + continue; + } // don't need to update old list item, because it's handled by list diff already - let result = self.inner.update_pos(elem_id, pos.unwrap(), false); + let result = self.inner.update_pos(elem_id, pos, false); let result = self.inner.convert_update_to_event_pos(result); if let Some(new_index) = result.insert { let new_value = diff --git a/crates/loro-internal/src/state/tree_state.rs b/crates/loro-internal/src/state/tree_state.rs index d97c646e8..589b4758b 100644 --- a/crates/loro-internal/src/state/tree_state.rs +++ b/crates/loro-internal/src/state/tree_state.rs @@ -1715,7 +1715,7 @@ mod snapshot { .iter() .map(|x| TreeID::new(peers[x.peer_idx], x.counter)) .collect_vec(); - for (node_id, node) in node_ids.iter().zip(encoded.nodes.into_iter()) { + for (node_id, node) in node_ids.iter().zip(encoded.nodes) { // PERF: we don't need to mov the deleted node, instead we can cache them // If the parent is TreeParentId::Deleted, then all the nodes afterwards are deleted tree._init_push_tree_node_in_order( diff --git a/crates/loro-internal/src/txn.rs b/crates/loro-internal/src/txn.rs index b4185e013..26d4606c1 100644 --- a/crates/loro-internal/src/txn.rs +++ b/crates/loro-internal/src/txn.rs @@ -906,7 +906,7 @@ fn change_to_diff( }), EventHint::Tree(tree_diff) => { let mut diff = TreeDiff::default(); - diff.diff.extend(tree_diff.into_iter()); + 
diff.diff.extend(tree_diff); ans.push(TxnContainerDiff { idx: container_idx, diff: Diff::Tree(diff), diff --git a/crates/loro-wasm/src/convert.rs b/crates/loro-wasm/src/convert.rs index 154c86743..6fc8855ad 100644 --- a/crates/loro-wasm/src/convert.rs +++ b/crates/loro-wasm/src/convert.rs @@ -36,7 +36,7 @@ fn js_wbg_ptr(js: &JsValue) -> JsResult { } fn validate_wbg_ptr_alignment(ptr: u32) -> Result<(), &'static str> { - if (ptr as usize) % std::mem::align_of::>() != 0 { + if !(ptr as usize).is_multiple_of(std::mem::align_of::>()) { return Err("Invalid wasm-bindgen pointer alignment"); } diff --git a/crates/loro-wasm/src/lib.rs b/crates/loro-wasm/src/lib.rs index 275a742fd..764d2b4f0 100644 --- a/crates/loro-wasm/src/lib.rs +++ b/crates/loro-wasm/src/lib.rs @@ -90,7 +90,7 @@ type JsResult = Result; type EventCallback = Box bool + Send + Sync + 'static>; thread_local! { - static IN_PRE_COMMIT_CALLBACK: Cell = Cell::new(false); + static IN_PRE_COMMIT_CALLBACK: Cell = const { Cell::new(false) }; } /// The CRDTs document. Loro supports different CRDTs include [**List**](LoroList), From d67d53438060eb078cdddb3c84e9cb29911b0e29 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Thu, 21 May 2026 10:22:13 +0000 Subject: [PATCH 33/46] refactor: centralize import rollback container check --- crates/loro-common/src/lib.rs | 20 +++++++++++++++++++ .../loro-internal/src/encoding/json_schema.rs | 3 +-- crates/loro-internal/src/loro.rs | 3 +-- crates/loro-internal/src/oplog.rs | 13 ++++++------ .../src/oplog/pending_changes.rs | 14 +++++-------- 5 files changed, 33 insertions(+), 20 deletions(-) diff --git a/crates/loro-common/src/lib.rs b/crates/loro-common/src/lib.rs index a7b6ba4a3..63d96a932 100644 --- a/crates/loro-common/src/lib.rs +++ b/crates/loro-common/src/lib.rs @@ -333,6 +333,26 @@ impl ContainerType { } } + /// Returns whether importing ops for this container type may need import + /// rollback protection during state diff application. 
+ /// + /// This is used by import preflight: if an imported or newly-unblocked + /// pending change touches one of these container types, the oplog enables + /// rollback bookkeeping before applying the change to the document state. + /// Keep this list aligned with container states whose diff validation or + /// application can return an error after the oplog has already advanced. + /// + /// Container types not listed here may still be complex, but their current + /// state-apply path does not report recoverable errors through + /// `LoroResult`, so enabling rollback for them would only add import + /// overhead. + pub fn may_need_state_apply_rollback(&self) -> bool { + matches!( + self, + ContainerType::List | ContainerType::Text | ContainerType::Tree + ) + } + pub fn to_u8(self) -> u8 { match self { ContainerType::Map => 0, diff --git a/crates/loro-internal/src/encoding/json_schema.rs b/crates/loro-internal/src/encoding/json_schema.rs index 30d815ce0..0b1917b26 100644 --- a/crates/loro-internal/src/encoding/json_schema.rs +++ b/crates/loro-internal/src/encoding/json_schema.rs @@ -1333,8 +1333,7 @@ pub mod json { D: Deserializer<'de>, { let deps: Vec = Deserialize::deserialize(d)?; - deps - .into_iter() + deps.into_iter() .map(|x| { ID::try_from(x.as_str()) .map_err(|_| serde::de::Error::custom("invalid ID in deps")) diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index 0209b8d31..3c65f82dd 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -804,8 +804,7 @@ impl LoroDoc { let old_vv = oplog.vv().clone(); let old_frontiers = oplog.frontiers().clone(); - let rollback_enabled = - force_state_apply_rollback || preflight.needs_state_apply_rollback; + let rollback_enabled = force_state_apply_rollback || preflight.needs_state_apply_rollback; if rollback_enabled { oplog.begin_import_rollback_with_arena(arena_checkpoint); } diff --git a/crates/loro-internal/src/oplog.rs 
b/crates/loro-internal/src/oplog.rs index 874bbbfca..2e670adf4 100644 --- a/crates/loro-internal/src/oplog.rs +++ b/crates/loro-internal/src/oplog.rs @@ -26,7 +26,7 @@ use crate::span::{HasCounterSpan, HasLamportSpan}; use crate::version::{Frontiers, ImVersionVector, VersionVector}; use crate::LoroError; use change_store::{BlockOpRef, ChangeStoreRollback}; -use loro_common::{ContainerType, HasIdSpan, IdLp, IdSpan}; +use loro_common::{HasIdSpan, IdLp, IdSpan}; use rle::{HasLength, RleVec, Sliceable}; use smallvec::SmallVec; @@ -252,12 +252,11 @@ impl OpLog { } ans.applies_to_dag = true; - if change.ops.iter().any(|op| { - matches!( - op.container.get_type(), - ContainerType::List | ContainerType::Tree - ) - }) { + if change + .ops + .iter() + .any(|op| op.container.get_type().may_need_state_apply_rollback()) + { ans.needs_state_apply_rollback = true; } } diff --git a/crates/loro-internal/src/oplog/pending_changes.rs b/crates/loro-internal/src/oplog/pending_changes.rs index 95f37f2b8..84ad1d9c1 100644 --- a/crates/loro-internal/src/oplog/pending_changes.rs +++ b/crates/loro-internal/src/oplog/pending_changes.rs @@ -5,9 +5,7 @@ use crate::{ version::{ImVersionVector, VersionRange}, OpLog, VersionVector, }; -use loro_common::{ - ContainerType, Counter, CounterSpan, HasCounterSpan, HasIdSpan, LoroResult, PeerID, ID, -}; +use loro_common::{Counter, CounterSpan, HasCounterSpan, HasIdSpan, LoroResult, PeerID, ID}; use rustc_hash::FxHashMap; #[derive(Debug, Clone)] @@ -40,12 +38,10 @@ impl PendingChanges { self.changes.values().any(|tree| { tree.values().any(|changes| { changes.iter().any(|change| { - change.ops.iter().any(|op| { - matches!( - op.container.get_type(), - ContainerType::List | ContainerType::Text | ContainerType::Tree - ) - }) + change + .ops + .iter() + .any(|op| op.container.get_type().may_need_state_apply_rollback()) }) }) }) From 507aff6023835b19aaa4f667de52558563dd4884 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 09:35:10 +0000 
Subject: [PATCH 34/46] docs: plan fast diff calc span routing --- plans/20260522-fast-diff-calc.md | 431 +++++++++++++++++++++++++++++++ 1 file changed, 431 insertions(+) create mode 100644 plans/20260522-fast-diff-calc.md diff --git a/plans/20260522-fast-diff-calc.md b/plans/20260522-fast-diff-calc.md new file mode 100644 index 000000000..3012f1b76 --- /dev/null +++ b/plans/20260522-fast-diff-calc.md @@ -0,0 +1,431 @@ +# Fast Diff Calc Tracker Span Routing Plan + +Date: 2026-05-22 + +## Goal + +Improve checkout diff calculation for documents with many peers and many text/list-like containers by avoiding repeated version-vector scanning and repeated empty `IdToCursor` lookups inside each container tracker. + +The target direction is to move tracker checkout APIs away from "checkout to target `VersionVector`" and toward "apply these directed counter spans". The diff calculator should compute or route the relevant spans once, then pass only container-relevant spans into each tracker. + +## Current Architecture + +The current checkout diff flow is: + +1. `OpLog::iter_from_lca_causally()` finds the LCA between `before` and `after`, computes the merged VV, and iterates changes causally from the LCA to the merged version. +2. `DiffCalculator::calc_diff_internal()` iterates each change/op and calls the per-container calculator. +3. Before applying the first op for a container in each change, the container calculator asks its tracker to checkout to the causal version immediately before that op. +4. `RichtextTracker::checkout_causal(CausalVersion)` computes `retreat` and `forward` spans internally by comparing its current VV against the target causal version. +5. `RichtextTracker::_checkout_spans()` applies the resulting spans by iterating `IdToCursor`. +6. At final diff materialization, `RichtextTracker::diff(from_vv, to_vv)` again does two full tracker checkouts: first to `from`, then to `to` with diff status enabled. 
+ +Important current details: + +- `IdToCursor` is already internally keyed by `PeerID`: `FxHashMap<PeerID, Vec<Fragment>>`. +- Empty `IdToCursor::iter(span)` is cheap for a single call, but expensive when multiplied by many containers and many checkout steps. +- `CounterSpan` already has direction semantics. `start < end` is forward, `end < start` is reversed/retreat. `content_len()` uses absolute length, and `slice()` preserves direction. +- Existing `VersionVectorDiff` uses separate `retreat` and `forward` maps. Its internal `merge()` normalizes spans, so it should not be reused as-is for a single directed-span map API. +- `current_frontier_hint` is only maintained in the tracker today. It is not used as a fast path, and a single frontier hint is not enough to prove full causal equality. + +## Main Performance Problem + +The biggest waste is not just wide VV comparison. It is that a global peer span is handed to every affected text/list-like tracker even when that container has no op in that peer/counter range. + +Example: + +```text +global delta: peer 7, 0..1_000_000 +containers: 1000 LoroText roots +container A has peer 7 ops +container B..Z have no peer 7 ops in that span +``` + +Without container routing, every tracker still checks `id_to_cursor.iter(peer 7, 0..1_000_000)`. + +The proposed cache: + +```rust +FxHashMap<ContainerIdx, FxHashMap<PeerID, CounterSpan>> +``` + +is a good first-order way to skip most of these empty checks. + +## Critical Semantic Split + +There are two similar but different structures. They should not be conflated. + +### 1. Persistent Container Coverage Cache + +This answers: + +> Could this container possibly have any op from this peer in this counter range? + +Recommended representation: + +```rust +type ContainerPeerCoverage = FxHashMap<ContainerIdx, FxHashMap<PeerID, CounterSpan>>; +``` + +For coverage, spans should be treated as a coarse normalized min/max range. Direction is not meaningful because coverage is independent of checkout direction.
+ +False positives are allowed: + +```text +container has peer 1 ops at 10 and 1000 +coverage stores 10..1001 +query 500..600 falsely says "maybe" +``` + +False negatives are not allowed. + +### 2. Per-Checkout Directed Delta + +This answers: + +> From this tracker's current visibility state to the target visibility state, which peer/counter spans should be forwarded or retreated? + +Recommended representation: + +```rust +type DirectedPeerSpans = FxHashMap<PeerID, CounterSpan>; +``` + +Here `CounterSpan` direction is meaningful: + +```rust +CounterSpan::new(10, 20) // forward 10..20 +CounterSpan::new(20, 10) // retreat 20..10 +``` + +For a single transition, a given peer should only have one direction. If implementation ever needs both directions for the same peer, that means it is combining multiple transitions and must flush or split the delta. + +## Proposed Design + +### New Internal Types + +Start with explicit names even if they are just type aliases initially: + +```rust +type PeerSpanMap = FxHashMap<PeerID, CounterSpan>; + +struct ContainerOpCoverage { + by_container: FxHashMap<ContainerIdx, PeerSpanMap>, +} + +struct TrackerCheckoutSpans { + by_peer: PeerSpanMap, +} +``` + +`ContainerOpCoverage` stores broad normalized coverage. `TrackerCheckoutSpans` stores directed per-transition spans. + +Do not encode checkout direction in a persistent coverage cache. Preserve direction only in `TrackerCheckoutSpans`. + +### Span Operations Required + +Add helpers instead of using `CounterSpan::get_intersection()` directly. The existing intersection helper assumes forward spans.
+ +Needed helpers: + +```rust +fn normalized_overlap(a: CounterSpan, b: CounterSpan) -> Option<(Counter, Counter)>; + +fn intersect_preserve_direction( + directed: CounterSpan, + coverage: CounterSpan, +) -> Option<CounterSpan>; + +fn extend_coverage(coverage: &mut CounterSpan, op_span: CounterSpan); + +fn merge_directed_delta(existing: &mut CounterSpan, incoming: CounterSpan) -> Result<(), MixedDirection>; +``` + +Rules: + +- Coverage should store normalized min/max ranges. +- Directed delta should preserve `start/end` direction. +- Intersecting a reversed directed span with coverage must return a reversed span. +- Merging directed deltas must reject mixed directions for the same peer in one transition. + +### Tracker API + +Add a new API: + +```rust +impl Tracker { + pub(crate) fn checkout_peer_spans(&mut self, spans: &PeerSpanMap); +} +``` + +Execution rules: + +1. Iterate reversed spans first and run retreat logic. +2. Iterate forward spans second and run forward logic. +3. For `IdToCursor::iter`, use normalized spans internally. +4. Update `current_vv` using the directed span endpoint: + - forward `10..20` sets peer end to `20` + - retreat `20..10` sets peer end to `10` + +Keep adapters temporarily: + +```rust +checkout(&VersionVector) +checkout_causal(CausalVersion) +``` + +These adapters can compute directed spans and call `checkout_peer_spans()`. That keeps the first step behavior-preserving. + +Remove `current_frontier_hint` after `checkout_peer_spans()` is in place. It is not a strong enough invariant and becomes unnecessary. + +### Diff Calculator Routing + +`DiffCalculator` should become responsible for deciding which spans are relevant to a container before calling the tracker. + +For each container calculator, maintain or access `ContainerOpCoverage`.
+ +When a global directed span is produced: + +```rust +global: peer 7, 1000..2000 +container coverage: peer 7, 1500..1600 +directed for tracker: peer 7, 1500..1600 +``` + +For retreat: + +```rust +global: peer 7, 2000..1000 +coverage: peer 7, 1500..1600 +directed for tracker: peer 7, 1599..1499 or equivalent reversed slice +``` + +The exact reversed boundary helper must be tested carefully against `CounterSpan::contains`, `min`, `max`, and `norm_end`. + +### Where Coverage Comes From + +Preferred first implementation: per-diff-calculation, opportunistic coverage. + +- When a container tracker applies an op, record that op's counter span into the coverage for that container. +- This coverage describes ops already known by that tracker. +- It is sufficient to filter most empty retreat/forward checks because tracker cannot act on op ids it has not seen yet anyway. + +Special cases: + +- Trackers seeded from shallow-root state chunks must also seed coverage for those chunks. Otherwise a later retreat over seeded content could be falsely skipped. +- Delete/move op counters must be recorded. The delete/move may refer to insert spans from other peers, but the version visibility toggle is caused by the delete/move op id itself. +- Style start/end ops must be recorded under their actual op ids, not only text insert ids. + +Longer-term option: a persistent OpLog-level container op coverage index. This may be worth it if building coverage per diff calculation still costs too much, but it increases invalidation and shallow-history complexity. + +## Design Risks + +### Risk 1: False Negatives + +False negatives in coverage are correctness bugs. They can leave tracker rope visibility wrong. + +Mitigation: + +- In debug/test builds, compare the new filtered spans against the old unfiltered checkout for selected cases. +- Add assertions that any `IdToCursor` entry affected by the old global span is included by the filtered span. 
+ +### Risk 2: Direction Loss + +Existing code often normalizes spans: + +- `IdToCursor::iter()` normalizes its input. +- `VersionVectorDiff::merge()` normalizes target spans. +- `IdSpan::ctr_start()` returns normalized start. + +This is fine for lookup, but not for representing a transition. The new directed API must preserve direction until after it updates `current_vv`. + +### Risk 3: Mixed Direction for Same Peer + +For one transition from current to target, a peer cannot both advance and retreat. But if the implementation accumulates spans across multiple transitions before flushing, mixed direction can appear. + +Mitigation: + +- Scope `TrackerCheckoutSpans` to a single target checkout. +- Reject mixed direction in helper code. + +### Risk 4: Final Diff Still Uses Full VV Checkout + +`RichtextTracker::diff(from, to)` currently calls: + +```rust +checkout(from) +checkout(to, on_diff_status = true) +``` + +If only `apply_change()` checkout is optimized, final diff materialization may still scan full VVs for every text container. + +Plan must include `diff_by_spans(from_spans, to_spans)` or equivalent container-filtered final diff checkout. + +### Risk 5: Sliced Changes and Partial Ops + +`calc_diff_internal()` may slice ops when the replay range starts or ends in the middle of a change/op. + +The filtered spans must align with the actual op slice being applied. It is acceptable for coverage to be broader than exact op slices, but it must never omit a sliced op that the tracker can see. + +### Risk 6: Shallow Snapshot / Unknown Chunks + +The current branch can seed richtext trackers from shallow-root state chunks. Coverage seeding must understand those chunks, or shallow checkout may skip spans that correspond to already-seeded tracker entries. + +Unknown chunks and GC/shallow root fallback paths should remain conservative: if coverage cannot be proven, pass the original global span through. 
+ +## Phased Plan + +### Phase 0: Measurement + +Add test-utils profiling counters around tracker checkout: + +- number of global checkout spans +- number of container-filtered spans +- number of spans skipped by coverage +- number of `IdToCursor::iter` calls +- number of empty `IdToCursor::iter` calls +- max/avg peers per checkout +- max/avg affected containers per checkout + +Use current benchmarks that model many peers and many text roots. Keep before/after numbers in benchmark notes. + +### Phase 1: Extract Tracker Span API + +Implement: + +```rust +Tracker::checkout_peer_spans(&PeerSpanMap) +``` + +Keep existing APIs as adapters: + +- `checkout(&VersionVector)` +- `checkout_causal(CausalVersion)` +- `diff(from_vv, to_vv)` + +No routing yet. This phase should be behavior-preserving. + +Remove `current_frontier_hint` in this phase if no longer needed. + +Verification: + +- tracker unit tests +- existing richtext/list/movable-list diff tests +- `cargo test -p loro-internal checkout` +- focused fuzz artifacts that previously hit checkout/diff calc + +### Phase 2: Add Container Coverage and Span Filtering + +Introduce `ContainerOpCoverage` in diff calc or in each container calculator. 
+ +Start with per-diff-calculation coverage: + +- record op spans when a tracker applies an op +- seed coverage when tracker is seeded from existing state chunks +- use coverage to filter global directed spans before calling `checkout_peer_spans` + +Keep fallback conservative: + +- if no coverage exists for a container, use current behavior +- if helper cannot safely preserve direction, use current behavior + +Verification: + +- debug comparison mode: run old unfiltered checkout and new filtered checkout on cloned trackers for small tests +- tests with many containers where only one container has ops in a wide peer span +- tests for reversed/retreat spans +- tests for sliced ops +- tests for delete/move/style ops + +### Phase 3: Container-Filtered Final Diff + +Add tracker diff API that accepts directed/container-filtered spans: + +```rust +Tracker::diff_by_spans(from_delta, to_delta) +``` + +or split it into: + +```rust +checkout_peer_spans(from_spans) +checkout_peer_spans_mark_diff(to_spans) +``` + +This avoids doing full `from_vv` and `to_vv` checkouts for every richtext tracker during final diff materialization. + +Verification: + +- compare final `InternalDiff::RichtextRaw` against old implementation +- include shallow-root seeded trackers +- include multi-frontier checkout + +### Phase 4: Optimize Representation + +Only after benchmarks show the map overhead matters, introduce inline variants: + +```rust +enum PeerSpanSet { + Empty, + One(PeerID, CounterSpan), + Small(SmallVec<[(PeerID, CounterSpan); 4]>), + Map(FxHashMap<PeerID, CounterSpan>), +} +``` + +Do not start with this. It adds complexity before proving the basic routing wins. 
+ +### Phase 5: Optional Persistent Coverage Index + +If per-diff coverage construction still costs too much, consider an OpLog/history-cache-level index: + +```rust +ContainerIdx -> PeerID -> CounterSpan coverage +``` + +This must handle: + +- import rollback +- shallow snapshot boundaries +- unknown containers +- history cache invalidation/freeing +- change-store compaction + +Because of those lifecycle risks, keep it as a later optimization. + +## Recommended First PR Scope + +Do not implement the full cache in one PR. + +First PR should: + +1. Introduce directed span helpers with tests. +2. Add `Tracker::checkout_peer_spans`. +3. Make `checkout` and `checkout_causal` delegate to it. +4. Remove `current_frontier_hint`. +5. Add profiling counters for skipped/empty span checks, even if routing is not active yet. + +Second PR should: + +1. Add `ContainerOpCoverage`. +2. Filter checkout spans per container. +3. Keep a conservative fallback path. +4. Add correctness comparison tests. + +This reduces blast radius and gives a clean place to benchmark API refactor vs container routing separately. + +## Open Questions + +1. Should coverage live in `DiffCalculator`, `RichtextDiffCalculator`, or a shared tracker layer? +2. How should shallow-root seeded richtext tracker coverage be initialized for style chunks? +3. Should movable-list use the same tracker span API immediately, or be migrated after richtext/list? +4. Is coarse one-span-per-container-peer enough for the known benchmarks, or do sparse same-peer histories require `SmallVec` later? +5. Should `VersionVectorDiff` be adapted to expose directed spans, or should this remain a separate type to avoid changing existing semantics? + +## Current Recommendation + +Proceed with the span-routing design, but keep two invariants explicit: + +1. Persistent coverage is broad and directionless. +2. Per-checkout deltas are directed and scoped to a single transition. 
+ +This design is more general than `current_frontier_hint`, directly addresses the many-container empty-lookup cost, and can be introduced incrementally with conservative fallback paths. From c350b0e81387e0dcf486223b5c986f3056432fb9 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 09:48:48 +0000 Subject: [PATCH 35/46] bench: add many text checkout scenario --- crates/loro-internal/benches/text_checkout.rs | 168 +++++++++++++++++- 1 file changed, 160 insertions(+), 8 deletions(-) diff --git a/crates/loro-internal/benches/text_checkout.rs b/crates/loro-internal/benches/text_checkout.rs index 06fe8dfd9..da5d3da53 100644 --- a/crates/loro-internal/benches/text_checkout.rs +++ b/crates/loro-internal/benches/text_checkout.rs @@ -22,6 +22,9 @@ mod text_checkout { peer_count: usize, change_count: usize, base_len: usize, + text_container_count: usize, + large_text_container_count: usize, + large_text_len: usize, version_count: usize, subscribed: bool, } @@ -110,6 +113,13 @@ mod text_checkout { let peer_count = env_usize("LORO_TEXT_CHECKOUT_PEERS", 1000).max(1); let base_len = env_usize("LORO_TEXT_CHECKOUT_BASE_LEN", 8192).max(1); let sequential_changes = env_usize("LORO_TEXT_CHECKOUT_CHANGES", peer_count.max(1000)); + let text_container_count = env_usize("LORO_TEXT_CHECKOUT_TEXT_CONTAINERS", 10_000).max(1); + let large_text_container_count = + env_usize("LORO_TEXT_CHECKOUT_LARGE_TEXT_CONTAINERS", 8).min(text_container_count); + let small_text_len = env_usize("LORO_TEXT_CHECKOUT_SMALL_TEXT_LEN", 8); + let large_text_len = env_usize("LORO_TEXT_CHECKOUT_LARGE_TEXT_LEN", 65_536); + let container_edit_count = + env_usize("LORO_TEXT_CHECKOUT_CONTAINER_EDITS", text_container_count).max(1); let mut group = c.benchmark_group("text checkout"); group.sample_size(10); @@ -164,6 +174,19 @@ mod text_checkout { "code/checkout-to-latest-linear", build_code_like_history(sequential_changes, base_len, 1, false), ); + bench_checkout_latest_to_base_fixture( + &mut group, + 
"multi-container/latest-to-base", + build_many_text_container_history( + peer_count, + text_container_count, + large_text_container_count, + small_text_len, + large_text_len, + container_edit_count, + false, + ), + ); group.finish(); } @@ -210,6 +233,45 @@ mod text_checkout { maybe_report_profile(name, stats, &totals, state_profile); } + fn bench_checkout_latest_to_base_fixture( + group: &mut BenchmarkGroup<'_, WallTime>, + name: &str, + fixture: CheckoutFixture, + ) { + let CheckoutFixture { + doc, + frontiers, + stats, + _subscription, + } = fixture; + let base_frontier = frontiers.first().unwrap().clone(); + let latest_frontier = frontiers.last().unwrap().clone(); + let mut totals = ProfileTotals::default(); + + group.bench_with_input( + BenchmarkId::new(name, stats.version_count), + &base_frontier, + |b, base_frontier| { + b.iter_custom(|iters| { + let mut measured = Duration::ZERO; + for _ in 0..iters { + doc.checkout(&latest_frontier).unwrap(); + let start = std::time::Instant::now(); + let profile = doc.checkout_with_profile(base_frontier).unwrap(); + measured += start.elapsed(); + totals.add(profile); + black_box(profile); + } + + measured + }); + }, + ); + + let state_profile = doc.text_state_profile("text"); + maybe_report_profile(name, stats, &totals, state_profile); + } + fn bench_checkout_to_latest_fixture( group: &mut BenchmarkGroup<'_, WallTime>, name: &str, @@ -294,6 +356,9 @@ mod text_checkout { peer_count, change_count: peer_count, base_len, + text_container_count: 1, + large_text_container_count: 0, + large_text_len: 0, version_count: peer_count + 1, subscribed, }, @@ -315,9 +380,8 @@ mod text_checkout { let mut frontiers = Vec::with_capacity(peer_count + 1); frontiers.push(doc.oplog_frontiers()); let mut rng = StdRng::seed_from_u64(6); - let mut len = base_len; - for peer in 0..peer_count { + for (peer, len) in (0..peer_count).zip(base_len..) 
{ let snapshot = doc.export(ExportMode::snapshot()).unwrap(); let base_vv = doc.oplog_vv(); let peer_doc = doc_from_snapshot(&snapshot, peer as PeerID + 2); @@ -327,7 +391,6 @@ mod text_checkout { peer_doc.commit_then_renew(); let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); doc.import(&update).unwrap(); - len += 1; frontiers.push(doc.oplog_frontiers()); } @@ -339,6 +402,9 @@ mod text_checkout { peer_count, change_count: peer_count, base_len, + text_container_count: 1, + large_text_container_count: 0, + large_text_len: 0, version_count: peer_count + 1, subscribed, }, @@ -386,6 +452,9 @@ mod text_checkout { peer_count, change_count: peer_count, base_len, + text_container_count: 1, + large_text_container_count: 0, + large_text_len: 0, version_count: peer_count + 1, subscribed, }, @@ -425,6 +494,9 @@ mod text_checkout { peer_count, change_count: peer_count, base_len, + text_container_count: 1, + large_text_container_count: 0, + large_text_len: 0, version_count: peer_count + 1, subscribed, }, @@ -478,6 +550,9 @@ mod text_checkout { peer_count: 1, change_count, base_len, + text_container_count: 1, + large_text_container_count: 0, + large_text_len: 0, version_count: change_count + 1, subscribed, }, @@ -485,6 +560,71 @@ mod text_checkout { ) } + fn build_many_text_container_history( + peer_count: usize, + text_container_count: usize, + large_text_container_count: usize, + small_text_len: usize, + large_text_len: usize, + edit_count: usize, + subscribed: bool, + ) -> CheckoutFixture { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let small_text = repeated_text(small_text_len); + let large_text = repeated_text(large_text_len); + let mut texts = Vec::with_capacity(text_container_count); + let mut lens = Vec::with_capacity(text_container_count); + + for idx in 0..text_container_count { + let name = text_container_name(idx); + let text = doc.get_text(name.as_str()); + let initial = if idx < large_text_container_count { + 
&large_text + } else { + &small_text + }; + if !initial.is_empty() { + text.insert(0, initial, PosType::Unicode).unwrap(); + } + texts.push(text); + lens.push(initial.chars().count()); + } + + doc.commit_then_renew(); + let mut frontiers = Vec::with_capacity(edit_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(0x7e57_c001); + + for edit in 0..edit_count { + let peer = edit % peer_count; + doc.set_peer_id(peer as PeerID + 2).unwrap(); + let text_idx = edit % text_container_count; + let pos = rng.gen_range(0..=lens[text_idx]); + texts[text_idx].insert(pos, "x", PosType::Unicode).unwrap(); + lens[text_idx] += 1; + doc.commit_then_renew(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: "many text containers with wide multi-peer checkout", + peer_count, + change_count: edit_count, + base_len: small_text_len, + text_container_count, + large_text_container_count, + large_text_len, + version_count: edit_count + 1, + subscribed, + }, + subscribed, + ) + } + fn build_base_snapshot(base_len: usize) -> (Vec, loro_internal::VersionVector) { let doc = LoroDoc::new_auto_commit(); doc.set_peer_id(1).unwrap(); @@ -543,6 +683,14 @@ mod text_checkout { out } + fn text_container_name(index: usize) -> String { + if index == 0 { + "text".to_string() + } else { + format!("text_{index}") + } + } + fn env_usize(name: &str, default: usize) -> usize { std::env::var(name) .ok() @@ -562,15 +710,16 @@ mod text_checkout { let samples = totals.samples as u32; let state_profile = state_profile.unwrap_or_default(); - let avg_future_scan_visited = if totals.richtext_insert_future_scan_count == 0 { - 0 - } else { - totals.richtext_insert_future_scan_visited / totals.richtext_insert_future_scan_count - }; + let avg_future_scan_visited = totals + .richtext_insert_future_scan_visited + .checked_div(totals.richtext_insert_future_scan_count) + .unwrap_or(0); eprintln!( concat!( 
"[text-checkout-profile] {name}: scenario={scenario}, peers={peers}, ", "changes={changes}, base_len={base_len}, versions={versions}, ", + "text_containers={text_containers}, large_text_containers={large_text_containers}, ", + "large_text_len={large_text_len}, ", "subscribed={subscribed}, samples={samples}, avg_total={avg_total:?}, ", "avg_frontier_prepare={avg_frontier_prepare:?}, ", "avg_frontiers_to_vv={avg_frontiers_to_vv:?}, avg_diff_calc={avg_diff_calc:?}, ", @@ -601,6 +750,9 @@ mod text_checkout { changes = stats.change_count, base_len = stats.base_len, versions = stats.version_count, + text_containers = stats.text_container_count, + large_text_containers = stats.large_text_container_count, + large_text_len = stats.large_text_len, subscribed = stats.subscribed, samples = totals.samples, avg_total = totals.total / samples, From 5c3cd62a8099468a178146c1568a4e5377a3f533 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 09:57:21 +0000 Subject: [PATCH 36/46] refactor: route richtext checkout through spans --- .../src/container/richtext/tracker.rs | 121 +++++++++++------- crates/loro-internal/src/diff_calc.rs | 11 +- crates/loro-internal/src/version.rs | 14 +- 3 files changed, 79 insertions(+), 67 deletions(-) diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index 7554fafb6..eecf749be 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -27,7 +27,6 @@ pub(crate) use crdt_rope::CrdtRopeDelta; pub(crate) struct Tracker { applied_vv: VersionVector, current_vv: VersionVector, - current_frontier_hint: Option, rope: CrdtRope, id_to_cursor: IdToCursor, } @@ -46,7 +45,6 @@ impl Tracker { id_to_cursor: IdToCursor::default(), applied_vv: Default::default(), current_vv: Default::default(), - current_frontier_hint: None, }; let result = this.rope.tree.push(FugueSpan { @@ -72,7 +70,6 @@ impl Tracker { 
id_to_cursor: IdToCursor::default(), applied_vv: Default::default(), current_vv: Default::default(), - current_frontier_hint: None, } } @@ -176,7 +173,6 @@ impl Tracker { let end_id = op_id.inc(content.len() as Counter); self.current_vv.extend_to_include_end_id(end_id.id()); self.applied_vv.extend_to_include_end_id(end_id.id()); - self.current_frontier_hint = Some(ID::new(end_id.peer, end_id.counter - 1)); } fn update_insert_by_split(&mut self, split: &[LeafIndex]) { @@ -269,7 +265,6 @@ impl Tracker { let end_id = op_id.inc(len as Counter); self.current_vv.extend_to_include_end_id(end_id); self.applied_vv.extend_to_include_end_id(end_id); - self.current_frontier_hint = Some(ID::new(end_id.peer, end_id.counter - 1)); } fn skip_applied( @@ -365,7 +360,6 @@ impl Tracker { let end_id = op_id.inc(1); self.current_vv.extend_to_include_end_id(end_id.id()); self.applied_vv.extend_to_include_end_id(end_id.id()); - self.current_frontier_hint = Some(end_id.id().inc(-1)); } #[inline] @@ -378,68 +372,67 @@ impl Tracker { self._checkout_causal(vv, false); } + /// Checkout by applying directed peer spans. + /// + /// Forward spans use the normal `[start, end)` representation. Retreat spans + /// must use `CounterSpan`'s reversed representation for the same covered ids. 
+ pub(crate) fn checkout_peer_spans(&mut self, spans: &[IdSpan]) { + self._checkout_peer_spans(spans, false); + } + fn _checkout(&mut self, vv: &VersionVector, on_diff_status: bool) { // tracing::info!("Checkout to {:?} from {:?}", vv, self.current_vv); - let current_vv = std::mem::take(&mut self.current_vv); - let retreat: SmallVec<[IdSpan; 4]> = current_vv.sub_iter(vv).collect(); - let forward: SmallVec<[IdSpan; 4]> = vv.sub_iter(¤t_vv).collect(); - self._checkout_spans(current_vv, retreat, forward, on_diff_status, None); + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + spans.extend(self.current_vv.sub_iter(vv).map(reversed_span)); + spans.extend(vv.sub_iter(&self.current_vv)); + if on_diff_status { + self._checkout_peer_spans(&spans, true); + } else { + self.checkout_peer_spans(&spans); + } } fn _checkout_causal(&mut self, vv: CausalVersion<'_>, on_diff_status: bool) { - let current_vv = std::mem::take(&mut self.current_vv); - let mut retreat: SmallVec<[IdSpan; 4]> = SmallVec::new(); - for (&peer, &counter) in current_vv.iter() { + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + for (&peer, &counter) in self.current_vv.iter() { let target_end = vv.end_for_peer(peer); if counter > target_end { - retreat.push(IdSpan::new(peer, target_end, counter)); + spans.push(reversed_span(IdSpan::new(peer, target_end, counter))); } } - let mut forward: SmallVec<[IdSpan; 4]> = SmallVec::new(); for (&peer, &base_end) in vv.base().iter() { let target_end = if peer == vv.peer() { base_end.max(vv.peer_end()) } else { base_end }; - let current_end = current_vv.get(&peer).copied().unwrap_or(0); + let current_end = self.current_vv.get(&peer).copied().unwrap_or(0); if target_end > current_end { - forward.push(IdSpan::new(peer, current_end, target_end)); + spans.push(IdSpan::new(peer, current_end, target_end)); } } if !vv.base().contains_key(&vv.peer()) { let target_end = vv.peer_end(); - let current_end = current_vv.get(&vv.peer()).copied().unwrap_or(0); + let 
current_end = self.current_vv.get(&vv.peer()).copied().unwrap_or(0); if target_end > current_end { - forward.push(IdSpan::new(vv.peer(), current_end, target_end)); + spans.push(IdSpan::new(vv.peer(), current_end, target_end)); } } - self._checkout_spans( - current_vv, - retreat, - forward, - on_diff_status, - vv.single_frontier(), - ); + self._checkout_peer_spans(&spans, on_diff_status); } - fn _checkout_spans( - &mut self, - mut current_vv: VersionVector, - retreat: SmallVec<[IdSpan; 4]>, - forward: SmallVec<[IdSpan; 4]>, - on_diff_status: bool, - frontier_hint: Option, - ) { + fn _checkout_peer_spans(&mut self, spans: &[IdSpan], on_diff_status: bool) { + debug_assert_no_mixed_peer_directions(spans); if on_diff_status { self.rope.clear_diff_status(); } + let mut current_vv = std::mem::take(&mut self.current_vv); let mut updates = Vec::new(); - for &span in &retreat { + for &span in spans.iter().filter(|span| span.is_reversed()) { for c in self.id_to_cursor.iter(span) { match c { id_to_cursor::IterCursor::Insert { leaf, id_span } => { @@ -526,19 +519,19 @@ impl Tracker { } } - for &span in &forward { + for &span in spans.iter().filter(|span| !span.is_reversed()) { self.forward(span, &mut updates); } if !on_diff_status { - for span in retreat { - current_vv.set_end(ID::new(span.peer, span.counter.start)); - } - for span in forward { - current_vv.set_end(ID::new(span.peer, span.counter.end)); + for &span in spans { + if span.is_reversed() { + current_vv.shrink_to_exclude(span); + } else { + current_vv.extend_to_include(span); + } } self.current_vv = current_vv; - self.current_frontier_hint = frontier_hint; } else { self.current_vv = current_vv; } @@ -740,6 +733,25 @@ impl Tracker { } } +fn reversed_span(mut span: IdSpan) -> IdSpan { + span.reverse(); + span +} + +#[cfg(debug_assertions)] +fn debug_assert_no_mixed_peer_directions(spans: &[IdSpan]) { + for (index, span) in spans.iter().enumerate() { + for other in &spans[index + 1..] 
{ + if span.peer == other.peer { + debug_assert_eq!(span.is_reversed(), other.is_reversed()); + } + } + } +} + +#[cfg(not(debug_assertions))] +fn debug_assert_no_mixed_peer_directions(_spans: &[IdSpan]) {} + #[cfg(test)] mod test { use crate::{ @@ -767,20 +779,39 @@ mod test { } #[test] - fn checkout_causal_same_frontier_hint_retreats_other_peers() { + fn checkout_causal_single_frontier_retreats_other_peers() { let mut t = Tracker::new(); t.insert(IdFull::new(2, 0, 0), 0, RichtextChunk::new_text(0..2)); t.insert(IdFull::new(1, 0, 0), 2, RichtextChunk::new_text(2..4)); assert_eq!(t.rope.len(), 4); - assert_eq!(t.current_frontier_hint, Some(ID::new(1, 1))); let base = ImVersionVector::new(); - t.checkout_causal(CausalVersion::new(&base, 1, 2, Some(ID::new(1, 1)))); + t.checkout_causal(CausalVersion::new(&base, 1, 2)); assert_eq!(t.rope.len(), 2); assert_eq!(t.current_vv, vv!(1 => 2)); } + #[test] + fn checkout_peer_spans_uses_reversed_span_boundaries() { + let mut t = Tracker::new(); + t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..4)); + t.insert(IdFull::new(2, 0, 4), 4, RichtextChunk::new_text(4..6)); + assert_eq!(t.rope.len(), 6); + assert_eq!(t.current_vv, vv!(1 => 4, 2 => 2)); + + let retreat_peer_2 = reversed_span(IdSpan::new(2, 0, 2)); + t.checkout_peer_spans(&[retreat_peer_2]); + + assert_eq!(t.rope.len(), 4); + assert_eq!(t.current_vv, vv!(1 => 4)); + + t.checkout_peer_spans(&[IdSpan::new(2, 0, 2)]); + + assert_eq!(t.rope.len(), 6); + assert_eq!(t.current_vv, vv!(1 => 4, 2 => 2)); + } + #[test] fn test_retreat_and_forward_delete() { let mut t = Tracker::new(); diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index 844046ceb..057ce1007 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -257,7 +257,7 @@ impl DiffCalculator { let affected_set = { loro_common::debug!("LCA: {:?} mode={:?}", &lca, diff_mode); let mut started_set = FxHashSet::default(); - for 
(change, (start_counter, end_counter), base_vv, base_frontiers) in iter { + for (change, (start_counter, end_counter), base_vv, _base_frontiers) in iter { let iter_start = change .ops .binary_search_by(|op| op.ctr_last().cmp(&start_counter)) @@ -290,14 +290,7 @@ impl DiffCalculator { op = stack_sliced_op.as_ref().unwrap(); } - let base_peer_end = base_vv.get(&change.peer()).copied().unwrap_or(0); - let single_frontier = if op.counter > base_peer_end { - Some(ID::new(change.peer(), op.counter - 1)) - } else { - base_frontiers.as_single() - }; - let causal_vv = - CausalVersion::new(&base_vv, change.peer(), op.counter, single_frontier); + let causal_vv = CausalVersion::new(&base_vv, change.peer(), op.counter); let container = op.container; let depth = oplog.arena.get_depth(container); let (old_depth, calculator) = self.get_or_create_calc(container, depth); diff --git a/crates/loro-internal/src/version.rs b/crates/loro-internal/src/version.rs index 6b92fbfa4..7d4f4b6e7 100644 --- a/crates/loro-internal/src/version.rs +++ b/crates/loro-internal/src/version.rs @@ -170,22 +170,15 @@ pub(crate) struct CausalVersion<'a> { base: &'a ImVersionVector, peer: PeerID, peer_end: Counter, - single_frontier: Option, } impl<'a> CausalVersion<'a> { #[inline] - pub(crate) fn new( - base: &'a ImVersionVector, - peer: PeerID, - peer_end: Counter, - single_frontier: Option, - ) -> Self { + pub(crate) fn new(base: &'a ImVersionVector, peer: PeerID, peer_end: Counter) -> Self { Self { base, peer, peer_end, - single_frontier, } } @@ -204,11 +197,6 @@ impl<'a> CausalVersion<'a> { self.peer_end } - #[inline] - pub(crate) fn single_frontier(&self) -> Option { - self.single_frontier - } - #[inline] pub(crate) fn end_for_peer(&self, peer: PeerID) -> Counter { let base_end = self.base.get(&peer).copied().unwrap_or(0); From 91e5ceb61e84830cfe1ecee08818684162779445 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 10:12:35 +0000 Subject: [PATCH 37/46] perf: filter richtext checkout 
spans by coverage --- crates/loro-internal/benches/text_checkout.rs | 20 ++ .../loro-internal/src/container/richtext.rs | 2 +- .../src/container/richtext/tracker.rs | 145 ++++++++++++- .../richtext/tracker/id_to_cursor.rs | 24 +++ crates/loro-internal/src/diff_calc.rs | 190 ++++++++++++++++-- crates/loro-internal/src/loro.rs | 13 ++ 6 files changed, 363 insertions(+), 31 deletions(-) diff --git a/crates/loro-internal/benches/text_checkout.rs b/crates/loro-internal/benches/text_checkout.rs index da5d3da53..6febe7217 100644 --- a/crates/loro-internal/benches/text_checkout.rs +++ b/crates/loro-internal/benches/text_checkout.rs @@ -61,6 +61,11 @@ mod text_checkout { richtext_insert_future_scan_visited: u64, richtext_insert_future_scan_max_visited: usize, causal_vv_materialize_count: u64, + richtext_tracker_span_count: u64, + richtext_tracker_filtered_span_count: u64, + richtext_tracker_skipped_span_count: u64, + richtext_id_to_cursor_iter_count: u64, + richtext_id_to_cursor_empty_iter_count: u64, recording_event_samples: u64, forward_diff_calculator_samples: u64, } @@ -100,6 +105,13 @@ mod text_checkout { .richtext_insert_future_scan_max_visited .max(profile.richtext_insert_future_scan_max_visited); self.causal_vv_materialize_count += profile.causal_vv_materialize_count; + self.richtext_tracker_span_count += profile.richtext_tracker_span_count; + self.richtext_tracker_filtered_span_count += + profile.richtext_tracker_filtered_span_count; + self.richtext_tracker_skipped_span_count += profile.richtext_tracker_skipped_span_count; + self.richtext_id_to_cursor_iter_count += profile.richtext_id_to_cursor_iter_count; + self.richtext_id_to_cursor_empty_iter_count += + profile.richtext_id_to_cursor_empty_iter_count; if profile.recording_events { self.recording_event_samples += 1; } @@ -737,6 +749,9 @@ mod text_checkout { "richtext_insert_future_scan_calls={richtext_insert_future_scan_calls}, ", "avg_future_scan_visited={avg_future_scan_visited}, ", 
"max_future_scan_visited={max_future_scan_visited}, ", + "tracker_spans={tracker_spans}, filtered_tracker_spans={filtered_tracker_spans}, ", + "skipped_tracker_spans={skipped_tracker_spans}, id_to_cursor_iters={id_to_cursor_iters}, ", + "empty_id_to_cursor_iters={empty_id_to_cursor_iters}, ", "max_frontiers_width={max_frontiers_width}, max_vv_width={max_vv_width}, ", "max_diff_containers={max_diff_containers}, recording_event_samples={recording_event_samples}, ", "forward_diff_calculator_samples={forward_diff_calculator_samples}, ", @@ -774,6 +789,11 @@ mod text_checkout { richtext_insert_future_scan_calls = totals.richtext_insert_future_scan_count, avg_future_scan_visited = avg_future_scan_visited, max_future_scan_visited = totals.richtext_insert_future_scan_max_visited, + tracker_spans = totals.richtext_tracker_span_count, + filtered_tracker_spans = totals.richtext_tracker_filtered_span_count, + skipped_tracker_spans = totals.richtext_tracker_skipped_span_count, + id_to_cursor_iters = totals.richtext_id_to_cursor_iter_count, + empty_id_to_cursor_iters = totals.richtext_id_to_cursor_empty_iter_count, max_frontiers_width = totals.max_frontiers_width, max_vv_width = totals.max_vv_width, max_diff_containers = totals.max_diff_container_count, diff --git a/crates/loro-internal/src/container/richtext.rs b/crates/loro-internal/src/container/richtext.rs index ffe4aa339..7682b1e47 100644 --- a/crates/loro-internal/src/container/richtext.rs +++ b/crates/loro-internal/src/container/richtext.rs @@ -28,7 +28,7 @@ use std::fmt::Debug; pub(crate) use fugue_span::{RichtextChunk, RichtextChunkValue}; pub(crate) use richtext_state::RichtextState; pub(crate) use style_range_map::Styles; -pub(crate) use tracker::{CrdtRopeDelta, Tracker as RichtextTracker}; +pub(crate) use tracker::{CrdtRopeDelta, PeerSpanCoverage, Tracker as RichtextTracker}; /// This is the data structure that represents a span of rich text. /// It's used to communicate with the frontend. 
diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index eecf749be..4357d65e5 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -4,8 +4,9 @@ use generic_btree::{ rle::{HasLength as _, Sliceable}, LeafIndex, }; -use loro_common::{Counter, HasId, HasIdSpan, IdFull, IdSpan, Lamport, PeerID, ID}; +use loro_common::{Counter, CounterSpan, HasId, HasIdSpan, IdFull, IdSpan, Lamport, PeerID, ID}; use rle::HasLength as _; +use rustc_hash::FxHashMap; use smallvec::SmallVec; use tracing::instrument; @@ -23,6 +24,8 @@ mod crdt_rope; mod id_to_cursor; pub(crate) use crdt_rope::CrdtRopeDelta; +pub(crate) type PeerSpanCoverage = FxHashMap; + #[derive(Debug)] pub(crate) struct Tracker { applied_vv: VersionVector, @@ -377,22 +380,58 @@ impl Tracker { /// Forward spans use the normal `[start, end)` representation. Retreat spans /// must use `CounterSpan`'s reversed representation for the same covered ids. 
pub(crate) fn checkout_peer_spans(&mut self, spans: &[IdSpan]) { - self._checkout_peer_spans(spans, false); + self._checkout_peer_spans(spans, false, None); + } + + pub(crate) fn checkout_with_coverage( + &mut self, + vv: &VersionVector, + coverage: &PeerSpanCoverage, + ) { + let spans = self.checkout_spans_to_version(vv); + self.checkout_peer_spans_with_coverage(&spans, coverage); + } + + pub(crate) fn checkout_peer_spans_with_coverage( + &mut self, + spans: &[IdSpan], + coverage: &PeerSpanCoverage, + ) { + self._checkout_peer_spans(spans, false, Some(coverage)); + } + + pub(crate) fn checkout_causal_with_coverage( + &mut self, + vv: CausalVersion<'_>, + coverage: &PeerSpanCoverage, + ) { + let spans = self.checkout_spans_to_causal(vv); + self.checkout_peer_spans_with_coverage(&spans, coverage); } fn _checkout(&mut self, vv: &VersionVector, on_diff_status: bool) { // tracing::info!("Checkout to {:?} from {:?}", vv, self.current_vv); - let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); - spans.extend(self.current_vv.sub_iter(vv).map(reversed_span)); - spans.extend(vv.sub_iter(&self.current_vv)); + let spans = self.checkout_spans_to_version(vv); if on_diff_status { - self._checkout_peer_spans(&spans, true); + self._checkout_peer_spans(&spans, true, None); } else { self.checkout_peer_spans(&spans); } } fn _checkout_causal(&mut self, vv: CausalVersion<'_>, on_diff_status: bool) { + let spans = self.checkout_spans_to_causal(vv); + self._checkout_peer_spans(&spans, on_diff_status, None); + } + + fn checkout_spans_to_version(&self, vv: &VersionVector) -> SmallVec<[IdSpan; 4]> { + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + spans.extend(self.current_vv.sub_iter(vv).map(reversed_span)); + spans.extend(vv.sub_iter(&self.current_vv)); + spans + } + + fn checkout_spans_to_causal(&self, vv: CausalVersion<'_>) -> SmallVec<[IdSpan; 4]> { let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); for (&peer, &counter) in self.current_vv.iter() { let target_end = 
vv.end_for_peer(peer); @@ -421,18 +460,29 @@ impl Tracker { } } - self._checkout_peer_spans(&spans, on_diff_status); + spans } - fn _checkout_peer_spans(&mut self, spans: &[IdSpan], on_diff_status: bool) { + fn _checkout_peer_spans( + &mut self, + spans: &[IdSpan], + on_diff_status: bool, + coverage: Option<&PeerSpanCoverage>, + ) { debug_assert_no_mixed_peer_directions(spans); if on_diff_status { self.rope.clear_diff_status(); } + let filtered_spans = filter_spans_by_coverage(spans, coverage); + #[cfg(feature = "test_utils")] + crate::diff_calc::profiling::record_richtext_tracker_span_filter( + spans.len(), + filtered_spans.len(), + ); let mut current_vv = std::mem::take(&mut self.current_vv); let mut updates = Vec::new(); - for &span in spans.iter().filter(|span| span.is_reversed()) { + for &span in filtered_spans.iter().filter(|span| span.is_reversed()) { for c in self.id_to_cursor.iter(span) { match c { id_to_cursor::IterCursor::Insert { leaf, id_span } => { @@ -519,7 +569,7 @@ impl Tracker { } } - for &span in spans.iter().filter(|span| !span.is_reversed()) { + for &span in filtered_spans.iter().filter(|span| !span.is_reversed()) { self.forward(span, &mut updates); } @@ -731,6 +781,20 @@ impl Tracker { self.rope.get_diff() } + + pub(crate) fn diff_with_coverage( + &mut self, + from: &VersionVector, + to: &VersionVector, + coverage: &PeerSpanCoverage, + ) -> impl Iterator<Item = CrdtRopeDelta> + '_ { + let spans = self.checkout_spans_to_version(from); + self._checkout_peer_spans(&spans, false, Some(coverage)); + let spans = self.checkout_spans_to_version(to); + self._checkout_peer_spans(&spans, true, Some(coverage)); + + self.rope.get_diff() + } } fn reversed_span(mut span: IdSpan) -> IdSpan { @@ -738,6 +802,34 @@ fn reversed_span(mut span: IdSpan) -> IdSpan { span } +fn filter_spans_by_coverage( + spans: &[IdSpan], + coverage: Option<&PeerSpanCoverage>, +) -> SmallVec<[IdSpan; 4]> { + match coverage { + Some(coverage) => spans + .iter() + .filter_map(|span| 
intersect_span_with_coverage(*span, coverage)) + .collect(), + None => spans.iter().copied().collect(), + } +} + +fn intersect_span_with_coverage(span: IdSpan, coverage: &PeerSpanCoverage) -> Option<IdSpan> { + let coverage = coverage.get(&span.peer)?; + let start = span.counter.min().max(coverage.min()); + let end = span.counter.norm_end().min(coverage.norm_end()); + if start >= end { + return None; + } + + let mut ans = IdSpan::new(span.peer, start, end); + if span.is_reversed() { + ans.reverse(); + } + Some(ans) +} + #[cfg(debug_assertions)] fn debug_assert_no_mixed_peer_directions(spans: &[IdSpan]) { for (index, span) in spans.iter().enumerate() { @@ -812,6 +904,39 @@ mod test { assert_eq!(t.current_vv, vv!(1 => 4, 2 => 2)); } + #[test] + fn span_coverage_intersection_preserves_direction() { + let mut coverage = PeerSpanCoverage::default(); + coverage.insert(1, CounterSpan::new(3, 6)); + + assert_eq!( + intersect_span_with_coverage(IdSpan::new(1, 0, 10), &coverage), + Some(IdSpan::new(1, 3, 6)) + ); + + let reversed = reversed_span(IdSpan::new(1, 0, 10)); + let expected = reversed_span(IdSpan::new(1, 3, 6)); + assert_eq!( + intersect_span_with_coverage(reversed, &coverage), + Some(expected) + ); + } + + #[test] + fn coverage_filtered_checkout_still_updates_current_vv() { + let mut t = Tracker::new(); + t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..4)); + t.current_vv.set_end(ID::new(2, 5)); + assert_eq!(t.current_vv, vv!(1 => 4, 2 => 5)); + + let mut coverage = PeerSpanCoverage::default(); + coverage.insert(1, CounterSpan::new(0, 4)); + t.checkout_peer_spans_with_coverage(&[reversed_span(IdSpan::new(2, 0, 5))], &coverage); + + assert_eq!(t.rope.len(), 4); + assert_eq!(t.current_vv, vv!(1 => 4)); + } + #[test] fn test_retreat_and_forward_delete() { let mut t = Tracker::new(); diff --git a/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs b/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs index d716069a3..7d7d16d1f 
100644 --- a/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs +++ b/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs @@ -200,11 +200,15 @@ impl IdToCursor { } pub fn iter(&self, mut iter_id_span: IdSpan) -> impl Iterator + '_ { + #[cfg(feature = "test_utils")] + crate::diff_calc::profiling::record_richtext_id_to_cursor_iter_call(); iter_id_span.normalize_(); let list = self.map.get(&iter_id_span.peer).unwrap_or(&EMPTY_VEC); // Index in the list let mut index = 0; let mut insert_set_iter: Option>> = None; + #[cfg(feature = "test_utils")] + let mut yielded = false; if !list.is_empty() { index = match list.binary_search_by_key(&iter_id_span.counter.start, |x| x.counter) { @@ -215,6 +219,10 @@ impl IdToCursor { std::iter::from_fn(move || loop { if index >= list.len() { + #[cfg(feature = "test_utils")] + if !yielded { + crate::diff_calc::profiling::record_richtext_id_to_cursor_empty_iter(); + } return None; } @@ -227,12 +235,20 @@ impl IdToCursor { continue; }; + #[cfg(feature = "test_utils")] + { + yielded = true; + } return Some(next); } let f = &list[index]; let iter_counter = f.counter; if iter_counter >= iter_id_span.counter.end { + #[cfg(feature = "test_utils")] + if !yielded { + crate::diff_calc::profiling::record_richtext_id_to_cursor_empty_iter(); + } return None; } @@ -262,11 +278,19 @@ impl IdToCursor { continue; } + #[cfg(feature = "test_utils")] + { + yielded = true; + } return Some(IterCursor::Delete(span.slice(from as usize, to as usize))); } Cursor::Move { from, to } => { index += 1; let op_id = ID::new(iter_id_span.peer, f.counter); + #[cfg(feature = "test_utils")] + { + yielded = true; + } return Some(IterCursor::Move { from_id: *from, to_leaf: *to, diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index 057ce1007..da02aaf05 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -12,7 +12,8 @@ use itertools::Itertools; use 
enum_dispatch::enum_dispatch; use loro_common::{ - CompactIdLp, ContainerID, Counter, HasCounterSpan, IdFull, IdLp, IdSpan, LoroValue, PeerID, ID, + CompactIdLp, ContainerID, Counter, CounterSpan, HasCounterSpan, IdFull, IdLp, IdSpan, + LoroValue, PeerID, ID, }; use loro_delta::DeltaRope; use rustc_hash::{FxHashMap, FxHashSet}; @@ -26,7 +27,8 @@ use crate::{ list::list_op::InnerListOp, richtext::{ richtext_state::{RichtextStateChunk, TextChunk}, - AnchorType, CrdtRopeDelta, RichtextChunk, RichtextChunkValue, RichtextTracker, StyleOp, + AnchorType, CrdtRopeDelta, PeerSpanCoverage, RichtextChunk, RichtextChunkValue, + RichtextTracker, StyleOp, }, }, cursor::AbsolutePosition, @@ -65,6 +67,11 @@ pub(crate) mod profiling { pub richtext_insert_future_scan_max_visited: usize, pub causal_vv_materialize_count: u64, pub max_causal_vv_width: usize, + pub richtext_tracker_span_count: u64, + pub richtext_tracker_filtered_span_count: u64, + pub richtext_tracker_skipped_span_count: u64, + pub richtext_id_to_cursor_iter_count: u64, + pub richtext_id_to_cursor_empty_iter_count: u64, } thread_local! 
{ @@ -129,6 +136,33 @@ pub(crate) mod profiling { } }); } + + pub(crate) fn record_richtext_tracker_span_filter(input: usize, filtered: usize) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_tracker_span_count += input as u64; + profile.richtext_tracker_filtered_span_count += filtered as u64; + profile.richtext_tracker_skipped_span_count += + input.saturating_sub(filtered) as u64; + } + }); + } + + pub(crate) fn record_richtext_id_to_cursor_iter_call() { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_id_to_cursor_iter_count += 1; + } + }); + } + + pub(crate) fn record_richtext_id_to_cursor_empty_iter() { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_id_to_cursor_empty_iter_count += 1; + } + }); + } } /// Calculate the diff between two versions. given [OpLog][super::oplog::OpLog] @@ -635,6 +669,7 @@ use rle::{HasLength as _, Sliceable}; pub(crate) struct ListDiffCalculator { start_vv: VersionVector, tracker: Box<RichtextTracker>, + coverage: PeerSpanCoverage, } impl ListDiffCalculator { @@ -664,9 +699,10 @@ impl DiffCalculatorTrait for ListDiffCalculator { if !vv.includes_vv(&self.start_vv) || !self.tracker.all_vv().includes_vv(vv) { *self.tracker = RichtextTracker::new_with_unknown(); self.start_vv = vv.clone(); + self.coverage.clear(); } - self.tracker.checkout(vv); + richtext_tracker_checkout_with_coverage(&mut self.tracker, vv, &self.coverage); } fn apply_change( @@ -676,7 +712,7 @@ impl DiffCalculatorTrait for ListDiffCalculator { vv: Option<CausalVersion<'_>>, ) { if let Some(vv) = vv { - self.tracker.checkout_causal(vv); + richtext_tracker_checkout_causal_with_coverage(&mut self.tracker, vv, &self.coverage); } match &op.op().content { @@ -701,6 +737,8 @@ impl DiffCalculatorTrait for ListDiffCalculator { }, _ => unreachable!(), } + + record_op_coverage(&mut self.coverage, &op); } fn finish_this_round(&mut self) {} @@ -713,7 
+751,15 @@ impl DiffCalculatorTrait for ListDiffCalculator { mut on_new_container: impl FnMut(&ContainerID), ) -> (InternalDiff, DiffMode) { let mut delta = Delta::new(); - for item in self.tracker.diff(info.from_vv, info.to_vv) { + let diff_iter = if self.coverage.is_empty() { + Either::Left(self.tracker.diff(info.from_vv, info.to_vv)) + } else { + Either::Right( + self.tracker + .diff_with_coverage(info.from_vv, info.to_vv, &self.coverage), + ) + }; + for item in diff_iter { match item { CrdtRopeDelta::Retain(len) => { delta = delta.retain(len); @@ -844,6 +890,7 @@ enum RichtextCalcMode { /// (op, end_pos) styles: Vec<(StyleOp, usize)>, start_vv: VersionVector, + coverage: PeerSpanCoverage, }, Linear { diff: DeltaRope, @@ -859,6 +906,7 @@ impl RichtextDiffCalculator { tracker: Box::new(RichtextTracker::new_with_unknown()), styles: Vec::new(), start_vv: VersionVector::new(), + coverage: PeerSpanCoverage::default(), }), } } @@ -877,27 +925,105 @@ impl RichtextDiffCalculator { } #[cfg(feature = "test_utils")] -fn richtext_tracker_checkout(tracker: &mut RichtextTracker, vv: &VersionVector) { +fn richtext_tracker_checkout_with_coverage( + tracker: &mut RichtextTracker, + vv: &VersionVector, + coverage: &PeerSpanCoverage, +) { let start = std::time::Instant::now(); - tracker.checkout(vv); + if coverage.is_empty() { + tracker.checkout(vv); + } else { + tracker.checkout_with_coverage(vv, coverage); + } profiling::record_richtext_tracker_checkout(start.elapsed()); } #[cfg(feature = "test_utils")] -fn richtext_tracker_checkout_causal(tracker: &mut RichtextTracker, vv: CausalVersion<'_>) { +fn richtext_tracker_checkout_causal_with_coverage( + tracker: &mut RichtextTracker, + vv: CausalVersion<'_>, + coverage: &PeerSpanCoverage, +) { let start = std::time::Instant::now(); - tracker.checkout_causal(vv); + if coverage.is_empty() { + tracker.checkout_causal(vv); + } else { + tracker.checkout_causal_with_coverage(vv, coverage); + } 
profiling::record_richtext_tracker_checkout(start.elapsed()); } #[cfg(not(feature = "test_utils"))] -fn richtext_tracker_checkout(tracker: &mut RichtextTracker, vv: &VersionVector) { - tracker.checkout(vv); +fn richtext_tracker_checkout_with_coverage( + tracker: &mut RichtextTracker, + vv: &VersionVector, + coverage: &PeerSpanCoverage, +) { + if coverage.is_empty() { + tracker.checkout(vv); + } else { + tracker.checkout_with_coverage(vv, coverage); + } } #[cfg(not(feature = "test_utils"))] -fn richtext_tracker_checkout_causal(tracker: &mut RichtextTracker, vv: CausalVersion<'_>) { - tracker.checkout_causal(vv); +fn richtext_tracker_checkout_causal_with_coverage( + tracker: &mut RichtextTracker, + vv: CausalVersion<'_>, + coverage: &PeerSpanCoverage, +) { + if coverage.is_empty() { + tracker.checkout_causal(vv); + } else { + tracker.checkout_causal_with_coverage(vv, coverage); + } +} + +fn seed_coverage_from_state_chunks(coverage: &mut PeerSpanCoverage, chunks: &[RichtextStateChunk]) { + coverage.clear(); + for chunk in chunks { + let RichtextStateChunk::Text(text) = chunk else { + continue; + }; + let id = text.id(); + record_coverage_span( + coverage, + IdSpan::new( + id.peer, + id.counter, + id.counter + text.unicode_len() as Counter, + ), + ); + } +} + +fn record_op_coverage(coverage: &mut PeerSpanCoverage, op: &crate::op::RichOp<'_>) { + record_coverage_span( + coverage, + IdSpan::new( + op.peer, + op.counter(), + op.counter() + op.atom_len() as Counter, + ), + ); +} + +fn record_coverage_span(coverage: &mut PeerSpanCoverage, span: IdSpan) { + if span.peer == PeerID::MAX || span.atom_len() == 0 { + return; + } + + let start = span.counter.min(); + let end = span.counter.norm_end(); + coverage + .entry(span.peer) + .and_modify(|coverage| { + let start = coverage.min().min(start); + let end = coverage.norm_end().max(end); + *coverage = CounterSpan::new(start, end); + }) + .or_insert_with(|| CounterSpan::new(start, end)); } impl DiffCalculatorTrait for 
RichtextDiffCalculator { @@ -926,6 +1052,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { tracker, styles, start_vv, + coverage, } => { let shallow_root_vv = oplog.dag().frontiers_to_vv(oplog.shallow_since_frontiers()); if shallow_root_vv.as_ref() == Some(vv) { @@ -939,7 +1066,8 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { **tracker = seeded_tracker; *styles = seeded_styles; *start_vv = vv.clone(); - richtext_tracker_checkout(tracker, vv); + seed_coverage_from_state_chunks(coverage, &chunks); + richtext_tracker_checkout_with_coverage(tracker, vv, coverage); return; } } @@ -949,9 +1077,10 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { **tracker = RichtextTracker::new_with_unknown(); styles.clear(); *start_vv = vv.clone(); + coverage.clear(); } - richtext_tracker_checkout(tracker, vv); + richtext_tracker_checkout_with_coverage(tracker, vv, coverage); } RichtextCalcMode::Linear { .. } => {} } @@ -1062,9 +1191,10 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { tracker, styles, start_vv: _, + coverage, } => { if let Some(vv) = vv { - richtext_tracker_checkout_causal(tracker, vv); + richtext_tracker_checkout_causal_with_coverage(tracker, vv, coverage); } match &op.raw_op().content { crate::op::InnerContent::List(l) => match l { @@ -1181,6 +1311,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { }, _ => unreachable!(), } + record_op_coverage(coverage, &op); } } } @@ -1198,12 +1329,19 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { DiffMode::Linear, ), RichtextCalcMode::Crdt { - tracker, styles, .. + tracker, + styles, + coverage, + .. 
} => { let mut delta = DeltaRope::new(); #[cfg(feature = "test_utils")] let tracker_diff_start = std::time::Instant::now(); - let diff_iter = tracker.diff(info.from_vv, info.to_vv); + let diff_iter = if coverage.is_empty() { + Either::Left(tracker.diff(info.from_vv, info.to_vv)) + } else { + Either::Right(tracker.diff_with_coverage(info.from_vv, info.to_vv, coverage)) + }; #[cfg(feature = "test_utils")] profiling::record_richtext_tracker_diff(tracker_diff_start.elapsed()); #[cfg(feature = "test_utils")] @@ -1337,9 +1475,10 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { if !vv.includes_vv(&self.list.start_vv) || !self.list.tracker.all_vv().includes_vv(vv) { *self.list.tracker = RichtextTracker::new_with_unknown(); self.list.start_vv = vv.clone(); + self.list.coverage.clear(); } - self.list.tracker.checkout(vv); + richtext_tracker_checkout_with_coverage(&mut self.list.tracker, vv, &self.list.coverage); self.inner.current_mode = mode; } @@ -1433,13 +1572,19 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { // Apply change on the list items let this = &mut self.list; if let Some(vv) = vv { - this.tracker.checkout_causal(vv); + richtext_tracker_checkout_causal_with_coverage( + &mut this.tracker, + vv, + &this.coverage, + ); } let real_op = op.op(); + let mut updates_tracker = false; match &real_op.content { crate::op::InnerContent::List(l) => match l { InnerListOp::Insert { slice, pos } => { + updates_tracker = true; this.tracker.insert( op.id_full(), *pos, @@ -1447,6 +1592,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { ); } InnerListOp::Delete(del) => { + updates_tracker = true; this.tracker.delete( op.id_start(), del.id_start, @@ -1456,6 +1602,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { ); } InnerListOp::Move { from, elem_id, to } => { + updates_tracker = true; self.inner.move_id_to_elem_id.insert(op.id(), *elem_id); if !this.tracker.current_vv().includes_id(op.id()) { let last_pos = if is_checkout { @@ -1503,6 
+1650,9 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { }, _ => unreachable!(), } + if updates_tracker { + record_op_coverage(&mut this.coverage, &op); + } }; } diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index 3c65f82dd..a6705b8d1 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -88,6 +88,11 @@ pub struct CheckoutProfile { pub richtext_insert_future_scan_max_visited: usize, pub causal_vv_materialize_count: u64, pub max_causal_vv_width: usize, + pub richtext_tracker_span_count: u64, + pub richtext_tracker_filtered_span_count: u64, + pub richtext_tracker_skipped_span_count: u64, + pub richtext_id_to_cursor_iter_count: u64, + pub richtext_id_to_cursor_empty_iter_count: u64, pub recording_events: bool, pub forward_diff_calculator: bool, } @@ -1881,6 +1886,14 @@ impl LoroDoc { diff_profile.richtext_insert_future_scan_max_visited; profile.causal_vv_materialize_count = diff_profile.causal_vv_materialize_count; profile.max_causal_vv_width = diff_profile.max_causal_vv_width; + profile.richtext_tracker_span_count = diff_profile.richtext_tracker_span_count; + profile.richtext_tracker_filtered_span_count = + diff_profile.richtext_tracker_filtered_span_count; + profile.richtext_tracker_skipped_span_count = + diff_profile.richtext_tracker_skipped_span_count; + profile.richtext_id_to_cursor_iter_count = diff_profile.richtext_id_to_cursor_iter_count; + profile.richtext_id_to_cursor_empty_iter_count = + diff_profile.richtext_id_to_cursor_empty_iter_count; profile.diff_container_count = diff.len(); let apply_start = std::time::Instant::now(); From b8ee18f689c9f39632eccd35ed8247686ba68d85 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 10:15:44 +0000 Subject: [PATCH 38/46] docs: record fast diff calc benchmark results --- plans/20260522-fast-diff-calc.md | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/plans/20260522-fast-diff-calc.md 
b/plans/20260522-fast-diff-calc.md index 3012f1b76..8d8b8c224 100644 --- a/plans/20260522-fast-diff-calc.md +++ b/plans/20260522-fast-diff-calc.md @@ -8,6 +8,39 @@ Improve checkout diff calculation for documents with many peers and many text/li The target direction is to move tracker checkout APIs away from "checkout to target `VersionVector`" and toward "apply these directed counter spans". The diff calculator should compute or route the relevant spans once, then pass only container-relevant spans into each tracker. +## Implementation Status + +Implemented on branch `feat/scale-text-checkout-perf`: + +- `c350b0e8 bench: add many text checkout scenario` +- `5c3cd62a refactor: route richtext checkout through spans` +- `91e5ceb6 perf: filter richtext checkout spans by coverage` + +Current implementation covers: + +- Phase 0 profiling counters for tracker spans, filtered spans, skipped spans, `IdToCursor::iter` calls, and empty `IdToCursor::iter` calls. +- Phase 1 directed richtext tracker span checkout API, with existing `checkout`, `checkout_causal`, and `diff` APIs kept as adapters. +- Removal of the tracker-only `current_frontier_hint`. +- Phase 2 per-container coverage filtering for text/list/movable-list richtext trackers, with conservative fallback when coverage is unavailable. +- Phase 3 filtered final diff materialization through coverage-aware tracker diff. 
+ +Benchmark notes for `multi-container/latest-to-base` with the default 1000 peers, 10000 changes, 10000 text containers, 8 large text containers, and `LORO_TEXT_CHECKOUT_PROFILE=1`: + +| Version | Time | Avg total | Avg diff calc | Avg tracker checkout | Avg tracker diff | +| --- | ---: | ---: | ---: | ---: | ---: | +| `c350b0e8` baseline | 905.53-908.52 ms | 916.002244 ms | 900.339939 ms | 412.268949 ms | 424.615097 ms | +| `91e5ceb6` current | 811.54-879.43 ms | 842.044498 ms | 825.496539 ms | 476.979320 ms | 278.143902 ms | + +Current profiling counters for the same run: + +- `tracker_spans=377117000` +- `filtered_tracker_spans=123753500` +- `skipped_tracker_spans=253363500` +- `id_to_cursor_iters=123753500` +- `empty_id_to_cursor_iters=123623500` + +This shows the routing is skipping about two thirds of tracker span checks in the target benchmark. The remaining empty iterator count is still high because the first implementation stores one broad coverage span per `(container, peer)`, which intentionally allows false positives. Phase 4/5 should only be considered if this remaining cost shows up in production profiles. 
+ ## Current Architecture The current checkout diff flow is: From 57bfd6758d4c9e71b54428ee47381a2a6a4118ec Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 10:20:25 +0000 Subject: [PATCH 39/46] bench: report checkout span averages --- crates/loro-internal/benches/text_checkout.rs | 38 ++++++++++++++++++- crates/loro-internal/src/diff_calc.rs | 8 ++++ crates/loro-internal/src/loro.rs | 8 ++++ plans/20260522-fast-diff-calc.md | 11 +++++- 4 files changed, 62 insertions(+), 3 deletions(-) diff --git a/crates/loro-internal/benches/text_checkout.rs b/crates/loro-internal/benches/text_checkout.rs index 6febe7217..10e97d6b4 100644 --- a/crates/loro-internal/benches/text_checkout.rs +++ b/crates/loro-internal/benches/text_checkout.rs @@ -54,6 +54,7 @@ mod text_checkout { max_vv_width: usize, max_causal_vv_width: usize, max_diff_container_count: usize, + diff_container_count_sum: u64, richtext_tracker_checkout_count: u64, richtext_tracker_diff_count: u64, richtext_delta_build_count: u64, @@ -61,9 +62,12 @@ mod text_checkout { richtext_insert_future_scan_visited: u64, richtext_insert_future_scan_max_visited: usize, causal_vv_materialize_count: u64, + richtext_tracker_span_filter_count: u64, richtext_tracker_span_count: u64, richtext_tracker_filtered_span_count: u64, richtext_tracker_skipped_span_count: u64, + richtext_tracker_max_span_count: usize, + richtext_tracker_max_filtered_span_count: usize, richtext_id_to_cursor_iter_count: u64, richtext_id_to_cursor_empty_iter_count: u64, recording_event_samples: u64, @@ -96,6 +100,7 @@ mod text_checkout { self.max_diff_container_count = self .max_diff_container_count .max(profile.diff_container_count); + self.diff_container_count_sum += profile.diff_container_count as u64; self.richtext_tracker_checkout_count += profile.richtext_tracker_checkout_count; self.richtext_tracker_diff_count += profile.richtext_tracker_diff_count; self.richtext_delta_build_count += profile.richtext_delta_build_count; @@ -105,10 +110,17 @@ 
mod text_checkout { .richtext_insert_future_scan_max_visited .max(profile.richtext_insert_future_scan_max_visited); self.causal_vv_materialize_count += profile.causal_vv_materialize_count; + self.richtext_tracker_span_filter_count += profile.richtext_tracker_span_filter_count; self.richtext_tracker_span_count += profile.richtext_tracker_span_count; self.richtext_tracker_filtered_span_count += profile.richtext_tracker_filtered_span_count; self.richtext_tracker_skipped_span_count += profile.richtext_tracker_skipped_span_count; + self.richtext_tracker_max_span_count = self + .richtext_tracker_max_span_count + .max(profile.richtext_tracker_max_span_count); + self.richtext_tracker_max_filtered_span_count = self + .richtext_tracker_max_filtered_span_count + .max(profile.richtext_tracker_max_filtered_span_count); self.richtext_id_to_cursor_iter_count += profile.richtext_id_to_cursor_iter_count; self.richtext_id_to_cursor_empty_iter_count += profile.richtext_id_to_cursor_empty_iter_count; @@ -726,6 +738,18 @@ mod text_checkout { .richtext_insert_future_scan_visited .checked_div(totals.richtext_insert_future_scan_count) .unwrap_or(0); + let avg_tracker_spans_per_checkout = totals + .richtext_tracker_span_count + .checked_div(totals.richtext_tracker_span_filter_count) + .unwrap_or(0); + let avg_filtered_tracker_spans_per_checkout = totals + .richtext_tracker_filtered_span_count + .checked_div(totals.richtext_tracker_span_filter_count) + .unwrap_or(0); + let avg_diff_containers = totals + .diff_container_count_sum + .checked_div(totals.samples) + .unwrap_or(0); eprintln!( concat!( "[text-checkout-profile] {name}: scenario={scenario}, peers={peers}, ", @@ -752,8 +776,14 @@ mod text_checkout { "tracker_spans={tracker_spans}, filtered_tracker_spans={filtered_tracker_spans}, ", "skipped_tracker_spans={skipped_tracker_spans}, id_to_cursor_iters={id_to_cursor_iters}, ", "empty_id_to_cursor_iters={empty_id_to_cursor_iters}, ", + 
"tracker_span_filter_calls={tracker_span_filter_calls}, ", + "avg_tracker_spans_per_checkout={avg_tracker_spans_per_checkout}, ", + "max_tracker_spans_per_checkout={max_tracker_spans_per_checkout}, ", + "avg_filtered_tracker_spans_per_checkout={avg_filtered_tracker_spans_per_checkout}, ", + "max_filtered_tracker_spans_per_checkout={max_filtered_tracker_spans_per_checkout}, ", "max_frontiers_width={max_frontiers_width}, max_vv_width={max_vv_width}, ", - "max_diff_containers={max_diff_containers}, recording_event_samples={recording_event_samples}, ", + "avg_diff_containers={avg_diff_containers}, max_diff_containers={max_diff_containers}, ", + "recording_event_samples={recording_event_samples}, ", "forward_diff_calculator_samples={forward_diff_calculator_samples}, ", "richtext_tree_nodes={richtext_tree_nodes}, richtext_chunks={richtext_chunks}, ", "text_chunks={text_chunks}, style_anchors={style_anchors}, ", @@ -794,8 +824,14 @@ mod text_checkout { skipped_tracker_spans = totals.richtext_tracker_skipped_span_count, id_to_cursor_iters = totals.richtext_id_to_cursor_iter_count, empty_id_to_cursor_iters = totals.richtext_id_to_cursor_empty_iter_count, + tracker_span_filter_calls = totals.richtext_tracker_span_filter_count, + avg_tracker_spans_per_checkout = avg_tracker_spans_per_checkout, + max_tracker_spans_per_checkout = totals.richtext_tracker_max_span_count, + avg_filtered_tracker_spans_per_checkout = avg_filtered_tracker_spans_per_checkout, + max_filtered_tracker_spans_per_checkout = totals.richtext_tracker_max_filtered_span_count, max_frontiers_width = totals.max_frontiers_width, max_vv_width = totals.max_vv_width, + avg_diff_containers = avg_diff_containers, max_diff_containers = totals.max_diff_container_count, recording_event_samples = totals.recording_event_samples, forward_diff_calculator_samples = totals.forward_diff_calculator_samples, diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index da02aaf05..7ea35ed81 100644 
--- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -67,9 +67,12 @@ pub(crate) mod profiling { pub richtext_insert_future_scan_max_visited: usize, pub causal_vv_materialize_count: u64, pub max_causal_vv_width: usize, + pub richtext_tracker_span_filter_count: u64, pub richtext_tracker_span_count: u64, pub richtext_tracker_filtered_span_count: u64, pub richtext_tracker_skipped_span_count: u64, + pub richtext_tracker_max_span_count: usize, + pub richtext_tracker_max_filtered_span_count: usize, pub richtext_id_to_cursor_iter_count: u64, pub richtext_id_to_cursor_empty_iter_count: u64, } @@ -140,10 +143,15 @@ pub(crate) mod profiling { pub(crate) fn record_richtext_tracker_span_filter(input: usize, filtered: usize) { PROFILE.with(|profile| { if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_tracker_span_filter_count += 1; profile.richtext_tracker_span_count += input as u64; profile.richtext_tracker_filtered_span_count += filtered as u64; profile.richtext_tracker_skipped_span_count += input.saturating_sub(filtered) as u64; + profile.richtext_tracker_max_span_count = + profile.richtext_tracker_max_span_count.max(input); + profile.richtext_tracker_max_filtered_span_count = + profile.richtext_tracker_max_filtered_span_count.max(filtered); } }); } diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index a6705b8d1..08d15096a 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -88,9 +88,12 @@ pub struct CheckoutProfile { pub richtext_insert_future_scan_max_visited: usize, pub causal_vv_materialize_count: u64, pub max_causal_vv_width: usize, + pub richtext_tracker_span_filter_count: u64, pub richtext_tracker_span_count: u64, pub richtext_tracker_filtered_span_count: u64, pub richtext_tracker_skipped_span_count: u64, + pub richtext_tracker_max_span_count: usize, + pub richtext_tracker_max_filtered_span_count: usize, pub 
richtext_id_to_cursor_iter_count: u64, pub richtext_id_to_cursor_empty_iter_count: u64, pub recording_events: bool, @@ -1886,11 +1889,16 @@ impl LoroDoc { diff_profile.richtext_insert_future_scan_max_visited; profile.causal_vv_materialize_count = diff_profile.causal_vv_materialize_count; profile.max_causal_vv_width = diff_profile.max_causal_vv_width; + profile.richtext_tracker_span_filter_count = + diff_profile.richtext_tracker_span_filter_count; profile.richtext_tracker_span_count = diff_profile.richtext_tracker_span_count; profile.richtext_tracker_filtered_span_count = diff_profile.richtext_tracker_filtered_span_count; profile.richtext_tracker_skipped_span_count = diff_profile.richtext_tracker_skipped_span_count; + profile.richtext_tracker_max_span_count = diff_profile.richtext_tracker_max_span_count; + profile.richtext_tracker_max_filtered_span_count = + diff_profile.richtext_tracker_max_filtered_span_count; profile.richtext_id_to_cursor_iter_count = diff_profile.richtext_id_to_cursor_iter_count; profile.richtext_id_to_cursor_empty_iter_count = diff_profile.richtext_id_to_cursor_empty_iter_count; diff --git a/plans/20260522-fast-diff-calc.md b/plans/20260522-fast-diff-calc.md index 8d8b8c224..b13618a1c 100644 --- a/plans/20260522-fast-diff-calc.md +++ b/plans/20260522-fast-diff-calc.md @@ -18,7 +18,7 @@ Implemented on branch `feat/scale-text-checkout-perf`: Current implementation covers: -- Phase 0 profiling counters for tracker spans, filtered spans, skipped spans, `IdToCursor::iter` calls, and empty `IdToCursor::iter` calls. +- Phase 0 profiling counters for tracker spans, filtered spans, skipped spans, max/avg spans per tracker checkout, max/avg affected containers, `IdToCursor::iter` calls, and empty `IdToCursor::iter` calls. - Phase 1 directed richtext tracker span checkout API, with existing `checkout`, `checkout_causal`, and `diff` APIs kept as adapters. - Removal of the tracker-only `current_frontier_hint`. 
- Phase 2 per-container coverage filtering for text/list/movable-list richtext trackers, with conservative fallback when coverage is unavailable. @@ -29,7 +29,7 @@ Benchmark notes for `multi-container/latest-to-base` with the default 1000 peers | Version | Time | Avg total | Avg diff calc | Avg tracker checkout | Avg tracker diff | | --- | ---: | ---: | ---: | ---: | ---: | | `c350b0e8` baseline | 905.53-908.52 ms | 916.002244 ms | 900.339939 ms | 412.268949 ms | 424.615097 ms | -| `91e5ceb6` current | 811.54-879.43 ms | 842.044498 ms | 825.496539 ms | 476.979320 ms | 278.143902 ms | +| current | 823.02-826.74 ms | 832.607089 ms | 816.320180 ms | 467.040713 ms | 281.617226 ms | Current profiling counters for the same run: @@ -38,6 +38,13 @@ Current profiling counters for the same run: - `skipped_tracker_spans=253363500` - `id_to_cursor_iters=123753500` - `empty_id_to_cursor_iters=123623500` +- `tracker_span_filter_calls=520000` +- `avg_tracker_spans_per_checkout=725` +- `max_tracker_spans_per_checkout=1000` +- `avg_filtered_tracker_spans_per_checkout=237` +- `max_filtered_tracker_spans_per_checkout=1000` +- `avg_diff_containers=10000` +- `max_diff_containers=10000` This shows the routing is skipping about two thirds of tracker span checks in the target benchmark. The remaining empty iterator count is still high because the first implementation stores one broad coverage span per `(container, peer)`, which intentionally allows false positives. Phase 4/5 should only be considered if this remaining cost shows up in production profiles. 
From f9fb539dc367604306f88ba6c6996464f6aa60d9 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 10:22:57 +0000 Subject: [PATCH 40/46] test: compare filtered richtext diff --- .../src/container/richtext/tracker.rs | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index 4357d65e5..6b68bc586 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -937,6 +937,34 @@ mod test { assert_eq!(t.current_vv, vv!(1 => 4)); } + #[test] + fn coverage_filtered_diff_matches_unfiltered_for_delete_span() { + fn tracker_with_delete() -> Tracker { + let mut t = Tracker::new(); + t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..10)); + t.delete(ID::new(2, 0), ID::NONE_ID, 0, 10, true); + t + } + + let from = vv!(1 => 10); + let to = vv!(1 => 10, 2 => 10); + let mut unfiltered = tracker_with_delete(); + let mut filtered = tracker_with_delete(); + + let mut coverage = PeerSpanCoverage::default(); + coverage.insert(1, CounterSpan::new(0, 10)); + coverage.insert(2, CounterSpan::new(0, 10)); + + let unfiltered_delta = unfiltered.diff(&from, &to).collect::>(); + let filtered_delta = filtered + .diff_with_coverage(&from, &to, &coverage) + .collect::>(); + + assert_eq!(filtered_delta, unfiltered_delta); + assert_eq!(filtered.current_vv, unfiltered.current_vv); + assert_eq!(filtered.rope.len(), unfiltered.rope.len()); + } + #[test] fn test_retreat_and_forward_delete() { let mut t = Tracker::new(); From 434b35c40af59873d8c0b808ee4852f92c9ba6ca Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 10:23:11 +0000 Subject: [PATCH 41/46] docs: update fast diff calc commit list --- plans/20260522-fast-diff-calc.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plans/20260522-fast-diff-calc.md b/plans/20260522-fast-diff-calc.md index 
b13618a1c..731ae5b61 100644 --- a/plans/20260522-fast-diff-calc.md +++ b/plans/20260522-fast-diff-calc.md @@ -15,6 +15,8 @@ Implemented on branch `feat/scale-text-checkout-perf`: - `c350b0e8 bench: add many text checkout scenario` - `5c3cd62a refactor: route richtext checkout through spans` - `91e5ceb6 perf: filter richtext checkout spans by coverage` +- `57bfd675 bench: report checkout span averages` +- `f9fb539d test: compare filtered richtext diff` Current implementation covers: From f8d57522600b18eb7c22da7229f4cf691f993962 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 10:53:21 +0000 Subject: [PATCH 42/46] fix: keep list diff calculator small --- crates/loro-internal/src/diff_calc.rs | 36 +++++++++++++++++---------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index 7ea35ed81..bf5e6091f 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -675,9 +675,9 @@ use rle::{HasLength as _, Sliceable}; #[derive(Default)] pub(crate) struct ListDiffCalculator { - start_vv: VersionVector, + start_vv: Box, tracker: Box, - coverage: PeerSpanCoverage, + coverage: Box, } impl ListDiffCalculator { @@ -704,13 +704,13 @@ impl std::fmt::Debug for ListDiffCalculator { impl DiffCalculatorTrait for ListDiffCalculator { fn start_tracking(&mut self, _oplog: &OpLog, vv: &crate::VersionVector, _mode: DiffMode) { - if !vv.includes_vv(&self.start_vv) || !self.tracker.all_vv().includes_vv(vv) { + if !vv.includes_vv(self.start_vv.as_ref()) || !self.tracker.all_vv().includes_vv(vv) { *self.tracker = RichtextTracker::new_with_unknown(); - self.start_vv = vv.clone(); + *self.start_vv = vv.clone(); self.coverage.clear(); } - richtext_tracker_checkout_with_coverage(&mut self.tracker, vv, &self.coverage); + richtext_tracker_checkout_with_coverage(&mut self.tracker, vv, self.coverage.as_ref()); } fn apply_change( @@ -720,7 +720,11 @@ impl 
DiffCalculatorTrait for ListDiffCalculator { vv: Option>, ) { if let Some(vv) = vv { - richtext_tracker_checkout_causal_with_coverage(&mut self.tracker, vv, &self.coverage); + richtext_tracker_checkout_causal_with_coverage( + &mut self.tracker, + vv, + self.coverage.as_ref(), + ); } match &op.op().content { @@ -746,7 +750,7 @@ impl DiffCalculatorTrait for ListDiffCalculator { _ => unreachable!(), } - record_op_coverage(&mut self.coverage, &op); + record_op_coverage(self.coverage.as_mut(), &op); } fn finish_this_round(&mut self) {} @@ -764,7 +768,7 @@ impl DiffCalculatorTrait for ListDiffCalculator { } else { Either::Right( self.tracker - .diff_with_coverage(info.from_vv, info.to_vv, &self.coverage), + .diff_with_coverage(info.from_vv, info.to_vv, self.coverage.as_ref()), ) }; for item in diff_iter { @@ -1480,13 +1484,19 @@ struct MovableListInner { impl DiffCalculatorTrait for MovableListDiffCalculator { fn start_tracking(&mut self, _oplog: &OpLog, vv: &crate::VersionVector, mode: DiffMode) { - if !vv.includes_vv(&self.list.start_vv) || !self.list.tracker.all_vv().includes_vv(vv) { + if !vv.includes_vv(self.list.start_vv.as_ref()) + || !self.list.tracker.all_vv().includes_vv(vv) + { *self.list.tracker = RichtextTracker::new_with_unknown(); - self.list.start_vv = vv.clone(); + *self.list.start_vv = vv.clone(); self.list.coverage.clear(); } - richtext_tracker_checkout_with_coverage(&mut self.list.tracker, vv, &self.list.coverage); + richtext_tracker_checkout_with_coverage( + &mut self.list.tracker, + vv, + self.list.coverage.as_ref(), + ); self.inner.current_mode = mode; } @@ -1583,7 +1593,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { richtext_tracker_checkout_causal_with_coverage( &mut this.tracker, vv, - &this.coverage, + this.coverage.as_ref(), ); } @@ -1659,7 +1669,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { _ => unreachable!(), } if updates_tracker { - record_op_coverage(&mut this.coverage, &op); + 
record_op_coverage(this.coverage.as_mut(), &op); } }; } From 82dd1dc47b7f78a299d0c08ecef8395d5a2e598e Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 12:56:33 +0000 Subject: [PATCH 43/46] perf: reuse coverage-local richtext tracker versions --- .../loro-internal/src/container/richtext.rs | 4 +- .../src/container/richtext/tracker.rs | 657 ++++++++++++------ crates/loro-internal/src/diff_calc.rs | 166 +++-- crates/loro-internal/src/version.rs | 3 + 4 files changed, 578 insertions(+), 252 deletions(-) diff --git a/crates/loro-internal/src/container/richtext.rs b/crates/loro-internal/src/container/richtext.rs index 7682b1e47..7790f52ce 100644 --- a/crates/loro-internal/src/container/richtext.rs +++ b/crates/loro-internal/src/container/richtext.rs @@ -28,7 +28,9 @@ use std::fmt::Debug; pub(crate) use fugue_span::{RichtextChunk, RichtextChunkValue}; pub(crate) use richtext_state::RichtextState; pub(crate) use style_range_map::Styles; -pub(crate) use tracker::{CrdtRopeDelta, PeerSpanCoverage, Tracker as RichtextTracker}; +pub(crate) use tracker::{ + CrdtRopeDelta, PeerSpanCoverage, Tracker as RichtextTracker, TrackerMaterializedVersion, +}; /// This is the data structure that represents a span of rich text. /// It's used to communicate with the frontend. diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index 6b68bc586..a419f0b34 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -29,11 +29,289 @@ pub(crate) type PeerSpanCoverage = FxHashMap; #[derive(Debug)] pub(crate) struct Tracker { applied_vv: VersionVector, - current_vv: VersionVector, rope: CrdtRope, id_to_cursor: IdToCursor, } +/// Tracks the version currently materialized in a richtext tracker. +/// +/// This state intentionally lives outside [`Tracker`]. 
The diff calculators keep +/// it next to the tracker because the stable cross-round invariant is: +/// +/// - after `calculate_diff(from, to)` finishes, the tracker is materialized at +/// the coverage-local projection of `from`; +/// - during replay, this value may temporarily move through causal versions; +/// - diff-status checkout to `to` must not change it. +/// +/// Only peers that have ops in the container coverage need to be stored here. +/// Missing peers are treated as materialized at counter `0`. +/// +/// The type deliberately owns the mutable version vector. Tracker checkout that +/// mutates the materialized version requires `&mut Self`, while diff-status +/// checkout only takes `&Self`, so callers cannot accidentally advance the +/// stable materialized version while marking the `to` diff. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub(crate) struct TrackerMaterializedVersion { + vv: Box, +} + +impl TrackerMaterializedVersion { + #[inline] + pub(crate) fn as_vv(&self) -> &VersionVector { + &self.vv + } + + #[inline] + pub(crate) fn includes_id(&self, id: ID) -> bool { + self.vv.includes_id(id) + } + + pub(crate) fn reset_to_version_projection( + &mut self, + target: &VersionVector, + coverage: &PeerSpanCoverage, + ) { + self.vv.clear(); + for &peer in coverage.keys() { + if let Some(&end) = target.get(&peer) { + if end > 0 { + self.vv.insert(peer, end); + } + } + } + } + + pub(crate) fn checkout_to_version( + &mut self, + tracker: &mut Tracker, + target: &VersionVector, + coverage: &PeerSpanCoverage, + ) { + let spans = self.checkout_spans_to_version(target, coverage); + self.checkout_peer_spans(tracker, &spans, Some(coverage)); + } + + /// Marks diff status at `target` without changing the stable materialized + /// version. This is the second half of diff calculation: after checkout to + /// `from`, mark which spans would change at `to`. 
+ pub(crate) fn checkout_diff_status_to_version( + &self, + tracker: &mut Tracker, + target: &VersionVector, + coverage: &PeerSpanCoverage, + ) { + let spans = self.checkout_spans_to_version(target, coverage); + tracker.apply_peer_spans(&spans, true, Some(coverage)); + } + + pub(crate) fn checkout_to_causal( + &mut self, + tracker: &mut Tracker, + target: CausalVersion<'_>, + coverage: &PeerSpanCoverage, + ) { + let spans = self.checkout_spans_to_causal(target, coverage); + self.checkout_peer_spans(tracker, &spans, Some(coverage)); + } + + #[cfg(test)] + fn checkout_to_version_without_coverage( + &mut self, + tracker: &mut Tracker, + target: &VersionVector, + ) { + let spans = self.checkout_spans_to_version_without_coverage(target); + self.checkout_peer_spans(tracker, &spans, None); + } + + #[cfg(test)] + fn checkout_diff_status_to_version_without_coverage( + &self, + tracker: &mut Tracker, + target: &VersionVector, + ) { + let spans = self.checkout_spans_to_version_without_coverage(target); + tracker.apply_peer_spans(&spans, true, None); + } + + #[cfg(test)] + fn checkout_to_causal_without_coverage( + &mut self, + tracker: &mut Tracker, + target: CausalVersion<'_>, + ) { + let spans = self.checkout_spans_to_causal_without_coverage(target); + self.checkout_peer_spans(tracker, &spans, None); + } + + #[cfg(test)] + fn checkout_peer_spans_without_coverage( + &mut self, + tracker: &mut Tracker, + spans: &[IdSpan], + ) { + self.checkout_peer_spans(tracker, spans, None); + } + + fn checkout_peer_spans( + &mut self, + tracker: &mut Tracker, + spans: &[IdSpan], + coverage: Option<&PeerSpanCoverage>, + ) { + tracker.apply_peer_spans(spans, false, coverage); + + for &span in spans { + if coverage.is_some_and(|coverage| !coverage.contains_key(&span.peer)) { + continue; + } + + if span.is_reversed() { + self.vv.shrink_to_exclude(span); + } else { + self.vv.extend_to_include(span); + } + } + } + + fn checkout_spans_to_version( + &self, + target: &VersionVector, + coverage: 
&PeerSpanCoverage, + ) -> SmallVec<[IdSpan; 4]> { + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + self.push_retreat_spans_to_version(&mut spans, |peer| { + target.get(&peer).copied().unwrap_or(0) + }); + for &peer in coverage.keys() { + let target_end = target.get(&peer).copied().unwrap_or(0); + let current_end = self.vv.get(&peer).copied().unwrap_or(0); + if target_end > current_end { + spans.push(IdSpan::new(peer, current_end, target_end)); + } + } + + spans + } + + fn checkout_spans_to_causal( + &self, + target: CausalVersion<'_>, + coverage: &PeerSpanCoverage, + ) -> SmallVec<[IdSpan; 4]> { + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + self.push_retreat_spans_to_version(&mut spans, |peer| target.end_for_peer(peer)); + for &peer in coverage.keys() { + let target_end = target.end_for_peer(peer); + let current_end = self.vv.get(&peer).copied().unwrap_or(0); + if target_end > current_end { + spans.push(IdSpan::new(peer, current_end, target_end)); + } + } + + spans + } + + #[cfg(test)] + fn checkout_spans_to_version_without_coverage( + &self, + target: &VersionVector, + ) -> SmallVec<[IdSpan; 4]> { + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + spans.extend(self.vv.sub_iter(target).map(reversed_span)); + spans.extend(target.sub_iter(&self.vv)); + spans + } + + #[cfg(test)] + fn checkout_spans_to_causal_without_coverage( + &self, + target: CausalVersion<'_>, + ) -> SmallVec<[IdSpan; 4]> { + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + self.push_retreat_spans_to_version(&mut spans, |peer| target.end_for_peer(peer)); + + for (&peer, &base_end) in target.base().iter() { + let target_end = if peer == target.peer() { + base_end.max(target.peer_end()) + } else { + base_end + }; + let current_end = self.vv.get(&peer).copied().unwrap_or(0); + if target_end > current_end { + spans.push(IdSpan::new(peer, current_end, target_end)); + } + } + + if !target.base().contains_key(&target.peer()) { + let target_end = 
target.peer_end(); + let current_end = self.vv.get(&target.peer()).copied().unwrap_or(0); + if target_end > current_end { + spans.push(IdSpan::new(target.peer(), current_end, target_end)); + } + } + + spans + } + + fn push_retreat_spans_to_version( + &self, + spans: &mut SmallVec<[IdSpan; 4]>, + target_end_for_peer: impl Fn(PeerID) -> Counter, + ) { + for (&peer, &counter) in self.vv.iter() { + let target_end = target_end_for_peer(peer); + if counter > target_end { + spans.push(reversed_span(IdSpan::new(peer, target_end, counter))); + } + } + } + + fn extend_to_include_end_id(&mut self, id: ID) { + self.vv.extend_to_include_end_id(id); + } + + fn extend_to_include_last_id(&mut self, id: ID) { + self.vv.extend_to_include_last_id(id); + } + + #[cfg(debug_assertions)] + pub(crate) fn debug_assert_matches_version_projection( + &self, + target: &VersionVector, + coverage: &PeerSpanCoverage, + ) { + for &peer in coverage.keys() { + let expected = target.get(&peer).copied().unwrap_or(0); + let actual = self.vv.get(&peer).copied().unwrap_or(0); + debug_assert_eq!( + actual, expected, + "tracker materialized version must match the stable from-version projection" + ); + } + + for (&peer, &actual) in self.vv.iter() { + debug_assert!( + coverage.contains_key(&peer), + "tracker materialized version should only contain covered peers" + ); + let expected = target.get(&peer).copied().unwrap_or(0); + debug_assert_eq!( + actual, expected, + "tracker materialized version contains a stale peer counter" + ); + } + } + + #[cfg(not(debug_assertions))] + pub(crate) fn debug_assert_matches_version_projection( + &self, + _target: &VersionVector, + _coverage: &PeerSpanCoverage, + ) { + } +} + impl Default for Tracker { fn default() -> Self { Self::new_with_unknown() @@ -47,7 +325,6 @@ impl Tracker { rope: CrdtRope::new(), id_to_cursor: IdToCursor::default(), applied_vv: Default::default(), - current_vv: Default::default(), }; let result = this.rope.tree.push(FugueSpan { @@ -72,7 +349,6 @@ 
impl Tracker { rope: CrdtRope::new(), id_to_cursor: IdToCursor::default(), applied_vv: Default::default(), - current_vv: Default::default(), } } @@ -115,12 +391,13 @@ impl Tracker { &self.applied_vv } - #[inline] - pub fn current_vv(&self) -> &VersionVector { - &self.current_vv - } - - pub(crate) fn insert(&mut self, mut op_id: IdFull, mut pos: usize, mut content: RichtextChunk) { + pub(crate) fn insert( + &mut self, + materialized: &mut TrackerMaterializedVersion, + mut op_id: IdFull, + mut pos: usize, + mut content: RichtextChunk, + ) { // trace!( // "TrackerInsert op_id = {:#?}, pos = {:#?}, content = {:#?}", // op_id, @@ -129,7 +406,7 @@ impl Tracker { // ); // tracing::span!(tracing::Level::INFO, "TrackerInsert"); if let ControlFlow::Break(_) = - self.skip_applied(op_id.id(), content.len(), |applied_counter_end| { + self.skip_applied(materialized, op_id.id(), content.len(), |applied_counter_end| { // the op is partially included, need to slice the content let start = (applied_counter_end - op_id.counter) as usize; op_id.lamport += (applied_counter_end - op_id.counter) as Lamport; @@ -145,7 +422,9 @@ impl Tracker { // tracing::span!(tracing::Level::INFO, "before insert {} pos={}", op_id, pos); // debug_log::debug_dbg!(&self); // } + let end_id = op_id.inc(content.len() as Counter); self._insert(pos, content, op_id); + materialized.extend_to_include_end_id(end_id.id()); } fn _insert(&mut self, pos: usize, content: RichtextChunk, op_id: IdFull) { @@ -174,7 +453,6 @@ impl Tracker { self.update_insert_by_split(&result.splitted.arr); let end_id = op_id.inc(content.len() as Counter); - self.current_vv.extend_to_include_end_id(end_id.id()); self.applied_vv.extend_to_include_end_id(end_id.id()); } @@ -211,33 +489,38 @@ impl Tracker { /// - reverse: if true, the kth op delete the last kth element of the span pub(crate) fn delete( &mut self, + materialized: &mut TrackerMaterializedVersion, mut op_id: ID, mut target_start_id: ID, pos: usize, mut len: usize, reverse: bool, 
) { - if let ControlFlow::Break(_) = self.skip_applied(op_id, len, |applied_counter_end: i32| { - // the op is partially included, need to slice the op - let start = (applied_counter_end - op_id.counter) as usize; - op_id.counter = applied_counter_end; - if !reverse { - target_start_id = target_start_id.inc(start as i32); - } - // Okay, this looks pretty weird, but it's correct. - // If it's reverse, we don't need to change the target_start_id, because target_start_id always pointing towards the - // leftmost element of the span. After applying the initial part of the deletion, which starts from the right side, - // the target_start_id will be still pointing towards the same leftmost element, thus no need to change. - len -= start; - // If reverse, don't need to change the pos, because it's deleting backwards. - // If not reverse, we don't need to change the pos either, because the `start` chars after it are already deleted - }) { + if let ControlFlow::Break(_) = + self.skip_applied(materialized, op_id, len, |applied_counter_end: i32| { + // the op is partially included, need to slice the op + let start = (applied_counter_end - op_id.counter) as usize; + op_id.counter = applied_counter_end; + if !reverse { + target_start_id = target_start_id.inc(start as i32); + } + // Okay, this looks pretty weird, but it's correct. + // If it's reverse, we don't need to change the target_start_id, because target_start_id always pointing towards the + // leftmost element of the span. After applying the initial part of the deletion, which starts from the right side, + // the target_start_id will be still pointing towards the same leftmost element, thus no need to change. + len -= start; + // If reverse, don't need to change the pos, because it's deleting backwards. 
+ // If not reverse, we don't need to change the pos either, because the `start` chars after it are already deleted + }) + { return; } // tracing::info!("after forwarding pos={} len={}", pos, len); + let end_id = op_id.inc(len as Counter); self._delete(target_start_id, pos, len, reverse, op_id); + materialized.extend_to_include_end_id(end_id); } fn _delete(&mut self, target_start_id: ID, pos: usize, len: usize, reverse: bool, op_id: ID) { @@ -266,12 +549,12 @@ impl Tracker { } let end_id = op_id.inc(len as Counter); - self.current_vv.extend_to_include_end_id(end_id); self.applied_vv.extend_to_include_end_id(end_id); } fn skip_applied( &mut self, + materialized: &mut TrackerMaterializedVersion, op_id: ID, len: usize, mut f: impl FnMut(Counter), @@ -279,10 +562,10 @@ impl Tracker { let last_id = op_id.inc(len as Counter - 1); let applied_counter_end = self.applied_vv.get(&last_id.peer).copied().unwrap_or(0); if applied_counter_end > op_id.counter { - if !self.current_vv.includes_id(last_id) { + if !materialized.includes_id(last_id) { // PERF: may be slow let mut updates = Default::default(); - let cnt_start = self.current_vv.get(&op_id.peer).copied().unwrap_or(0); + let cnt_start = materialized.as_vv().get(&op_id.peer).copied().unwrap_or(0); self.forward( IdSpan::new(op_id.peer, cnt_start, op_id.counter + len as Counter), &mut updates, @@ -291,7 +574,7 @@ impl Tracker { } if applied_counter_end > last_id.counter { - self.current_vv.extend_to_include_last_id(last_id); + materialized.extend_to_include_last_id(last_id); return ControlFlow::Break(()); } @@ -307,12 +590,15 @@ impl Tracker { #[instrument(skip(self))] pub(crate) fn move_item( &mut self, + materialized: &mut TrackerMaterializedVersion, op_id: IdFull, deleted_id: ID, from_pos: usize, to_pos: usize, ) { - if let ControlFlow::Break(_) = self.skip_applied(op_id.id(), 1, |_| unreachable!()) { + if let ControlFlow::Break(_) = + self.skip_applied(materialized, op_id.id(), 1, |_| unreachable!()) + { return; } @@ 
-361,109 +647,15 @@ impl Tracker { ); let end_id = op_id.inc(1); - self.current_vv.extend_to_include_end_id(end_id.id()); self.applied_vv.extend_to_include_end_id(end_id.id()); - } - - #[inline] - pub(crate) fn checkout(&mut self, vv: &VersionVector) { - self._checkout(vv, false); - } - - #[inline] - pub(crate) fn checkout_causal(&mut self, vv: CausalVersion<'_>) { - self._checkout_causal(vv, false); + materialized.extend_to_include_end_id(end_id.id()); } /// Checkout by applying directed peer spans. /// /// Forward spans use the normal `[start, end)` representation. Retreat spans /// must use `CounterSpan`'s reversed representation for the same covered ids. - pub(crate) fn checkout_peer_spans(&mut self, spans: &[IdSpan]) { - self._checkout_peer_spans(spans, false, None); - } - - pub(crate) fn checkout_with_coverage( - &mut self, - vv: &VersionVector, - coverage: &PeerSpanCoverage, - ) { - let spans = self.checkout_spans_to_version(vv); - self.checkout_peer_spans_with_coverage(&spans, coverage); - } - - pub(crate) fn checkout_peer_spans_with_coverage( - &mut self, - spans: &[IdSpan], - coverage: &PeerSpanCoverage, - ) { - self._checkout_peer_spans(spans, false, Some(coverage)); - } - - pub(crate) fn checkout_causal_with_coverage( - &mut self, - vv: CausalVersion<'_>, - coverage: &PeerSpanCoverage, - ) { - let spans = self.checkout_spans_to_causal(vv); - self.checkout_peer_spans_with_coverage(&spans, coverage); - } - - fn _checkout(&mut self, vv: &VersionVector, on_diff_status: bool) { - // tracing::info!("Checkout to {:?} from {:?}", vv, self.current_vv); - let spans = self.checkout_spans_to_version(vv); - if on_diff_status { - self._checkout_peer_spans(&spans, true, None); - } else { - self.checkout_peer_spans(&spans); - } - } - - fn _checkout_causal(&mut self, vv: CausalVersion<'_>, on_diff_status: bool) { - let spans = self.checkout_spans_to_causal(vv); - self._checkout_peer_spans(&spans, on_diff_status, None); - } - - fn checkout_spans_to_version(&self, vv: 
&VersionVector) -> SmallVec<[IdSpan; 4]> { - let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); - spans.extend(self.current_vv.sub_iter(vv).map(reversed_span)); - spans.extend(vv.sub_iter(&self.current_vv)); - spans - } - - fn checkout_spans_to_causal(&self, vv: CausalVersion<'_>) -> SmallVec<[IdSpan; 4]> { - let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); - for (&peer, &counter) in self.current_vv.iter() { - let target_end = vv.end_for_peer(peer); - if counter > target_end { - spans.push(reversed_span(IdSpan::new(peer, target_end, counter))); - } - } - - for (&peer, &base_end) in vv.base().iter() { - let target_end = if peer == vv.peer() { - base_end.max(vv.peer_end()) - } else { - base_end - }; - let current_end = self.current_vv.get(&peer).copied().unwrap_or(0); - if target_end > current_end { - spans.push(IdSpan::new(peer, current_end, target_end)); - } - } - - if !vv.base().contains_key(&vv.peer()) { - let target_end = vv.peer_end(); - let current_end = self.current_vv.get(&vv.peer()).copied().unwrap_or(0); - if target_end > current_end { - spans.push(IdSpan::new(vv.peer(), current_end, target_end)); - } - } - - spans - } - - fn _checkout_peer_spans( + fn apply_peer_spans( &mut self, spans: &[IdSpan], on_diff_status: bool, @@ -480,7 +672,6 @@ impl Tracker { spans.len(), filtered_spans.len(), ); - let mut current_vv = std::mem::take(&mut self.current_vv); let mut updates = Vec::new(); for &span in filtered_spans.iter().filter(|span| span.is_reversed()) { for c in self.id_to_cursor.iter(span) { @@ -573,19 +764,6 @@ impl Tracker { self.forward(span, &mut updates); } - if !on_diff_status { - for &span in spans { - if span.is_reversed() { - current_vv.shrink_to_exclude(span); - } else { - current_vv.extend_to_include(span); - } - } - self.current_vv = current_vv; - } else { - self.current_vv = current_vv; - } - self.batch_update(updates, on_diff_status); } @@ -675,16 +853,16 @@ impl Tracker { } #[allow(unused)] - pub(crate) fn check(&self) { + 
pub(crate) fn check(&self, materialized: &TrackerMaterializedVersion) { if !cfg!(debug_assertions) { return; } - self.check_vv_correctness(); + self.check_vv_correctness(materialized); self.check_id_to_cursor_insertions_correctness(); } - fn check_vv_correctness(&self) { + fn check_vv_correctness(&self, materialized: &TrackerMaterializedVersion) { if !cfg!(debug_assertions) { return; } @@ -697,9 +875,9 @@ impl Tracker { let id_span = span.id_span(); assert!(self.all_vv().includes_id(id_span.id_last())); if span.status.future { - assert!(!self.current_vv.includes_id(id_span.id_start())); + assert!(!materialized.includes_id(id_span.id_start())); } else { - assert!(self.current_vv.includes_id(id_span.id_last())); + assert!(materialized.includes_id(id_span.id_last())); } } } @@ -768,14 +946,16 @@ impl Tracker { } // #[tracing::instrument(skip(self), level = "info")] + #[cfg(test)] pub(crate) fn diff( &mut self, + materialized: &mut TrackerMaterializedVersion, from: &VersionVector, to: &VersionVector, ) -> impl Iterator + '_ { // tracing::info!("Init: {:#?}, ", &self); - self._checkout(from, false); - self._checkout(to, true); + materialized.checkout_to_version_without_coverage(self, from); + materialized.checkout_diff_status_to_version_without_coverage(self, to); // self.id_to_cursor.diagnose(); // tracing::trace!("Trace::diff {:#?}, ", &self); @@ -784,14 +964,13 @@ impl Tracker { pub(crate) fn diff_with_coverage( &mut self, + materialized: &mut TrackerMaterializedVersion, from: &VersionVector, to: &VersionVector, coverage: &PeerSpanCoverage, ) -> impl Iterator + '_ { - let spans = self.checkout_spans_to_version(from); - self._checkout_peer_spans(&spans, false, Some(coverage)); - let spans = self.checkout_spans_to_version(to); - self._checkout_peer_spans(&spans, true, Some(coverage)); + materialized.checkout_to_version(self, from, coverage); + materialized.checkout_diff_status_to_version(self, to, coverage); self.rope.get_diff() } @@ -856,52 +1035,81 @@ mod test { use 
super::*; use std::time::Instant; + fn tracker() -> (Tracker, TrackerMaterializedVersion) { + (Tracker::new(), TrackerMaterializedVersion::default()) + } + + fn insert_text( + tracker: &mut Tracker, + materialized: &mut TrackerMaterializedVersion, + id: IdFull, + pos: usize, + text: std::ops::Range, + ) { + tracker.insert(materialized, id, pos, RichtextChunk::new_text(text)); + } + + fn delete_text( + tracker: &mut Tracker, + materialized: &mut TrackerMaterializedVersion, + op_id: ID, + target_start_id: ID, + pos: usize, + len: usize, + reverse: bool, + ) { + tracker.delete(materialized, op_id, target_start_id, pos, len, reverse); + } + #[test] fn test_len() { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..2)); + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..2); assert_eq!(t.rope.len(), 2); - t.checkout(&Default::default()); + materialized.checkout_to_version_without_coverage(&mut t, &Default::default()); assert_eq!(t.rope.len(), 0); - t.insert(IdFull::new(2, 0, 0), 0, RichtextChunk::new_text(2..4)); + insert_text(&mut t, &mut materialized, IdFull::new(2, 0, 0), 0, 2..4); let v = vv!(1 => 2, 2 => 2); - t.checkout(&v); + materialized.checkout_to_version_without_coverage(&mut t, &v); assert_eq!(&t.applied_vv, &v); assert_eq!(t.rope.len(), 4); } #[test] fn checkout_causal_single_frontier_retreats_other_peers() { - let mut t = Tracker::new(); - t.insert(IdFull::new(2, 0, 0), 0, RichtextChunk::new_text(0..2)); - t.insert(IdFull::new(1, 0, 0), 2, RichtextChunk::new_text(2..4)); + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(2, 0, 0), 0, 0..2); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 2, 2..4); assert_eq!(t.rope.len(), 4); let base = ImVersionVector::new(); - t.checkout_causal(CausalVersion::new(&base, 1, 2)); + materialized.checkout_to_causal_without_coverage( + &mut t, + 
CausalVersion::new(&base, 1, 2), + ); assert_eq!(t.rope.len(), 2); - assert_eq!(t.current_vv, vv!(1 => 2)); + assert_eq!(materialized.as_vv(), &vv!(1 => 2)); } #[test] fn checkout_peer_spans_uses_reversed_span_boundaries() { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..4)); - t.insert(IdFull::new(2, 0, 4), 4, RichtextChunk::new_text(4..6)); + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..4); + insert_text(&mut t, &mut materialized, IdFull::new(2, 0, 4), 4, 4..6); assert_eq!(t.rope.len(), 6); - assert_eq!(t.current_vv, vv!(1 => 4, 2 => 2)); + assert_eq!(materialized.as_vv(), &vv!(1 => 4, 2 => 2)); let retreat_peer_2 = reversed_span(IdSpan::new(2, 0, 2)); - t.checkout_peer_spans(&[retreat_peer_2]); + materialized.checkout_peer_spans_without_coverage(&mut t, &[retreat_peer_2]); assert_eq!(t.rope.len(), 4); - assert_eq!(t.current_vv, vv!(1 => 4)); + assert_eq!(materialized.as_vv(), &vv!(1 => 4)); - t.checkout_peer_spans(&[IdSpan::new(2, 0, 2)]); + materialized.checkout_peer_spans_without_coverage(&mut t, &[IdSpan::new(2, 0, 2)]); assert_eq!(t.rope.len(), 6); - assert_eq!(t.current_vv, vv!(1 => 4, 2 => 2)); + assert_eq!(materialized.as_vv(), &vv!(1 => 4, 2 => 2)); } #[test] @@ -923,83 +1131,116 @@ mod test { } #[test] - fn coverage_filtered_checkout_still_updates_current_vv() { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..4)); - t.current_vv.set_end(ID::new(2, 5)); - assert_eq!(t.current_vv, vv!(1 => 4, 2 => 5)); + fn coverage_filtered_checkout_keeps_materialized_projection_local() { + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..4); + assert_eq!(materialized.as_vv(), &vv!(1 => 4)); let mut coverage = PeerSpanCoverage::default(); coverage.insert(1, CounterSpan::new(0, 4)); - t.checkout_peer_spans_with_coverage(&[reversed_span(IdSpan::new(2, 0, 
5))], &coverage); + materialized.checkout_peer_spans( + &mut t, + &[reversed_span(IdSpan::new(2, 0, 5))], + Some(&coverage), + ); assert_eq!(t.rope.len(), 4); - assert_eq!(t.current_vv, vv!(1 => 4)); + assert_eq!(materialized.as_vv(), &vv!(1 => 4)); } #[test] fn coverage_filtered_diff_matches_unfiltered_for_delete_span() { - fn tracker_with_delete() -> Tracker { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..10)); - t.delete(ID::new(2, 0), ID::NONE_ID, 0, 10, true); - t + fn tracker_with_delete() -> (Tracker, TrackerMaterializedVersion) { + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..10); + delete_text( + &mut t, + &mut materialized, + ID::new(2, 0), + ID::NONE_ID, + 0, + 10, + true, + ); + (t, materialized) } let from = vv!(1 => 10); let to = vv!(1 => 10, 2 => 10); - let mut unfiltered = tracker_with_delete(); - let mut filtered = tracker_with_delete(); + let (mut unfiltered, mut unfiltered_materialized) = tracker_with_delete(); + let (mut filtered, mut filtered_materialized) = tracker_with_delete(); let mut coverage = PeerSpanCoverage::default(); coverage.insert(1, CounterSpan::new(0, 10)); coverage.insert(2, CounterSpan::new(0, 10)); - let unfiltered_delta = unfiltered.diff(&from, &to).collect::>(); + let unfiltered_delta = unfiltered + .diff(&mut unfiltered_materialized, &from, &to) + .collect::>(); let filtered_delta = filtered - .diff_with_coverage(&from, &to, &coverage) + .diff_with_coverage(&mut filtered_materialized, &from, &to, &coverage) .collect::>(); assert_eq!(filtered_delta, unfiltered_delta); - assert_eq!(filtered.current_vv, unfiltered.current_vv); + assert_eq!(filtered_materialized, unfiltered_materialized); assert_eq!(filtered.rope.len(), unfiltered.rope.len()); } + #[test] + fn diff_status_checkout_preserves_stable_materialized_version() { + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, 
IdFull::new(1, 0, 0), 0, 0..2); + insert_text(&mut t, &mut materialized, IdFull::new(2, 0, 2), 2, 2..4); + + let mut coverage = PeerSpanCoverage::default(); + coverage.insert(1, CounterSpan::new(0, 2)); + coverage.insert(2, CounterSpan::new(0, 2)); + let from = vv!(1 => 2); + let to = vv!(1 => 2, 2 => 2); + + materialized.checkout_to_version(&mut t, &from, &coverage); + let stable_from = materialized.clone(); + materialized.checkout_diff_status_to_version(&mut t, &to, &coverage); + + assert_eq!(materialized, stable_from); + materialized.debug_assert_matches_version_projection(&from, &coverage); + } + #[test] fn test_retreat_and_forward_delete() { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..10)); - t.delete(ID::new(2, 0), ID::NONE_ID, 0, 10, true); - t.checkout(&vv!(1 => 10, 2=>5)); + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..10); + delete_text(&mut t, &mut materialized, ID::new(2, 0), ID::NONE_ID, 0, 10, true); + materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>5)); assert_eq!(t.rope.len(), 5); - t.checkout(&vv!(1 => 10, 2=>0)); + materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>0)); assert_eq!(t.rope.len(), 10); - t.checkout(&vv!(1 => 10, 2=>10)); + materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>10)); assert_eq!(t.rope.len(), 0); - t.checkout(&vv!(1 => 10, 2=>0)); + materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>0)); assert_eq!(t.rope.len(), 10); } #[test] fn repeated_tail_splits_keep_id_to_cursor_consistent() { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..300)); + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..300); for (i, pos) in [100, 201, 252, 278].into_iter().enumerate() { let op_id = IdFull::new(2, i as Counter, i as 
Lamport); let start = 1000 + i as u32; - t.insert(op_id, pos, RichtextChunk::new_text(start..start + 1)); + insert_text(&mut t, &mut materialized, op_id, pos, start..start + 1); } - t.check(); + t.check(&materialized); } #[test] fn test_checkout_in_doc_with_del_span() { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..10)); - t.delete(ID::new(2, 0), ID::NONE_ID, 0, 10, false); - t.checkout(&vv!(1 => 10, 2=>4)); + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..10); + delete_text(&mut t, &mut materialized, ID::new(2, 0), ID::NONE_ID, 0, 10, false); + materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>4)); let v: Vec = t.rope.tree().iter().copied().collect(); assert_eq!(v.len(), 2); assert!(!v[0].is_activated()); @@ -1023,8 +1264,9 @@ mod test { let doc_len = CHUNK_LEN * fragments; - let mut t = Tracker::new(); + let (mut t, mut materialized) = tracker(); t.insert( + &mut materialized, IdFull::new(PEER_A, 0, 0), 0, RichtextChunk::new_text(0..doc_len as u32), @@ -1040,14 +1282,14 @@ mod test { let chunk = RichtextChunk::new_text( (doc_len as u32 + i as u32)..(doc_len as u32 + i as u32 + 1), ); - t.insert(op_id, pos, chunk); + t.insert(&mut materialized, op_id, pos, chunk); } let elapsed = start.elapsed(); let before_vv = vv!(PEER_A => doc_len as Counter); let after_vv = vv!(PEER_A => doc_len as Counter, PEER_B => (fragments - 1) as Counter); let diff_start = Instant::now(); - let diff_len = t.diff(&before_vv, &after_vv).count(); + let diff_len = t.diff(&mut materialized, &before_vv, &after_vv).count(); let diff_elapsed = diff_start.elapsed(); assert_eq!(t.rope.tree().iter().count(), 1 + 2 * (fragments - 1)); println!( @@ -1071,7 +1313,8 @@ mod test { let doc_len = CHUNK_LEN * fragments; let mut t = Tracker::new_with_unknown(); - t.checkout(&vv!()); + let mut materialized = TrackerMaterializedVersion::default(); + 
materialized.checkout_to_version_without_coverage(&mut t, &VersionVector::new()); t.id_to_cursor.diagnose(); let start = Instant::now(); @@ -1081,7 +1324,7 @@ mod test { let chunk = RichtextChunk::new_text( (doc_len as u32 + i as u32)..(doc_len as u32 + i as u32 + 1), ); - t.insert(op_id, pos, chunk); + t.insert(&mut materialized, op_id, pos, chunk); } let elapsed = start.elapsed(); diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index bf5e6091f..3eda8a637 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -28,7 +28,7 @@ use crate::{ richtext::{ richtext_state::{RichtextStateChunk, TextChunk}, AnchorType, CrdtRopeDelta, PeerSpanCoverage, RichtextChunk, RichtextChunkValue, - RichtextTracker, StyleOp, + RichtextTracker, StyleOp, TrackerMaterializedVersion, }, }, cursor::AbsolutePosition, @@ -676,6 +676,9 @@ use rle::{HasLength as _, Sliceable}; #[derive(Default)] pub(crate) struct ListDiffCalculator { start_vv: Box, + // Stable version currently materialized in `tracker`. After each checkout + // diff calculation it must equal the coverage-local projection of `from`. 
+ materialized: TrackerMaterializedVersion, tracker: Box, coverage: Box, } @@ -704,13 +707,24 @@ impl std::fmt::Debug for ListDiffCalculator { impl DiffCalculatorTrait for ListDiffCalculator { fn start_tracking(&mut self, _oplog: &OpLog, vv: &crate::VersionVector, _mode: DiffMode) { - if !vv.includes_vv(self.start_vv.as_ref()) || !self.tracker.all_vv().includes_vv(vv) { + if !version_includes_covered_start(vv, self.start_vv.as_ref(), self.coverage.as_ref()) + || !tracker_has_covered_ops(&self.tracker, vv, self.coverage.as_ref()) + { *self.tracker = RichtextTracker::new_with_unknown(); *self.start_vv = vv.clone(); self.coverage.clear(); + self.materialized + .reset_to_version_projection(vv, self.coverage.as_ref()); } - richtext_tracker_checkout_with_coverage(&mut self.tracker, vv, self.coverage.as_ref()); + richtext_tracker_checkout_with_coverage( + &mut self.tracker, + &mut self.materialized, + vv, + self.coverage.as_ref(), + ); + self.materialized + .debug_assert_matches_version_projection(vv, self.coverage.as_ref()); } fn apply_change( @@ -722,6 +736,7 @@ impl DiffCalculatorTrait for ListDiffCalculator { if let Some(vv) = vv { richtext_tracker_checkout_causal_with_coverage( &mut self.tracker, + &mut self.materialized, vv, self.coverage.as_ref(), ); @@ -731,6 +746,7 @@ impl DiffCalculatorTrait for ListDiffCalculator { crate::op::InnerContent::List(l) => match l { InnerListOp::Insert { slice, pos } => { self.tracker.insert( + &mut self.materialized, op.id_full(), *pos, RichtextChunk::new_text(slice.0.clone()), @@ -738,6 +754,7 @@ impl DiffCalculatorTrait for ListDiffCalculator { } InnerListOp::Delete(del) => { self.tracker.delete( + &mut self.materialized, op.id_start(), del.id_start, del.start() as usize, @@ -763,14 +780,12 @@ impl DiffCalculatorTrait for ListDiffCalculator { mut on_new_container: impl FnMut(&ContainerID), ) -> (InternalDiff, DiffMode) { let mut delta = Delta::new(); - let diff_iter = if self.coverage.is_empty() { - 
Either::Left(self.tracker.diff(info.from_vv, info.to_vv)) - } else { - Either::Right( - self.tracker - .diff_with_coverage(info.from_vv, info.to_vv, self.coverage.as_ref()), - ) - }; + let diff_iter = self.tracker.diff_with_coverage( + &mut self.materialized, + info.from_vv, + info.to_vv, + self.coverage.as_ref(), + ); for item in diff_iter { match item { CrdtRopeDelta::Retain(len) => { @@ -885,6 +900,8 @@ impl DiffCalculatorTrait for ListDiffCalculator { delta } + self.materialized + .debug_assert_matches_version_projection(info.from_vv, self.coverage.as_ref()); (InternalDiff::ListRaw(delta), DiffMode::Checkout) } } @@ -899,6 +916,7 @@ pub(crate) struct RichtextDiffCalculator { enum RichtextCalcMode { Crdt { tracker: Box, + materialized: TrackerMaterializedVersion, /// (op, end_pos) styles: Vec<(StyleOp, usize)>, start_vv: VersionVector, @@ -916,6 +934,7 @@ impl RichtextDiffCalculator { container_idx, mode: Box::new(RichtextCalcMode::Crdt { tracker: Box::new(RichtextTracker::new_with_unknown()), + materialized: TrackerMaterializedVersion::default(), styles: Vec::new(), start_vv: VersionVector::new(), coverage: PeerSpanCoverage::default(), @@ -939,57 +958,45 @@ impl RichtextDiffCalculator { #[cfg(feature = "test_utils")] fn richtext_tracker_checkout_with_coverage( tracker: &mut RichtextTracker, + materialized: &mut TrackerMaterializedVersion, vv: &VersionVector, coverage: &PeerSpanCoverage, ) { let start = std::time::Instant::now(); - if coverage.is_empty() { - tracker.checkout(vv); - } else { - tracker.checkout_with_coverage(vv, coverage); - } + materialized.checkout_to_version(tracker, vv, coverage); profiling::record_richtext_tracker_checkout(start.elapsed()); } #[cfg(feature = "test_utils")] fn richtext_tracker_checkout_causal_with_coverage( tracker: &mut RichtextTracker, + materialized: &mut TrackerMaterializedVersion, vv: CausalVersion<'_>, coverage: &PeerSpanCoverage, ) { let start = std::time::Instant::now(); - if coverage.is_empty() { - 
tracker.checkout_causal(vv); - } else { - tracker.checkout_causal_with_coverage(vv, coverage); - } + materialized.checkout_to_causal(tracker, vv, coverage); profiling::record_richtext_tracker_checkout(start.elapsed()); } #[cfg(not(feature = "test_utils"))] fn richtext_tracker_checkout_with_coverage( tracker: &mut RichtextTracker, + materialized: &mut TrackerMaterializedVersion, vv: &VersionVector, coverage: &PeerSpanCoverage, ) { - if coverage.is_empty() { - tracker.checkout(vv); - } else { - tracker.checkout_with_coverage(vv, coverage); - } + materialized.checkout_to_version(tracker, vv, coverage); } #[cfg(not(feature = "test_utils"))] fn richtext_tracker_checkout_causal_with_coverage( tracker: &mut RichtextTracker, + materialized: &mut TrackerMaterializedVersion, vv: CausalVersion<'_>, coverage: &PeerSpanCoverage, ) { - if coverage.is_empty() { - tracker.checkout_causal(vv); - } else { - tracker.checkout_causal_with_coverage(vv, coverage); - } + materialized.checkout_to_causal(tracker, vv, coverage); } fn seed_coverage_from_state_chunks(coverage: &mut PeerSpanCoverage, chunks: &[RichtextStateChunk]) { @@ -1038,6 +1045,44 @@ fn record_coverage_span(coverage: &mut PeerSpanCoverage, span: IdSpan) { .or_insert_with(|| CounterSpan::new(start, end)); } +fn version_includes_covered_start( + target: &VersionVector, + start: &VersionVector, + coverage: &PeerSpanCoverage, +) -> bool { + // `start_vv` may include unrelated ops from the same peers. Only peers that + // have container-local coverage are relevant for deciding whether the + // reusable tracker can be moved from its stable lower bound. 
+ coverage.keys().all(|peer| { + let start_end = start.get(peer).copied().unwrap_or(0); + let target_end = target.get(peer).copied().unwrap_or(0); + target_end >= start_end + }) +} + +fn tracker_has_covered_ops( + tracker: &RichtextTracker, + target: &VersionVector, + coverage: &PeerSpanCoverage, +) -> bool { + if coverage.is_empty() { + // No coverage means the tracker has no container-local proof for ops + // included by a non-empty start version. Reset so `start_vv` records + // this lower bound. Later checkouts before that bound rebuild from unknown. + return target.is_empty(); + } + + coverage.iter().all(|(&peer, span)| { + let target_end = target.get(&peer).copied().unwrap_or(0); + let required_end = target_end.min(span.norm_end()); + required_end <= span.min() + || tracker + .all_vv() + .get(&peer) + .is_some_and(|&applied_end| applied_end >= required_end) + }) +} + impl DiffCalculatorTrait for RichtextDiffCalculator { fn start_tracking( &mut self, @@ -1062,6 +1107,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { match &mut *self.mode { RichtextCalcMode::Crdt { tracker, + materialized, styles, start_vv, coverage, @@ -1079,20 +1125,31 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { *styles = seeded_styles; *start_vv = vv.clone(); seed_coverage_from_state_chunks(coverage, &chunks); - richtext_tracker_checkout_with_coverage(tracker, vv, coverage); + materialized.reset_to_version_projection(vv, coverage); + richtext_tracker_checkout_with_coverage( + tracker, + materialized, + vv, + coverage, + ); + materialized.debug_assert_matches_version_projection(vv, coverage); return; } } } - if !vv.includes_vv(start_vv) || !tracker.all_vv().includes_vv(vv) { + if !version_includes_covered_start(vv, start_vv, coverage) + || !tracker_has_covered_ops(tracker, vv, coverage) + { **tracker = RichtextTracker::new_with_unknown(); styles.clear(); *start_vv = vv.clone(); coverage.clear(); + materialized.reset_to_version_projection(vv, coverage); } - 
richtext_tracker_checkout_with_coverage(tracker, vv, coverage); + richtext_tracker_checkout_with_coverage(tracker, materialized, vv, coverage); + materialized.debug_assert_matches_version_projection(vv, coverage); } RichtextCalcMode::Linear { .. } => {} } @@ -1201,12 +1258,18 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { }, RichtextCalcMode::Crdt { tracker, + materialized, styles, start_vv: _, coverage, } => { if let Some(vv) = vv { - richtext_tracker_checkout_causal_with_coverage(tracker, vv, coverage); + richtext_tracker_checkout_causal_with_coverage( + tracker, + materialized, + vv, + coverage, + ); } match &op.raw_op().content { crate::op::InnerContent::List(l) => match l { @@ -1222,6 +1285,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { pos, } => { tracker.insert( + materialized, op.id_full(), *pos as usize, RichtextChunk::new_text(*unicode_start..*unicode_start + *len), @@ -1229,6 +1293,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { } InnerListOp::Delete(del) => { tracker.delete( + materialized, op.id_start(), del.id_start, del.start() as usize, @@ -1257,6 +1322,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { *end as usize, )); tracker.insert( + materialized, op.id_full(), *start as usize, RichtextChunk::new_style_anchor(style_id as u32, AnchorType::Start), @@ -1270,6 +1336,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { let style_id = styles.len() - pos - 1; let (_start_op, end_pos) = &styles[style_id]; tracker.insert( + materialized, op.id_full(), // need to shift 1 because we insert the start style anchor before this pos *end_pos + 1, @@ -1310,6 +1377,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { )); let style_id = styles.len() - 1; tracker.insert( + materialized, op.id_full(), // need to shift 1 because we insert the start style anchor before this pos *end as usize + 1, @@ -1342,6 +1410,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { ), RichtextCalcMode::Crdt { tracker, + 
materialized, styles, coverage, .. @@ -1349,11 +1418,8 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { let mut delta = DeltaRope::new(); #[cfg(feature = "test_utils")] let tracker_diff_start = std::time::Instant::now(); - let diff_iter = if coverage.is_empty() { - Either::Left(tracker.diff(info.from_vv, info.to_vv)) - } else { - Either::Right(tracker.diff_with_coverage(info.from_vv, info.to_vv, coverage)) - }; + let diff_iter = + tracker.diff_with_coverage(materialized, info.from_vv, info.to_vv, coverage); #[cfg(feature = "test_utils")] profiling::record_richtext_tracker_diff(tracker_diff_start.elapsed()); #[cfg(feature = "test_utils")] @@ -1450,6 +1516,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { #[cfg(feature = "test_utils")] profiling::record_richtext_delta_build(delta_build_start.elapsed()); + materialized.debug_assert_matches_version_projection(info.from_vv, coverage); (InternalDiff::RichtextRaw(delta), DiffMode::Checkout) } } @@ -1484,19 +1551,26 @@ struct MovableListInner { impl DiffCalculatorTrait for MovableListDiffCalculator { fn start_tracking(&mut self, _oplog: &OpLog, vv: &crate::VersionVector, mode: DiffMode) { - if !vv.includes_vv(self.list.start_vv.as_ref()) - || !self.list.tracker.all_vv().includes_vv(vv) + if !version_includes_covered_start(vv, self.list.start_vv.as_ref(), &self.list.coverage) + || !tracker_has_covered_ops(&self.list.tracker, vv, &self.list.coverage) { *self.list.tracker = RichtextTracker::new_with_unknown(); *self.list.start_vv = vv.clone(); self.list.coverage.clear(); + self.list + .materialized + .reset_to_version_projection(vv, &self.list.coverage); } richtext_tracker_checkout_with_coverage( &mut self.list.tracker, + &mut self.list.materialized, vv, self.list.coverage.as_ref(), ); + self.list + .materialized + .debug_assert_matches_version_projection(vv, self.list.coverage.as_ref()); self.inner.current_mode = mode; } @@ -1592,6 +1666,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { if let 
Some(vv) = vv { richtext_tracker_checkout_causal_with_coverage( &mut this.tracker, + &mut this.materialized, vv, this.coverage.as_ref(), ); @@ -1604,6 +1679,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { InnerListOp::Insert { slice, pos } => { updates_tracker = true; this.tracker.insert( + &mut this.materialized, op.id_full(), *pos, RichtextChunk::new_text(slice.0.clone()), @@ -1612,6 +1688,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { InnerListOp::Delete(del) => { updates_tracker = true; this.tracker.delete( + &mut this.materialized, op.id_start(), del.id_start, del.start() as usize, @@ -1622,14 +1699,14 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { InnerListOp::Move { from, elem_id, to } => { updates_tracker = true; self.inner.move_id_to_elem_id.insert(op.id(), *elem_id); - if !this.tracker.current_vv().includes_id(op.id()) { + if !this.materialized.includes_id(op.id()) { let last_pos = if is_checkout { // TODO: PERF: this lookup can be optimized oplog.with_history_cache(|h| { let list = &h.get_checkout_index().movable_list; list.last_pos( *elem_id, - this.tracker.current_vv(), + this.materialized.as_vv(), // TODO: PERF: Provide the lamport of to version Lamport::MAX, oplog, @@ -1652,6 +1729,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { FAKE_ID }; this.tracker.move_item( + &mut this.materialized, op.id_full(), last_pos, *from as usize, diff --git a/crates/loro-internal/src/version.rs b/crates/loro-internal/src/version.rs index 7d4f4b6e7..936a39421 100644 --- a/crates/loro-internal/src/version.rs +++ b/crates/loro-internal/src/version.rs @@ -183,16 +183,19 @@ impl<'a> CausalVersion<'a> { } #[inline] + #[allow(dead_code)] pub(crate) fn base(&self) -> &'a ImVersionVector { self.base } #[inline] + #[allow(dead_code)] pub(crate) fn peer(&self) -> PeerID { self.peer } #[inline] + #[allow(dead_code)] pub(crate) fn peer_end(&self) -> Counter { self.peer_end } From 091e3c9e7c4284bd24b6bf4330c9841ba6e2f095 Mon 
Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 15:23:04 +0000 Subject: [PATCH 44/46] fix: guard richtext tracker reuse --- .../src/container/richtext/tracker.rs | 42 +++++++++----- crates/loro-internal/src/diff_calc.rs | 56 ++++++++++--------- 2 files changed, 56 insertions(+), 42 deletions(-) diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index a419f0b34..c05c82bd6 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -145,11 +145,7 @@ impl TrackerMaterializedVersion { } #[cfg(test)] - fn checkout_peer_spans_without_coverage( - &mut self, - tracker: &mut Tracker, - spans: &[IdSpan], - ) { + fn checkout_peer_spans_without_coverage(&mut self, tracker: &mut Tracker, spans: &[IdSpan]) { self.checkout_peer_spans(tracker, spans, None); } @@ -405,16 +401,19 @@ impl Tracker { // &content // ); // tracing::span!(tracing::Level::INFO, "TrackerInsert"); - if let ControlFlow::Break(_) = - self.skip_applied(materialized, op_id.id(), content.len(), |applied_counter_end| { + if let ControlFlow::Break(_) = self.skip_applied( + materialized, + op_id.id(), + content.len(), + |applied_counter_end| { // the op is partially included, need to slice the content let start = (applied_counter_end - op_id.counter) as usize; op_id.lamport += (applied_counter_end - op_id.counter) as Lamport; op_id.counter = applied_counter_end; pos += start; content = content.slice(start..); - }) - { + }, + ) { return; } @@ -1083,10 +1082,7 @@ mod test { assert_eq!(t.rope.len(), 4); let base = ImVersionVector::new(); - materialized.checkout_to_causal_without_coverage( - &mut t, - CausalVersion::new(&base, 1, 2), - ); + materialized.checkout_to_causal_without_coverage(&mut t, CausalVersion::new(&base, 1, 2)); assert_eq!(t.rope.len(), 2); assert_eq!(materialized.as_vv(), &vv!(1 => 2)); @@ -1210,7 +1206,15 @@ mod test { fn 
test_retreat_and_forward_delete() { let (mut t, mut materialized) = tracker(); insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..10); - delete_text(&mut t, &mut materialized, ID::new(2, 0), ID::NONE_ID, 0, 10, true); + delete_text( + &mut t, + &mut materialized, + ID::new(2, 0), + ID::NONE_ID, + 0, + 10, + true, + ); materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>5)); assert_eq!(t.rope.len(), 5); materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>0)); @@ -1239,7 +1243,15 @@ mod test { fn test_checkout_in_doc_with_del_span() { let (mut t, mut materialized) = tracker(); insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..10); - delete_text(&mut t, &mut materialized, ID::new(2, 0), ID::NONE_ID, 0, 10, false); + delete_text( + &mut t, + &mut materialized, + ID::new(2, 0), + ID::NONE_ID, + 0, + 10, + false, + ); materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>4)); let v: Vec = t.rope.tree().iter().copied().collect(); assert_eq!(v.len(), 2); diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index 3eda8a637..33e71c3b3 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -150,8 +150,9 @@ pub(crate) mod profiling { input.saturating_sub(filtered) as u64; profile.richtext_tracker_max_span_count = profile.richtext_tracker_max_span_count.max(input); - profile.richtext_tracker_max_filtered_span_count = - profile.richtext_tracker_max_filtered_span_count.max(filtered); + profile.richtext_tracker_max_filtered_span_count = profile + .richtext_tracker_max_filtered_span_count + .max(filtered); } }); } @@ -1048,39 +1049,36 @@ fn record_coverage_span(coverage: &mut PeerSpanCoverage, span: IdSpan) { fn version_includes_covered_start( target: &VersionVector, start: &VersionVector, - coverage: &PeerSpanCoverage, + _coverage: &PeerSpanCoverage, ) -> bool { - // `start_vv` may include unrelated ops 
from the same peers. Only peers that - // have container-local coverage are relevant for deciding whether the - // reusable tracker can be moved from its stable lower bound. - coverage.keys().all(|peer| { - let start_end = start.get(peer).copied().unwrap_or(0); - let target_end = target.get(peer).copied().unwrap_or(0); - target_end >= start_end - }) + // `start_vv` is the lower bound used when the reusable tracker was last + // rebuilt from unknown. Even though tracker checkout only applies covered + // spans, the tracker sequence can contain positional anchors that are needed + // to replay later covered ops. Reusing it before this full lower bound risks + // preserving anchors from a future version. + target.includes_vv(start) } fn tracker_has_covered_ops( tracker: &RichtextTracker, target: &VersionVector, - coverage: &PeerSpanCoverage, + _coverage: &PeerSpanCoverage, ) -> bool { - if coverage.is_empty() { - // No coverage means the tracker has no container-local proof for ops - // included by a non-empty start version. Reset so `start_vv` records - // this lower bound. Later checkouts before that bound rebuild from unknown. - return target.is_empty(); + // Coverage is only a filter for tracker checkout work; it is not a proof + // that every earlier op affecting positions has been materialized. Reusing + // a tracker across a causal gap can make later positional ops, such as text + // deletes or movable-list moves, apply against the wrong local sequence. 
+ tracker.all_vv().includes_vv(target) +} + +fn materialize_causal_version(vv: CausalVersion<'_>) -> VersionVector { + let mut version = VersionVector::from_im_vv(vv.base()); + let peer_end = vv.peer_end(); + if peer_end > version.get(&vv.peer()).copied().unwrap_or(0) { + version.insert(vv.peer(), peer_end); } - coverage.iter().all(|(&peer, span)| { - let target_end = target.get(&peer).copied().unwrap_or(0); - let required_end = target_end.min(span.norm_end()); - required_end <= span.min() - || tracker - .all_vv() - .get(&peer) - .is_some_and(|&applied_end| applied_end >= required_end) - }) + version } impl DiffCalculatorTrait for RichtextDiffCalculator { @@ -1663,6 +1661,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { { // Apply change on the list items let this = &mut self.list; + let causal_lookup_vv = vv.map(materialize_causal_version); if let Some(vv) = vv { richtext_tracker_checkout_causal_with_coverage( &mut this.tracker, @@ -1704,9 +1703,12 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { // TODO: PERF: this lookup can be optimized oplog.with_history_cache(|h| { let list = &h.get_checkout_index().movable_list; + let lookup_vv = causal_lookup_vv + .as_ref() + .unwrap_or(this.materialized.as_vv()); list.last_pos( *elem_id, - this.materialized.as_vv(), + lookup_vv, // TODO: PERF: Provide the lamport of to version Lamport::MAX, oplog, From 3c2b53e544a2a0caa8d5b57212cf6d0afa61c93b Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Fri, 22 May 2026 15:30:05 +0000 Subject: [PATCH 45/46] test: skip shallow peers in gc fuzzer sync --- crates/fuzz/src/crdt_fuzzer.rs | 9 +++++++++ crates/fuzz/tests/test.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/crates/fuzz/src/crdt_fuzzer.rs b/crates/fuzz/src/crdt_fuzzer.rs index 269043e18..6a7c756e8 100644 --- a/crates/fuzz/src/crdt_fuzzer.rs +++ b/crates/fuzz/src/crdt_fuzzer.rs @@ -731,6 +731,15 @@ pub fn test_multi_sites_with_gc( let (a, b) = array_mut_ref!(&mut 
this.actors, [i, j]); let a_doc = &mut a.loro; let b_doc = &mut b.loro; + let a_shallow = a_doc.is_shallow(); + let b_shallow = b_doc.is_shallow(); + // Shallow docs cannot export ops before the shallow root, so + // they cannot sync complete history to empty peers. This mirrors + // the non-GC `check_equal` guard. + if a_shallow || b_shallow { + continue; + } + info_span!("Attach", peer = i).in_scope(|| { a_doc.attach(); }); diff --git a/crates/fuzz/tests/test.rs b/crates/fuzz/tests/test.rs index 037a843ed..45713d0be 100644 --- a/crates/fuzz/tests/test.rs +++ b/crates/fuzz/tests/test.rs @@ -10172,6 +10172,35 @@ fn shallow_arb_test() { arbtest::builder().budget_ms(1000).run(|u| prop(u, 5)) } +#[test] +fn shallow_import_after_empty_shallow_export_and_text_edit_converges() { + test_multi_sites_with_gc( + 5, + vec![FuzzTarget::All], + &mut [ + Commit { site: 54 }, + ExportShallow { site: 160 }, + Handle { + site: 85, + target: 222, + container: 228, + action: Generic(GenericAction { + value: Container(Map), + bool: false, + key: 441515985, + pos: 11802333225252155855, + length: 9719789893245689708, + prop: 1572726038975133702, + }), + }, + ImportShallow { + site: 21, + from: 240, + }, + ], + ) +} + #[test] fn shallow_fuzz_snapshot_after_shallow_import_and_diff_apply() { test_multi_sites_with_gc( From 079709f36c20818e7c2ee186587a9ed96960e40a Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Tue, 26 May 2026 01:58:07 +0000 Subject: [PATCH 46/46] fix: harden checkout replay invariants --- crates/loro-internal/src/diff_calc.rs | 34 ++++++++++++++++++++--- crates/loro-internal/src/version.rs | 7 +++-- crates/loro/tests/mov.rs | 39 +++++++++++++++++++++++++++ crates/loro/tests/panic_test.rs | 37 +++++++++++++++++++++++++ 4 files changed, 112 insertions(+), 5 deletions(-) diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index 33e71c3b3..dbc7fbe5e 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ 
b/crates/loro-internal/src/diff_calc.rs @@ -348,7 +348,14 @@ impl DiffCalculator { calculator.start_tracking(oplog, &lca, diff_mode); } - if visited.contains(&op.container) { + // Movable-list move replay needs the before-op causal VV for + // history-cache last_pos lookups. The tracker's materialized + // version is only a container projection and may not include + // causal deps that inserted the moved element, so it cannot + // replace CausalVersion here. + let should_reuse_container_checkout = visited.contains(&op.container) + && !matches!(calculator, ContainerDiffCalculator::MovableList(_)); + if should_reuse_container_checkout { // don't checkout if we have already checked out this container in this round calculator.apply_change(oplog, RichOp::new_by_change(&change, op), None); } else { @@ -1706,6 +1713,12 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { let lookup_vv = causal_lookup_vv .as_ref() .unwrap_or(this.materialized.as_vv()); + // Invariant: a valid move's elem_id must have a previous + // position in its before-op causal VV. The original insert + // causally precedes the move in full history; shallow docs + // seed visible root elements into the checkout history cache. + // Missing last_pos means either the op log or the replay VV is + // inconsistent, so failing fast is better than corrupting state. list.last_pos( *elem_id, lookup_vv, @@ -1713,7 +1726,14 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { Lamport::MAX, oplog, ) - .unwrap() + .unwrap_or_else(|| { + panic!( + "missing previous movable-list position: move_op={:?}, elem_id={:?}, lookup_vv={:?}", + op.id(), + elem_id, + lookup_vv + ) + }) .id() }) } else { @@ -1843,9 +1863,17 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { return false; }; // TODO: PERF: Provide the lamport of to version + // Invariant: if an element has a position at to_vv, it must also + // have a value at to_vv. 
Full history falls back to the original + // insert value; shallow roots record position and value together. let value = checkout_index .last_value(id, info.to_vv, Lamport::MAX, oplog) - .unwrap(); + .unwrap_or_else(|| { + panic!( + "missing movable-list value for positioned element: elem_id={:?}, to_vv={:?}", + id, info.to_vv + ) + }); // TODO: PERF: Provide the lamport of to version let old_pos = checkout_index.last_pos(id, info.from_vv, Lamport::MAX, oplog); // TODO: PERF: Provide the lamport of to version diff --git a/crates/loro-internal/src/version.rs b/crates/loro-internal/src/version.rs index 89002d149..331895ce3 100644 --- a/crates/loro-internal/src/version.rs +++ b/crates/loro-internal/src/version.rs @@ -166,8 +166,11 @@ pub struct ImVersionVector(im::HashMap { base: &'a ImVersionVector, diff --git a/crates/loro/tests/mov.rs b/crates/loro/tests/mov.rs index 4a1655966..f7b9140c2 100644 --- a/crates/loro/tests/mov.rs +++ b/crates/loro/tests/mov.rs @@ -59,3 +59,42 @@ fn conflict_moves() -> Result<(), LoroError> { Ok(()) } + +#[test] +fn checkout_movable_list_multi_op_change_after_snapshot() -> Result<(), LoroError> { + let doc = LoroDoc::new(); + doc.set_peer_id(1)?; + let list = doc.get_movable_list("list"); + list.insert(0, 0)?; + list.insert(1, 1)?; + list.insert(2, 2)?; + doc.commit(); + let base = doc.state_frontiers(); + + doc.set_peer_id(2)?; + list.insert(0, 9)?; + list.mov(1, 3)?; + doc.commit(); + let latest = doc.state_frontiers(); + assert_eq!( + doc.get_deep_value().to_json_value(), + json!({ + "list": [9, 1, 2, 0] + }) + ); + + let restored = LoroDoc::new(); + restored.import(&doc.export(ExportMode::Snapshot)?)?; + restored.checkout(&base)?; + assert_eq!( + restored.get_deep_value().to_json_value(), + json!({ + "list": [0, 1, 2] + }) + ); + + restored.checkout(&latest)?; + assert_eq!(restored.get_deep_value(), doc.get_deep_value()); + + Ok(()) +} diff --git a/crates/loro/tests/panic_test.rs b/crates/loro/tests/panic_test.rs index 
d9e70b296..b8bd98d27 100644 --- a/crates/loro/tests/panic_test.rs +++ b/crates/loro/tests/panic_test.rs @@ -369,6 +369,43 @@ fn import_json_updates_with_text_mark_end_without_mark_should_error_without_pani assert_eq!(dst.get_deep_value(), before_value); } +#[test] +#[parallel] +fn import_json_updates_with_text_mark_end_counter_gap_should_error_without_panic() { + let src = LoroDoc::new(); + src.set_peer_id(35).unwrap(); + let text = src.get_text("text"); + text.insert(0, "abc").unwrap(); + src.commit(); + let first = src.export_json_updates(&Default::default(), &src.oplog_vv()); + let first_vv = src.oplog_vv(); + + text.mark(0..2, "bold", true).unwrap(); + src.commit(); + let mut suffix = src.export_json_updates(&first_vv, &src.oplog_vv()); + suffix.changes[0].ops[1].counter += 1; + + let dst = ManuallyDrop::new(LoroDoc::new()); + dst.import_json_updates(first).unwrap(); + let before_vv = dst.oplog_vv(); + let before_frontiers = dst.oplog_frontiers(); + let before_value = dst.get_deep_value(); + + let result = std::panic::catch_unwind(AssertUnwindSafe(|| dst.import_json_updates(suffix))); + assert!( + result.is_ok(), + "malformed text MarkEnd counter JSON import should not panic" + ); + assert!( + result.unwrap().is_err(), + "malformed text MarkEnd counter JSON import unexpectedly succeeded; imported value = {:?}", + dst.get_deep_value() + ); + assert_eq!(dst.oplog_vv(), before_vv); + assert_eq!(dst.oplog_frontiers(), before_frontiers); + assert_eq!(dst.get_deep_value(), before_value); +} + // --------------------------------------------------------------------------- // 9. Detached tree methods that used to panic — FIXED // ---------------------------------------------------------------------------