diff --git a/Cargo.lock b/Cargo.lock index 6cabc2bcb..f3af8e2da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1873,7 +1873,7 @@ checksum = "3f3d053a135388e6b1df14e8af1212af5064746e9b87a06a345a7a779ee9695a" [[package]] name = "loro-wasm" -version = "1.12.1" +version = "1.12.2" dependencies = [ "console_error_panic_hook", "js-sys", diff --git a/crates/fuzz/src/crdt_fuzzer.rs b/crates/fuzz/src/crdt_fuzzer.rs index 269043e18..6a7c756e8 100644 --- a/crates/fuzz/src/crdt_fuzzer.rs +++ b/crates/fuzz/src/crdt_fuzzer.rs @@ -731,6 +731,15 @@ pub fn test_multi_sites_with_gc( let (a, b) = array_mut_ref!(&mut this.actors, [i, j]); let a_doc = &mut a.loro; let b_doc = &mut b.loro; + let a_shallow = a_doc.is_shallow(); + let b_shallow = b_doc.is_shallow(); + // Shallow docs cannot export ops before the shallow root, so + // they cannot sync complete history to empty peers. This mirrors + // the non-GC `check_equal` guard. + if a_shallow || b_shallow { + continue; + } + info_span!("Attach", peer = i).in_scope(|| { a_doc.attach(); }); diff --git a/crates/fuzz/src/one_doc_fuzzer.rs b/crates/fuzz/src/one_doc_fuzzer.rs index 7872ccb7d..6dc72dcc4 100644 --- a/crates/fuzz/src/one_doc_fuzzer.rs +++ b/crates/fuzz/src/one_doc_fuzzer.rs @@ -551,7 +551,11 @@ impl OneDocFuzzer { peer: before.0, counter: before.1, }; - tree.mov_before(target, before).unwrap(); + if let Err(LoroError::TreeError(e)) = + tree.mov_before(target, before) + { + tracing::warn!("move error {}", e); + } } crate::container::TreeActionInner::MoveAfter { target, after } => { let target = TreeID { @@ -562,7 +566,11 @@ impl OneDocFuzzer { peer: after.0, counter: after.1, }; - tree.mov_after(target, after).unwrap(); + if let Err(LoroError::TreeError(e)) = + tree.mov_after(target, after) + { + tracing::warn!("move error {}", e); + } } crate::container::TreeActionInner::Meta { meta: (k, v) } => { let meta = tree.get_meta(target).unwrap(); diff --git a/crates/fuzz/tests/test.rs b/crates/fuzz/tests/test.rs index 
39875b144..660f610bb 100644 --- a/crates/fuzz/tests/test.rs +++ b/crates/fuzz/tests/test.rs @@ -172,6 +172,163 @@ fn all_fuzz_state_only_before_shallow_root() { ) } +#[test] +fn all_fuzz_state_only_roundtrip_after_diff_apply_text_update() { + test_multi_sites( + 5, + vec![FuzzTarget::All], + &mut [ + Handle { + site: 196, + target: 0, + container: 151, + action: Generic(GenericAction { + value: Container(List), + bool: true, + key: 1835887981, + pos: 15359179523395251565, + length: 6845301837235606980, + prop: 4959913191460359423, + }), + }, + SyncAll, + SyncAll, + SetCommitOptions { + site: 255, + origin: 255, + msg: 93, + }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: Container(Unknown(255)), + bool: true, + key: 4294967295, + pos: 3225938275189391359, + length: 7885078839350357357, + prop: 3617008642897571181, + }), + }, + DiffApply { from: 125, to: 178 }, + SetCommitOptions { + site: 242, + origin: 242, + msg: 242, + }, + DiffApply { from: 255, to: 255 }, + SetCommitOptions { + site: 109, + origin: 109, + msg: 109, + }, + SyncAll, + ForkAt { + site: 109, + to: 1835887981, + }, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + Sync { from: 91, to: 91 }, + Sync { from: 91, to: 91 }, + SyncAll, + StateOnlyRoundTrip { site: 213 }, + SyncAll, + StateOnlyRoundTrip { site: 255 }, + ], + ) +} + +#[test] +fn all_fuzz_one_doc_ignores_cyclic_move_before_error() { + test_multi_sites_on_one_doc( + 5, + &mut [ + Handle { + site: 11, + target: 148, + container: 148, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 65296, + pos: 18446744073709551600, + length: 18446744073709551615, + prop: 11240984665823117311, + }), + }, + Query { + site: 125, + target: 125, + query_type: 119, + }, + Handle { + site: 0, + target: 125, + container: 125, + action: Generic(GenericAction { + value: I32(2105376125), + bool: true, + key: 2105376125, + pos: 9042521604759584125, + length: 
9042521604759584125, + prop: 9042521604759584125, + }), + }, + ForkAt { + site: 125, + to: 2105376125, + }, + Query { + site: 155, + target: 155, + query_type: 155, + }, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + Handle { + site: 3, + target: 136, + container: 107, + action: Generic(GenericAction { + value: I32(1802075016), + bool: true, + key: 1802201963, + pos: 7740429931049413483, + length: 285424485, + prop: 29704420010754048, + }), + }, + SyncAll, + Handle { + site: 3, + target: 136, + container: 107, + action: Generic(GenericAction { + value: I32(1802070920), + bool: true, + key: 1929407339, + pos: 3026537059180544374, + length: 232891317779694337, + prop: 3124043742624574344, + }), + }, + ], + ) +} + #[test] fn test_local_events() { fuzz_local_events(vec![ @@ -10051,6 +10208,35 @@ fn shallow_arb_test() { arbtest::builder().budget_ms(1000).run(|u| prop(u, 5)) } +#[test] +fn shallow_import_after_empty_shallow_export_and_text_edit_converges() { + test_multi_sites_with_gc( + 5, + vec![FuzzTarget::All], + &mut [ + Commit { site: 54 }, + ExportShallow { site: 160 }, + Handle { + site: 85, + target: 222, + container: 228, + action: Generic(GenericAction { + value: Container(Map), + bool: false, + key: 441515985, + pos: 11802333225252155855, + length: 9719789893245689708, + prop: 1572726038975133702, + }), + }, + ImportShallow { + site: 21, + from: 240, + }, + ], + ) +} + #[test] fn shallow_fuzz_snapshot_after_shallow_import_and_diff_apply() { test_multi_sites_with_gc( diff --git a/crates/loro-common/src/lib.rs b/crates/loro-common/src/lib.rs index 81f10ed12..4c6f0002c 100644 --- a/crates/loro-common/src/lib.rs +++ b/crates/loro-common/src/lib.rs @@ -366,6 +366,26 @@ impl ContainerType { } } + /// Returns whether importing ops for this container type may need import + /// rollback protection during state diff application. 
+ /// + /// This is used by import preflight: if an imported or newly-unblocked + /// pending change touches one of these container types, the oplog enables + /// rollback bookkeeping before applying the change to the document state. + /// Keep this list aligned with container states whose diff validation or + /// application can return an error after the oplog has already advanced. + /// + /// Container types not listed here may still be complex, but their current + /// state-apply path does not report recoverable errors through + /// `LoroResult`, so enabling rollback for them would only add import + /// overhead. + pub fn may_need_state_apply_rollback(&self) -> bool { + matches!( + self, + ContainerType::List | ContainerType::Text | ContainerType::Tree + ) + } + pub fn to_u8(self) -> u8 { match self { ContainerType::Map => 0, diff --git a/crates/loro-internal/Cargo.toml b/crates/loro-internal/Cargo.toml index da75ce0b0..5aeab2238 100644 --- a/crates/loro-internal/Cargo.toml +++ b/crates/loro-internal/Cargo.toml @@ -101,6 +101,10 @@ jsonpath = [] name = "text_r" harness = false +[[bench]] +name = "text_checkout" +harness = false + [[bench]] name = "list" harness = false diff --git a/crates/loro-internal/benches/text_checkout.rs b/crates/loro-internal/benches/text_checkout.rs new file mode 100644 index 000000000..10e97d6b4 --- /dev/null +++ b/crates/loro-internal/benches/text_checkout.rs @@ -0,0 +1,854 @@ +use criterion::{criterion_group, criterion_main, Criterion}; + +#[cfg(feature = "test_utils")] +mod text_checkout { + use std::{hint::black_box, sync::Arc, time::Duration}; + + use criterion::{measurement::WallTime, BenchmarkGroup, BenchmarkId, Criterion}; + use loro_internal::{ + cursor::PosType, + id::PeerID, + loro::{CheckoutProfile, ExportMode, TextStateProfile}, + version::Frontiers, + LoroDoc, Subscription, + }; + use rand::{rngs::StdRng, Rng, SeedableRng}; + + const TEXT: &str = "fn checkout_profile() { let value = document.version(); }\n"; + + 
#[derive(Debug, Clone, Copy)] + struct FixtureStats { + scenario: &'static str, + peer_count: usize, + change_count: usize, + base_len: usize, + text_container_count: usize, + large_text_container_count: usize, + large_text_len: usize, + version_count: usize, + subscribed: bool, + } + + struct CheckoutFixture { + doc: LoroDoc, + frontiers: Vec, + stats: FixtureStats, + _subscription: Option, + } + + #[derive(Debug, Default)] + struct ProfileTotals { + samples: u64, + total: Duration, + frontier_prepare: Duration, + frontiers_to_vv: Duration, + diff_calc: Duration, + state_apply: Duration, + emit_events: Duration, + richtext_tracker_checkout: Duration, + richtext_tracker_diff: Duration, + richtext_delta_build: Duration, + richtext_insert_future_scan: Duration, + causal_vv_materialize: Duration, + max_frontiers_width: usize, + max_vv_width: usize, + max_causal_vv_width: usize, + max_diff_container_count: usize, + diff_container_count_sum: u64, + richtext_tracker_checkout_count: u64, + richtext_tracker_diff_count: u64, + richtext_delta_build_count: u64, + richtext_insert_future_scan_count: u64, + richtext_insert_future_scan_visited: u64, + richtext_insert_future_scan_max_visited: usize, + causal_vv_materialize_count: u64, + richtext_tracker_span_filter_count: u64, + richtext_tracker_span_count: u64, + richtext_tracker_filtered_span_count: u64, + richtext_tracker_skipped_span_count: u64, + richtext_tracker_max_span_count: usize, + richtext_tracker_max_filtered_span_count: usize, + richtext_id_to_cursor_iter_count: u64, + richtext_id_to_cursor_empty_iter_count: u64, + recording_event_samples: u64, + forward_diff_calculator_samples: u64, + } + + impl ProfileTotals { + fn add(&mut self, profile: CheckoutProfile) { + self.samples += 1; + self.total += profile.total; + self.frontier_prepare += profile.frontier_prepare; + self.frontiers_to_vv += profile.frontiers_to_vv; + self.diff_calc += profile.diff_calc; + self.state_apply += profile.state_apply; + self.emit_events += 
profile.emit_events; + self.richtext_tracker_checkout += profile.richtext_tracker_checkout; + self.richtext_tracker_diff += profile.richtext_tracker_diff; + self.richtext_delta_build += profile.richtext_delta_build; + self.richtext_insert_future_scan += profile.richtext_insert_future_scan; + self.causal_vv_materialize += profile.causal_vv_materialize; + self.max_frontiers_width = self + .max_frontiers_width + .max(profile.from_frontiers_len) + .max(profile.to_frontiers_len); + self.max_vv_width = self + .max_vv_width + .max(profile.from_vv_len) + .max(profile.to_vv_len); + self.max_causal_vv_width = self.max_causal_vv_width.max(profile.max_causal_vv_width); + self.max_diff_container_count = self + .max_diff_container_count + .max(profile.diff_container_count); + self.diff_container_count_sum += profile.diff_container_count as u64; + self.richtext_tracker_checkout_count += profile.richtext_tracker_checkout_count; + self.richtext_tracker_diff_count += profile.richtext_tracker_diff_count; + self.richtext_delta_build_count += profile.richtext_delta_build_count; + self.richtext_insert_future_scan_count += profile.richtext_insert_future_scan_count; + self.richtext_insert_future_scan_visited += profile.richtext_insert_future_scan_visited; + self.richtext_insert_future_scan_max_visited = self + .richtext_insert_future_scan_max_visited + .max(profile.richtext_insert_future_scan_max_visited); + self.causal_vv_materialize_count += profile.causal_vv_materialize_count; + self.richtext_tracker_span_filter_count += profile.richtext_tracker_span_filter_count; + self.richtext_tracker_span_count += profile.richtext_tracker_span_count; + self.richtext_tracker_filtered_span_count += + profile.richtext_tracker_filtered_span_count; + self.richtext_tracker_skipped_span_count += profile.richtext_tracker_skipped_span_count; + self.richtext_tracker_max_span_count = self + .richtext_tracker_max_span_count + .max(profile.richtext_tracker_max_span_count); + 
self.richtext_tracker_max_filtered_span_count = self + .richtext_tracker_max_filtered_span_count + .max(profile.richtext_tracker_max_filtered_span_count); + self.richtext_id_to_cursor_iter_count += profile.richtext_id_to_cursor_iter_count; + self.richtext_id_to_cursor_empty_iter_count += + profile.richtext_id_to_cursor_empty_iter_count; + if profile.recording_events { + self.recording_event_samples += 1; + } + if profile.forward_diff_calculator { + self.forward_diff_calculator_samples += 1; + } + } + } + + pub fn text_checkout(c: &mut Criterion) { + let peer_count = env_usize("LORO_TEXT_CHECKOUT_PEERS", 1000).max(1); + let base_len = env_usize("LORO_TEXT_CHECKOUT_BASE_LEN", 8192).max(1); + let sequential_changes = env_usize("LORO_TEXT_CHECKOUT_CHANGES", peer_count.max(1000)); + let text_container_count = env_usize("LORO_TEXT_CHECKOUT_TEXT_CONTAINERS", 10_000).max(1); + let large_text_container_count = + env_usize("LORO_TEXT_CHECKOUT_LARGE_TEXT_CONTAINERS", 8).min(text_container_count); + let small_text_len = env_usize("LORO_TEXT_CHECKOUT_SMALL_TEXT_LEN", 8); + let large_text_len = env_usize("LORO_TEXT_CHECKOUT_LARGE_TEXT_LEN", 65_536); + let container_edit_count = + env_usize("LORO_TEXT_CHECKOUT_CONTAINER_EDITS", text_container_count).max(1); + + let mut group = c.benchmark_group("text checkout"); + group.sample_size(10); + + bench_fixture( + &mut group, + "plain/random-peer-checkout", + build_concurrent_plain(peer_count, base_len, false, false), + ); + bench_fixture( + &mut group, + "plain/same-position-peer-checkout", + build_concurrent_plain(peer_count, base_len, true, false), + ); + bench_fixture( + &mut group, + "plain/random-peer-checkout/subscribed", + build_concurrent_plain(peer_count, base_len, false, true), + ); + bench_fixture( + &mut group, + "plain/wide-causal-peer-checkout", + build_wide_causal_plain(peer_count, base_len, false), + ); + bench_fixture( + &mut group, + "rich/overlap-mark-peer-checkout", + build_concurrent_rich_marks(peer_count, 
base_len, false), + ); + bench_fixture( + &mut group, + "rich/overlap-mark-peer-checkout/subscribed", + build_concurrent_rich_marks(peer_count, base_len, true), + ); + bench_fixture( + &mut group, + "rich/unmark-style-peer-checkout", + build_concurrent_rich_unmarks(peer_count, base_len, false), + ); + bench_fixture( + &mut group, + "code/sequential-one-op-txn", + build_code_like_history(sequential_changes, base_len, 1, false), + ); + bench_fixture( + &mut group, + "code/sequential-eight-op-txn", + build_code_like_history((sequential_changes / 8).max(1), base_len, 8, false), + ); + bench_checkout_to_latest_fixture( + &mut group, + "code/checkout-to-latest-linear", + build_code_like_history(sequential_changes, base_len, 1, false), + ); + bench_checkout_latest_to_base_fixture( + &mut group, + "multi-container/latest-to-base", + build_many_text_container_history( + peer_count, + text_container_count, + large_text_container_count, + small_text_len, + large_text_len, + container_edit_count, + false, + ), + ); + + group.finish(); + } + + fn bench_fixture( + group: &mut BenchmarkGroup<'_, WallTime>, + name: &str, + fixture: CheckoutFixture, + ) { + let CheckoutFixture { + doc, + frontiers, + stats, + _subscription, + } = fixture; + let mut totals = ProfileTotals::default(); + let mut rng = StdRng::seed_from_u64(0x74ea_7c0d); + let mut last_frontier_idx = usize::MAX; + + group.bench_with_input( + BenchmarkId::new(name, stats.version_count), + &frontiers, + |b, frontiers| { + b.iter_custom(|iters| { + let start = std::time::Instant::now(); + for _ in 0..iters { + let mut frontier_idx = rng.gen_range(0..frontiers.len()); + if frontiers.len() > 1 && frontier_idx == last_frontier_idx { + frontier_idx = (frontier_idx + 1) % frontiers.len(); + } + last_frontier_idx = frontier_idx; + let frontier = &frontiers[frontier_idx]; + let profile = doc.checkout_with_profile(frontier).unwrap(); + totals.add(profile); + black_box(profile); + } + + start.elapsed() + }); + }, + ); + + let 
state_profile = doc.text_state_profile("text"); + maybe_report_profile(name, stats, &totals, state_profile); + } + + fn bench_checkout_latest_to_base_fixture( + group: &mut BenchmarkGroup<'_, WallTime>, + name: &str, + fixture: CheckoutFixture, + ) { + let CheckoutFixture { + doc, + frontiers, + stats, + _subscription, + } = fixture; + let base_frontier = frontiers.first().unwrap().clone(); + let latest_frontier = frontiers.last().unwrap().clone(); + let mut totals = ProfileTotals::default(); + + group.bench_with_input( + BenchmarkId::new(name, stats.version_count), + &base_frontier, + |b, base_frontier| { + b.iter_custom(|iters| { + let mut measured = Duration::ZERO; + for _ in 0..iters { + doc.checkout(&latest_frontier).unwrap(); + let start = std::time::Instant::now(); + let profile = doc.checkout_with_profile(base_frontier).unwrap(); + measured += start.elapsed(); + totals.add(profile); + black_box(profile); + } + + measured + }); + }, + ); + + let state_profile = doc.text_state_profile("text"); + maybe_report_profile(name, stats, &totals, state_profile); + } + + fn bench_checkout_to_latest_fixture( + group: &mut BenchmarkGroup<'_, WallTime>, + name: &str, + fixture: CheckoutFixture, + ) { + let CheckoutFixture { + doc, + frontiers, + stats, + _subscription, + } = fixture; + let old_frontier_idx = if frontiers.len() > 2 { + frontiers.len() / 2 + } else { + 0 + }; + let old_frontier = frontiers[old_frontier_idx].clone(); + let latest_frontier = frontiers.last().unwrap().clone(); + let mut totals = ProfileTotals::default(); + + group.bench_with_input( + BenchmarkId::new(name, stats.version_count), + &latest_frontier, + |b, latest_frontier| { + b.iter_custom(|iters| { + let mut measured = Duration::ZERO; + for _ in 0..iters { + doc.checkout(&old_frontier).unwrap(); + let start = std::time::Instant::now(); + let profile = doc.checkout_with_profile(latest_frontier).unwrap(); + measured += start.elapsed(); + totals.add(profile); + black_box(profile); + } + + measured 
+ }); + }, + ); + + let state_profile = doc.text_state_profile("text"); + maybe_report_profile(name, stats, &totals, state_profile); + } + + fn build_concurrent_plain( + peer_count: usize, + base_len: usize, + same_position: bool, + subscribed: bool, + ) -> CheckoutFixture { + let (snapshot, base_vv) = build_base_snapshot(base_len); + let doc = LoroDoc::new_auto_commit(); + doc.import(&snapshot).unwrap(); + let mut frontiers = Vec::with_capacity(peer_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(if same_position { 1 } else { 2 }); + + for peer in 0..peer_count { + let peer_doc = doc_from_snapshot(&snapshot, peer as PeerID + 2); + let text = peer_doc.get_text("text"); + let pos = if same_position { + 0 + } else { + rng.gen_range(0..=base_len) + }; + text.insert(pos, "x", PosType::Unicode).unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: if same_position { + "plain same-position concurrent inserts" + } else { + "plain random concurrent inserts" + }, + peer_count, + change_count: peer_count, + base_len, + text_container_count: 1, + large_text_container_count: 0, + large_text_len: 0, + version_count: peer_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_wide_causal_plain( + peer_count: usize, + base_len: usize, + subscribed: bool, + ) -> CheckoutFixture { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + let base = repeated_text(base_len); + text.insert(0, &base, PosType::Unicode).unwrap(); + doc.commit_then_renew(); + let mut frontiers = Vec::with_capacity(peer_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(6); + + for (peer, len) in (0..peer_count).zip(base_len..) 
{ + let snapshot = doc.export(ExportMode::snapshot()).unwrap(); + let base_vv = doc.oplog_vv(); + let peer_doc = doc_from_snapshot(&snapshot, peer as PeerID + 2); + let text = peer_doc.get_text("text"); + let pos = rng.gen_range(0..=len); + text.insert(pos, "x", PosType::Unicode).unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: "plain sequential multi-peer edits with wide causal VV", + peer_count, + change_count: peer_count, + base_len, + text_container_count: 1, + large_text_container_count: 0, + large_text_len: 0, + version_count: peer_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_concurrent_rich_marks( + peer_count: usize, + base_len: usize, + subscribed: bool, + ) -> CheckoutFixture { + let (snapshot, base_vv) = build_base_snapshot(base_len); + let doc = LoroDoc::new_auto_commit(); + doc.import(&snapshot).unwrap(); + let mut frontiers = Vec::with_capacity(peer_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(3); + let keys = ["bold", "italic", "comment"]; + + for peer in 0..peer_count { + let peer_doc = doc_from_snapshot(&snapshot, peer as PeerID + 2); + let text = peer_doc.get_text("text"); + let start = rng.gen_range(0..base_len); + let end = (start + rng.gen_range(1..=32)).min(base_len); + text.mark( + start, + end, + keys[peer % keys.len()], + true.into(), + PosType::Unicode, + ) + .unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: "rich text overlapping concurrent marks", + peer_count, + change_count: peer_count, + base_len, + text_container_count: 1, + 
large_text_container_count: 0, + large_text_len: 0, + version_count: peer_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_concurrent_rich_unmarks( + peer_count: usize, + base_len: usize, + subscribed: bool, + ) -> CheckoutFixture { + let (snapshot, base_vv) = build_styled_base_snapshot(base_len); + let doc = LoroDoc::new_auto_commit(); + doc.import(&snapshot).unwrap(); + let mut frontiers = Vec::with_capacity(peer_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(5); + + for peer in 0..peer_count { + let peer_doc = doc_from_snapshot(&snapshot, peer as PeerID + 2); + let text = peer_doc.get_text("text"); + let start = rng.gen_range(0..base_len); + let end = (start + rng.gen_range(1..=32)).min(base_len).max(start + 1); + text.unmark(start, end, "bold", PosType::Unicode).unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: "rich text concurrent style deletion", + peer_count, + change_count: peer_count, + base_len, + text_container_count: 1, + large_text_container_count: 0, + large_text_len: 0, + version_count: peer_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_code_like_history( + change_count: usize, + base_len: usize, + ops_per_commit: usize, + subscribed: bool, + ) -> CheckoutFixture { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + let base = repeated_text(base_len); + text.insert(0, &base, PosType::Unicode).unwrap(); + doc.commit_then_renew(); + let mut frontiers = Vec::with_capacity(change_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(4 + ops_per_commit as u64); + let mut len = base_len; + + for change in 0..change_count { + for op in 0..ops_per_commit { + if len > 0 && 
(change + op) % 5 == 0 { + let pos = rng.gen_range(0..len); + text.delete(pos, 1, PosType::Unicode).unwrap(); + len -= 1; + } else { + let token = if op % 2 == 0 { "\nlet x = 1;" } else { ";" }; + let pos = rng.gen_range(0..=len); + text.insert(pos, token, PosType::Unicode).unwrap(); + len += token.chars().count(); + } + } + doc.commit_then_renew(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: if ops_per_commit == 1 { + "code-like sequential one-op transactions" + } else { + "code-like sequential multi-op transactions" + }, + peer_count: 1, + change_count, + base_len, + text_container_count: 1, + large_text_container_count: 0, + large_text_len: 0, + version_count: change_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_many_text_container_history( + peer_count: usize, + text_container_count: usize, + large_text_container_count: usize, + small_text_len: usize, + large_text_len: usize, + edit_count: usize, + subscribed: bool, + ) -> CheckoutFixture { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let small_text = repeated_text(small_text_len); + let large_text = repeated_text(large_text_len); + let mut texts = Vec::with_capacity(text_container_count); + let mut lens = Vec::with_capacity(text_container_count); + + for idx in 0..text_container_count { + let name = text_container_name(idx); + let text = doc.get_text(name.as_str()); + let initial = if idx < large_text_container_count { + &large_text + } else { + &small_text + }; + if !initial.is_empty() { + text.insert(0, initial, PosType::Unicode).unwrap(); + } + texts.push(text); + lens.push(initial.chars().count()); + } + + doc.commit_then_renew(); + let mut frontiers = Vec::with_capacity(edit_count + 1); + frontiers.push(doc.oplog_frontiers()); + let mut rng = StdRng::seed_from_u64(0x7e57_c001); + + for edit in 0..edit_count { + let peer = edit % peer_count; + doc.set_peer_id(peer as PeerID + 
2).unwrap(); + let text_idx = edit % text_container_count; + let pos = rng.gen_range(0..=lens[text_idx]); + texts[text_idx].insert(pos, "x", PosType::Unicode).unwrap(); + lens[text_idx] += 1; + doc.commit_then_renew(); + frontiers.push(doc.oplog_frontiers()); + } + + attach_subscription( + doc, + frontiers, + FixtureStats { + scenario: "many text containers with wide multi-peer checkout", + peer_count, + change_count: edit_count, + base_len: small_text_len, + text_container_count, + large_text_container_count, + large_text_len, + version_count: edit_count + 1, + subscribed, + }, + subscribed, + ) + } + + fn build_base_snapshot(base_len: usize) -> (Vec, loro_internal::VersionVector) { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + let base = repeated_text(base_len); + text.insert(0, &base, PosType::Unicode).unwrap(); + doc.commit_then_renew(); + (doc.export(ExportMode::snapshot()).unwrap(), doc.oplog_vv()) + } + + fn build_styled_base_snapshot(base_len: usize) -> (Vec, loro_internal::VersionVector) { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + let base = repeated_text(base_len); + text.insert(0, &base, PosType::Unicode).unwrap(); + text.mark(0, base_len, "bold", true.into(), PosType::Unicode) + .unwrap(); + doc.commit_then_renew(); + (doc.export(ExportMode::snapshot()).unwrap(), doc.oplog_vv()) + } + + fn doc_from_snapshot(snapshot: &[u8], peer: PeerID) -> LoroDoc { + let doc = LoroDoc::new_auto_commit(); + doc.import(snapshot).unwrap(); + doc.set_peer_id(peer).unwrap(); + doc + } + + fn attach_subscription( + doc: LoroDoc, + frontiers: Vec, + stats: FixtureStats, + subscribed: bool, + ) -> CheckoutFixture { + let subscription = subscribed.then(|| { + doc.subscribe_root(Arc::new(|event| { + black_box(event); + })) + }); + + CheckoutFixture { + doc, + frontiers, + stats, + _subscription: subscription, + } + } + + fn repeated_text(len: usize) -> 
String { + let mut out = String::with_capacity(len); + while out.len() < len { + out.push_str(TEXT); + } + out.truncate(len); + out + } + + fn text_container_name(index: usize) -> String { + if index == 0 { + "text".to_string() + } else { + format!("text_{index}") + } + } + + fn env_usize(name: &str, default: usize) -> usize { + std::env::var(name) + .ok() + .and_then(|value| value.parse().ok()) + .unwrap_or(default) + } + + fn maybe_report_profile( + name: &str, + stats: FixtureStats, + totals: &ProfileTotals, + state_profile: Option, + ) { + if std::env::var_os("LORO_TEXT_CHECKOUT_PROFILE").is_none() || totals.samples == 0 { + return; + } + + let samples = totals.samples as u32; + let state_profile = state_profile.unwrap_or_default(); + let avg_future_scan_visited = totals + .richtext_insert_future_scan_visited + .checked_div(totals.richtext_insert_future_scan_count) + .unwrap_or(0); + let avg_tracker_spans_per_checkout = totals + .richtext_tracker_span_count + .checked_div(totals.richtext_tracker_span_filter_count) + .unwrap_or(0); + let avg_filtered_tracker_spans_per_checkout = totals + .richtext_tracker_filtered_span_count + .checked_div(totals.richtext_tracker_span_filter_count) + .unwrap_or(0); + let avg_diff_containers = totals + .diff_container_count_sum + .checked_div(totals.samples) + .unwrap_or(0); + eprintln!( + concat!( + "[text-checkout-profile] {name}: scenario={scenario}, peers={peers}, ", + "changes={changes}, base_len={base_len}, versions={versions}, ", + "text_containers={text_containers}, large_text_containers={large_text_containers}, ", + "large_text_len={large_text_len}, ", + "subscribed={subscribed}, samples={samples}, avg_total={avg_total:?}, ", + "avg_frontier_prepare={avg_frontier_prepare:?}, ", + "avg_frontiers_to_vv={avg_frontiers_to_vv:?}, avg_diff_calc={avg_diff_calc:?}, ", + "avg_causal_vv_materialize={avg_causal_vv_materialize:?}, ", + "causal_vv_materialize_calls={causal_vv_materialize_calls}, ", + 
"max_causal_vv_width={max_causal_vv_width}, ", + "avg_state_apply={avg_state_apply:?}, avg_emit_events={avg_emit_events:?}, ", + "avg_richtext_tracker_checkout={avg_richtext_tracker_checkout:?}, ", + "avg_richtext_tracker_diff={avg_richtext_tracker_diff:?}, ", + "avg_richtext_delta_build={avg_richtext_delta_build:?}, ", + "avg_richtext_insert_future_scan={avg_richtext_insert_future_scan:?}, ", + "richtext_tracker_checkout_calls={richtext_tracker_checkout_calls}, ", + "richtext_tracker_diff_calls={richtext_tracker_diff_calls}, ", + "richtext_delta_build_calls={richtext_delta_build_calls}, ", + "richtext_insert_future_scan_calls={richtext_insert_future_scan_calls}, ", + "avg_future_scan_visited={avg_future_scan_visited}, ", + "max_future_scan_visited={max_future_scan_visited}, ", + "tracker_spans={tracker_spans}, filtered_tracker_spans={filtered_tracker_spans}, ", + "skipped_tracker_spans={skipped_tracker_spans}, id_to_cursor_iters={id_to_cursor_iters}, ", + "empty_id_to_cursor_iters={empty_id_to_cursor_iters}, ", + "tracker_span_filter_calls={tracker_span_filter_calls}, ", + "avg_tracker_spans_per_checkout={avg_tracker_spans_per_checkout}, ", + "max_tracker_spans_per_checkout={max_tracker_spans_per_checkout}, ", + "avg_filtered_tracker_spans_per_checkout={avg_filtered_tracker_spans_per_checkout}, ", + "max_filtered_tracker_spans_per_checkout={max_filtered_tracker_spans_per_checkout}, ", + "max_frontiers_width={max_frontiers_width}, max_vv_width={max_vv_width}, ", + "avg_diff_containers={avg_diff_containers}, max_diff_containers={max_diff_containers}, ", + "recording_event_samples={recording_event_samples}, ", + "forward_diff_calculator_samples={forward_diff_calculator_samples}, ", + "richtext_tree_nodes={richtext_tree_nodes}, richtext_chunks={richtext_chunks}, ", + "text_chunks={text_chunks}, style_anchors={style_anchors}, ", + "style_range_tree_nodes={style_range_tree_nodes}, style_range_chunks={style_range_chunks}" + ), + name = name, + scenario = stats.scenario, 
+ peers = stats.peer_count, + changes = stats.change_count, + base_len = stats.base_len, + versions = stats.version_count, + text_containers = stats.text_container_count, + large_text_containers = stats.large_text_container_count, + large_text_len = stats.large_text_len, + subscribed = stats.subscribed, + samples = totals.samples, + avg_total = totals.total / samples, + avg_frontier_prepare = totals.frontier_prepare / samples, + avg_frontiers_to_vv = totals.frontiers_to_vv / samples, + avg_diff_calc = totals.diff_calc / samples, + avg_causal_vv_materialize = totals.causal_vv_materialize / samples, + causal_vv_materialize_calls = totals.causal_vv_materialize_count, + max_causal_vv_width = totals.max_causal_vv_width, + avg_state_apply = totals.state_apply / samples, + avg_emit_events = totals.emit_events / samples, + avg_richtext_tracker_checkout = totals.richtext_tracker_checkout / samples, + avg_richtext_tracker_diff = totals.richtext_tracker_diff / samples, + avg_richtext_delta_build = totals.richtext_delta_build / samples, + avg_richtext_insert_future_scan = totals.richtext_insert_future_scan / samples, + richtext_tracker_checkout_calls = totals.richtext_tracker_checkout_count, + richtext_tracker_diff_calls = totals.richtext_tracker_diff_count, + richtext_delta_build_calls = totals.richtext_delta_build_count, + richtext_insert_future_scan_calls = totals.richtext_insert_future_scan_count, + avg_future_scan_visited = avg_future_scan_visited, + max_future_scan_visited = totals.richtext_insert_future_scan_max_visited, + tracker_spans = totals.richtext_tracker_span_count, + filtered_tracker_spans = totals.richtext_tracker_filtered_span_count, + skipped_tracker_spans = totals.richtext_tracker_skipped_span_count, + id_to_cursor_iters = totals.richtext_id_to_cursor_iter_count, + empty_id_to_cursor_iters = totals.richtext_id_to_cursor_empty_iter_count, + tracker_span_filter_calls = totals.richtext_tracker_span_filter_count, + avg_tracker_spans_per_checkout = 
avg_tracker_spans_per_checkout, + max_tracker_spans_per_checkout = totals.richtext_tracker_max_span_count, + avg_filtered_tracker_spans_per_checkout = avg_filtered_tracker_spans_per_checkout, + max_filtered_tracker_spans_per_checkout = totals.richtext_tracker_max_filtered_span_count, + max_frontiers_width = totals.max_frontiers_width, + max_vv_width = totals.max_vv_width, + avg_diff_containers = avg_diff_containers, + max_diff_containers = totals.max_diff_container_count, + recording_event_samples = totals.recording_event_samples, + forward_diff_calculator_samples = totals.forward_diff_calculator_samples, + richtext_tree_nodes = state_profile.richtext_tree_node_count, + richtext_chunks = state_profile.richtext_chunk_count, + text_chunks = state_profile.text_chunk_count, + style_anchors = state_profile.style_anchor_count, + style_range_tree_nodes = state_profile.style_range_tree_node_count, + style_range_chunks = state_profile.style_range_chunk_count, + ); + } +} + +pub fn dumb(_c: &mut Criterion) {} + +#[cfg(feature = "test_utils")] +criterion_group!(benches, text_checkout::text_checkout); +#[cfg(not(feature = "test_utils"))] +criterion_group!(benches, dumb); +criterion_main!(benches); diff --git a/crates/loro-internal/src/container/richtext.rs b/crates/loro-internal/src/container/richtext.rs index ffe4aa339..7790f52ce 100644 --- a/crates/loro-internal/src/container/richtext.rs +++ b/crates/loro-internal/src/container/richtext.rs @@ -28,7 +28,9 @@ use std::fmt::Debug; pub(crate) use fugue_span::{RichtextChunk, RichtextChunkValue}; pub(crate) use richtext_state::RichtextState; pub(crate) use style_range_map::Styles; -pub(crate) use tracker::{CrdtRopeDelta, Tracker as RichtextTracker}; +pub(crate) use tracker::{ + CrdtRopeDelta, PeerSpanCoverage, Tracker as RichtextTracker, TrackerMaterializedVersion, +}; /// This is the data structure that represents a span of rich text. /// It's used to communicate with the frontend. 
diff --git a/crates/loro-internal/src/container/richtext/richtext_state.rs b/crates/loro-internal/src/container/richtext/richtext_state.rs index 032d70ccf..7ca4aca16 100644 --- a/crates/loro-internal/src/container/richtext/richtext_state.rs +++ b/crates/loro-internal/src/container/richtext/richtext_state.rs @@ -1322,6 +1322,40 @@ impl RichtextState { } } + #[cfg(feature = "test_utils")] + pub(crate) fn debug_counts(&self) -> (usize, usize, usize, usize, usize, usize) { + let mut chunk_count = 0; + let mut text_chunk_count = 0; + let mut style_anchor_count = 0; + for chunk in self.tree.iter() { + chunk_count += 1; + match chunk { + RichtextStateChunk::Text(_) => text_chunk_count += 1, + RichtextStateChunk::Style { .. } => style_anchor_count += 1, + } + } + + let style_range_tree_node_count = self + .style_ranges + .as_ref() + .map(|x| x.debug_node_len()) + .unwrap_or(0); + let style_range_chunk_count = self + .style_ranges + .as_ref() + .map(|x| x.debug_chunk_len()) + .unwrap_or(0); + + ( + self.tree.node_len(), + chunk_count, + text_chunk_count, + style_anchor_count, + style_range_tree_node_count, + style_range_chunk_count, + ) + } + pub(crate) fn get_entity_index_for_text_insert( &mut self, pos: usize, @@ -1481,6 +1515,29 @@ impl RichtextState { result } + /// Plain-text insertion path for internal diff application when no style/event data is needed. + pub(crate) fn insert_text_chunk_at_entity_index( + &mut self, + entity_index: usize, + text: TextChunk, + ) { + self.check_cache(); + { + debug_assert!(self.style_ranges.as_ref().is_none_or(|x| !x.has_style())); + let elem = RichtextStateChunk::Text(text); + self.clear_cache(); + match self.tree.query::(&entity_index) { + Some(result) => { + self.tree.insert_by_path(result.cursor, elem); + } + None => { + self.tree.push(elem); + } + } + } + self.check_cache(); + } + /// This is used to accept changes from DiffCalculator. 
/// /// Return (event_index, styles) @@ -2295,6 +2352,44 @@ impl RichtextState { result } + /// Plain-text deletion path for internal diff application when no style/event data is needed. + #[instrument(skip(self))] + pub(crate) fn drain_plain_text_by_entity_index(&mut self, pos: usize, len: usize) { + if len == 0 { + return; + } + + assert!( + pos + len <= self.len_entity(), + "pos: {}, len: {}, self.len(): {}", + pos, + len, + &self.len_entity(), + ); + debug_assert!(self.style_ranges.as_ref().is_none_or(|x| !x.has_style())); + + self.clear_cache(); + let range = pos..pos + len; + let start = self.tree.query::(&range.start); + let start_cursor = start.unwrap().cursor(); + let elem = self.tree.get_elem(start_cursor.leaf).unwrap(); + if elem.rle_len() >= start_cursor.offset + len { + self.tree.update_leaf(start_cursor.leaf, |elem| match elem { + RichtextStateChunk::Text(text) => { + let (next, _) = text.delete_by_entity_index(start_cursor.offset, len); + (true, next.map(RichtextStateChunk::Text), None) + } + RichtextStateChunk::Style { .. 
} => { + *elem = RichtextStateChunk::Text(TextChunk::new_empty()); + (true, None, None) + } + }); + } else { + let end = self.tree.query::(&range.end); + for _ in generic_btree::iter::Drain::new(&mut self.tree, start, end) {} + } + } + pub fn entity_index_to_event_index(&self, index: usize) -> usize { if index == 0 { // the tree maybe empty diff --git a/crates/loro-internal/src/container/richtext/style_range_map.rs b/crates/loro-internal/src/container/richtext/style_range_map.rs index f7487d4db..69b572b23 100644 --- a/crates/loro-internal/src/container/richtext/style_range_map.rs +++ b/crates/loro-internal/src/container/richtext/style_range_map.rs @@ -134,6 +134,16 @@ impl StyleRangeMap { } } + #[cfg(feature = "test_utils")] + pub(super) fn debug_node_len(&self) -> usize { + self.tree.node_len() + } + + #[cfg(feature = "test_utils")] + pub(super) fn debug_chunk_len(&self) -> usize { + self.tree.iter().count() + } + pub fn annotate( &mut self, range: Range, diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index 70cab4f18..c05c82bd6 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -4,31 +4,310 @@ use generic_btree::{ rle::{HasLength as _, Sliceable}, LeafIndex, }; -use loro_common::{Counter, HasId, HasIdSpan, IdFull, IdSpan, Lamport, PeerID, ID}; +use loro_common::{Counter, CounterSpan, HasId, HasIdSpan, IdFull, IdSpan, Lamport, PeerID, ID}; use rle::HasLength as _; +use rustc_hash::FxHashMap; +use smallvec::SmallVec; use tracing::instrument; -use crate::{cursor::AbsolutePosition, VersionVector}; +use crate::{cursor::AbsolutePosition, version::CausalVersion, VersionVector}; use self::{crdt_rope::CrdtRope, id_to_cursor::IdToCursor}; use super::{ fugue_span::{FugueSpan, Status}, - RichtextChunk, + richtext_state::RichtextStateChunk, + RichtextChunk, StyleOp, }; mod crdt_rope; mod id_to_cursor; pub(crate) use 
crdt_rope::CrdtRopeDelta; +pub(crate) type PeerSpanCoverage = FxHashMap; + #[derive(Debug)] pub(crate) struct Tracker { applied_vv: VersionVector, - current_vv: VersionVector, rope: CrdtRope, id_to_cursor: IdToCursor, } +/// Tracks the version currently materialized in a richtext tracker. +/// +/// This state intentionally lives outside [`Tracker`]. The diff calculators keep +/// it next to the tracker because the stable cross-round invariant is: +/// +/// - after `calculate_diff(from, to)` finishes, the tracker is materialized at +/// the coverage-local projection of `from`; +/// - during replay, this value may temporarily move through causal versions; +/// - diff-status checkout to `to` must not change it. +/// +/// Only peers that have ops in the container coverage need to be stored here. +/// Missing peers are treated as materialized at counter `0`. +/// +/// The type deliberately owns the mutable version vector. Tracker checkout that +/// mutates the materialized version requires `&mut Self`, while diff-status +/// checkout only takes `&Self`, so callers cannot accidentally advance the +/// stable materialized version while marking the `to` diff. 
+#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub(crate) struct TrackerMaterializedVersion { + vv: Box, +} + +impl TrackerMaterializedVersion { + #[inline] + pub(crate) fn as_vv(&self) -> &VersionVector { + &self.vv + } + + #[inline] + pub(crate) fn includes_id(&self, id: ID) -> bool { + self.vv.includes_id(id) + } + + pub(crate) fn reset_to_version_projection( + &mut self, + target: &VersionVector, + coverage: &PeerSpanCoverage, + ) { + self.vv.clear(); + for &peer in coverage.keys() { + if let Some(&end) = target.get(&peer) { + if end > 0 { + self.vv.insert(peer, end); + } + } + } + } + + pub(crate) fn checkout_to_version( + &mut self, + tracker: &mut Tracker, + target: &VersionVector, + coverage: &PeerSpanCoverage, + ) { + let spans = self.checkout_spans_to_version(target, coverage); + self.checkout_peer_spans(tracker, &spans, Some(coverage)); + } + + /// Marks diff status at `target` without changing the stable materialized + /// version. This is the second half of diff calculation: after checkout to + /// `from`, mark which spans would change at `to`. 
+ pub(crate) fn checkout_diff_status_to_version( + &self, + tracker: &mut Tracker, + target: &VersionVector, + coverage: &PeerSpanCoverage, + ) { + let spans = self.checkout_spans_to_version(target, coverage); + tracker.apply_peer_spans(&spans, true, Some(coverage)); + } + + pub(crate) fn checkout_to_causal( + &mut self, + tracker: &mut Tracker, + target: CausalVersion<'_>, + coverage: &PeerSpanCoverage, + ) { + let spans = self.checkout_spans_to_causal(target, coverage); + self.checkout_peer_spans(tracker, &spans, Some(coverage)); + } + + #[cfg(test)] + fn checkout_to_version_without_coverage( + &mut self, + tracker: &mut Tracker, + target: &VersionVector, + ) { + let spans = self.checkout_spans_to_version_without_coverage(target); + self.checkout_peer_spans(tracker, &spans, None); + } + + #[cfg(test)] + fn checkout_diff_status_to_version_without_coverage( + &self, + tracker: &mut Tracker, + target: &VersionVector, + ) { + let spans = self.checkout_spans_to_version_without_coverage(target); + tracker.apply_peer_spans(&spans, true, None); + } + + #[cfg(test)] + fn checkout_to_causal_without_coverage( + &mut self, + tracker: &mut Tracker, + target: CausalVersion<'_>, + ) { + let spans = self.checkout_spans_to_causal_without_coverage(target); + self.checkout_peer_spans(tracker, &spans, None); + } + + #[cfg(test)] + fn checkout_peer_spans_without_coverage(&mut self, tracker: &mut Tracker, spans: &[IdSpan]) { + self.checkout_peer_spans(tracker, spans, None); + } + + fn checkout_peer_spans( + &mut self, + tracker: &mut Tracker, + spans: &[IdSpan], + coverage: Option<&PeerSpanCoverage>, + ) { + tracker.apply_peer_spans(spans, false, coverage); + + for &span in spans { + if coverage.is_some_and(|coverage| !coverage.contains_key(&span.peer)) { + continue; + } + + if span.is_reversed() { + self.vv.shrink_to_exclude(span); + } else { + self.vv.extend_to_include(span); + } + } + } + + fn checkout_spans_to_version( + &self, + target: &VersionVector, + coverage: 
&PeerSpanCoverage, + ) -> SmallVec<[IdSpan; 4]> { + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + self.push_retreat_spans_to_version(&mut spans, |peer| { + target.get(&peer).copied().unwrap_or(0) + }); + for &peer in coverage.keys() { + let target_end = target.get(&peer).copied().unwrap_or(0); + let current_end = self.vv.get(&peer).copied().unwrap_or(0); + if target_end > current_end { + spans.push(IdSpan::new(peer, current_end, target_end)); + } + } + + spans + } + + fn checkout_spans_to_causal( + &self, + target: CausalVersion<'_>, + coverage: &PeerSpanCoverage, + ) -> SmallVec<[IdSpan; 4]> { + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + self.push_retreat_spans_to_version(&mut spans, |peer| target.end_for_peer(peer)); + for &peer in coverage.keys() { + let target_end = target.end_for_peer(peer); + let current_end = self.vv.get(&peer).copied().unwrap_or(0); + if target_end > current_end { + spans.push(IdSpan::new(peer, current_end, target_end)); + } + } + + spans + } + + #[cfg(test)] + fn checkout_spans_to_version_without_coverage( + &self, + target: &VersionVector, + ) -> SmallVec<[IdSpan; 4]> { + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + spans.extend(self.vv.sub_iter(target).map(reversed_span)); + spans.extend(target.sub_iter(&self.vv)); + spans + } + + #[cfg(test)] + fn checkout_spans_to_causal_without_coverage( + &self, + target: CausalVersion<'_>, + ) -> SmallVec<[IdSpan; 4]> { + let mut spans: SmallVec<[IdSpan; 4]> = SmallVec::new(); + self.push_retreat_spans_to_version(&mut spans, |peer| target.end_for_peer(peer)); + + for (&peer, &base_end) in target.base().iter() { + let target_end = if peer == target.peer() { + base_end.max(target.peer_end()) + } else { + base_end + }; + let current_end = self.vv.get(&peer).copied().unwrap_or(0); + if target_end > current_end { + spans.push(IdSpan::new(peer, current_end, target_end)); + } + } + + if !target.base().contains_key(&target.peer()) { + let target_end = 
target.peer_end(); + let current_end = self.vv.get(&target.peer()).copied().unwrap_or(0); + if target_end > current_end { + spans.push(IdSpan::new(target.peer(), current_end, target_end)); + } + } + + spans + } + + fn push_retreat_spans_to_version( + &self, + spans: &mut SmallVec<[IdSpan; 4]>, + target_end_for_peer: impl Fn(PeerID) -> Counter, + ) { + for (&peer, &counter) in self.vv.iter() { + let target_end = target_end_for_peer(peer); + if counter > target_end { + spans.push(reversed_span(IdSpan::new(peer, target_end, counter))); + } + } + } + + fn extend_to_include_end_id(&mut self, id: ID) { + self.vv.extend_to_include_end_id(id); + } + + fn extend_to_include_last_id(&mut self, id: ID) { + self.vv.extend_to_include_last_id(id); + } + + #[cfg(debug_assertions)] + pub(crate) fn debug_assert_matches_version_projection( + &self, + target: &VersionVector, + coverage: &PeerSpanCoverage, + ) { + for &peer in coverage.keys() { + let expected = target.get(&peer).copied().unwrap_or(0); + let actual = self.vv.get(&peer).copied().unwrap_or(0); + debug_assert_eq!( + actual, expected, + "tracker materialized version must match the stable from-version projection" + ); + } + + for (&peer, &actual) in self.vv.iter() { + debug_assert!( + coverage.contains_key(&peer), + "tracker materialized version should only contain covered peers" + ); + let expected = target.get(&peer).copied().unwrap_or(0); + debug_assert_eq!( + actual, expected, + "tracker materialized version contains a stale peer counter" + ); + } + } + + #[cfg(not(debug_assertions))] + pub(crate) fn debug_assert_matches_version_projection( + &self, + _target: &VersionVector, + _coverage: &PeerSpanCoverage, + ) { + } +} + impl Default for Tracker { fn default() -> Self { Self::new_with_unknown() @@ -42,7 +321,6 @@ impl Tracker { rope: CrdtRope::new(), id_to_cursor: IdToCursor::default(), applied_vv: Default::default(), - current_vv: Default::default(), }; let result = this.rope.tree.push(FugueSpan { @@ -67,21 +345,55 @@ 
impl Tracker { rope: CrdtRope::new(), id_to_cursor: IdToCursor::default(), applied_vv: Default::default(), - current_vv: Default::default(), } } - #[inline] - pub fn all_vv(&self) -> &VersionVector { - &self.applied_vv + pub(crate) fn new_from_state_chunks( + chunks: &[RichtextStateChunk], + _styles: &mut Vec<(StyleOp, usize)>, + ) -> Option { + let mut last_lamport = None; + for chunk in chunks { + let RichtextStateChunk::Text(text) = chunk else { + return None; + }; + let id = text.id_full(); + if last_lamport.is_some_and(|last| last > id.lamport) { + return None; + } + last_lamport = Some(id.lamport); + } + + let mut this = Self::new(); + let mut pos = 0; + for chunk in chunks { + let RichtextStateChunk::Text(text) = chunk else { + unreachable!("style chunks are rejected before seeding richtext tracker") + }; + let len = text.unicode_len() as usize; + if len == 0 { + continue; + } + + this._insert(pos, RichtextChunk::new_unknown(len as u32), text.id_full()); + pos += len; + } + + Some(this) } #[inline] - pub fn current_vv(&self) -> &VersionVector { - &self.current_vv + pub fn all_vv(&self) -> &VersionVector { + &self.applied_vv } - pub(crate) fn insert(&mut self, mut op_id: IdFull, mut pos: usize, mut content: RichtextChunk) { + pub(crate) fn insert( + &mut self, + materialized: &mut TrackerMaterializedVersion, + mut op_id: IdFull, + mut pos: usize, + mut content: RichtextChunk, + ) { // trace!( // "TrackerInsert op_id = {:#?}, pos = {:#?}, content = {:#?}", // op_id, @@ -89,16 +401,19 @@ impl Tracker { // &content // ); // tracing::span!(tracing::Level::INFO, "TrackerInsert"); - if let ControlFlow::Break(_) = - self.skip_applied(op_id.id(), content.len(), |applied_counter_end| { + if let ControlFlow::Break(_) = self.skip_applied( + materialized, + op_id.id(), + content.len(), + |applied_counter_end| { // the op is partially included, need to slice the content let start = (applied_counter_end - op_id.counter) as usize; op_id.lamport += (applied_counter_end - 
op_id.counter) as Lamport; op_id.counter = applied_counter_end; pos += start; content = content.slice(start..); - }) - { + }, + ) { return; } @@ -106,7 +421,9 @@ impl Tracker { // tracing::span!(tracing::Level::INFO, "before insert {} pos={}", op_id, pos); // debug_log::debug_dbg!(&self); // } + let end_id = op_id.inc(content.len() as Counter); self._insert(pos, content, op_id); + materialized.extend_to_include_end_id(end_id.id()); } fn _insert(&mut self, pos: usize, content: RichtextChunk, op_id: IdFull) { @@ -135,7 +452,6 @@ impl Tracker { self.update_insert_by_split(&result.splitted.arr); let end_id = op_id.inc(content.len() as Counter); - self.current_vv.extend_to_include_end_id(end_id.id()); self.applied_vv.extend_to_include_end_id(end_id.id()); } @@ -172,33 +488,38 @@ impl Tracker { /// - reverse: if true, the kth op delete the last kth element of the span pub(crate) fn delete( &mut self, + materialized: &mut TrackerMaterializedVersion, mut op_id: ID, mut target_start_id: ID, pos: usize, mut len: usize, reverse: bool, ) { - if let ControlFlow::Break(_) = self.skip_applied(op_id, len, |applied_counter_end: i32| { - // the op is partially included, need to slice the op - let start = (applied_counter_end - op_id.counter) as usize; - op_id.counter = applied_counter_end; - if !reverse { - target_start_id = target_start_id.inc(start as i32); - } - // Okay, this looks pretty weird, but it's correct. - // If it's reverse, we don't need to change the target_start_id, because target_start_id always pointing towards the - // leftmost element of the span. After applying the initial part of the deletion, which starts from the right side, - // the target_start_id will be still pointing towards the same leftmost element, thus no need to change. - len -= start; - // If reverse, don't need to change the pos, because it's deleting backwards. 
- // If not reverse, we don't need to change the pos either, because the `start` chars after it are already deleted - }) { + if let ControlFlow::Break(_) = + self.skip_applied(materialized, op_id, len, |applied_counter_end: i32| { + // the op is partially included, need to slice the op + let start = (applied_counter_end - op_id.counter) as usize; + op_id.counter = applied_counter_end; + if !reverse { + target_start_id = target_start_id.inc(start as i32); + } + // Okay, this looks pretty weird, but it's correct. + // If it's reverse, we don't need to change the target_start_id, because target_start_id always pointing towards the + // leftmost element of the span. After applying the initial part of the deletion, which starts from the right side, + // the target_start_id will be still pointing towards the same leftmost element, thus no need to change. + len -= start; + // If reverse, don't need to change the pos, because it's deleting backwards. + // If not reverse, we don't need to change the pos either, because the `start` chars after it are already deleted + }) + { return; } // tracing::info!("after forwarding pos={} len={}", pos, len); + let end_id = op_id.inc(len as Counter); self._delete(target_start_id, pos, len, reverse, op_id); + materialized.extend_to_include_end_id(end_id); } fn _delete(&mut self, target_start_id: ID, pos: usize, len: usize, reverse: bool, op_id: ID) { @@ -227,12 +548,12 @@ impl Tracker { } let end_id = op_id.inc(len as Counter); - self.current_vv.extend_to_include_end_id(end_id); self.applied_vv.extend_to_include_end_id(end_id); } fn skip_applied( &mut self, + materialized: &mut TrackerMaterializedVersion, op_id: ID, len: usize, mut f: impl FnMut(Counter), @@ -240,10 +561,10 @@ impl Tracker { let last_id = op_id.inc(len as Counter - 1); let applied_counter_end = self.applied_vv.get(&last_id.peer).copied().unwrap_or(0); if applied_counter_end > op_id.counter { - if !self.current_vv.includes_id(last_id) { + if 
!materialized.includes_id(last_id) { // PERF: may be slow let mut updates = Default::default(); - let cnt_start = self.current_vv.get(&op_id.peer).copied().unwrap_or(0); + let cnt_start = materialized.as_vv().get(&op_id.peer).copied().unwrap_or(0); self.forward( IdSpan::new(op_id.peer, cnt_start, op_id.counter + len as Counter), &mut updates, @@ -252,7 +573,7 @@ impl Tracker { } if applied_counter_end > last_id.counter { - self.current_vv.extend_to_include_last_id(last_id); + materialized.extend_to_include_last_id(last_id); return ControlFlow::Break(()); } @@ -268,12 +589,15 @@ impl Tracker { #[instrument(skip(self))] pub(crate) fn move_item( &mut self, + materialized: &mut TrackerMaterializedVersion, op_id: IdFull, deleted_id: ID, from_pos: usize, to_pos: usize, ) { - if let ControlFlow::Break(_) = self.skip_applied(op_id.id(), 1, |_| unreachable!()) { + if let ControlFlow::Break(_) = + self.skip_applied(materialized, op_id.id(), 1, |_| unreachable!()) + { return; } @@ -322,25 +646,33 @@ impl Tracker { ); let end_id = op_id.inc(1); - self.current_vv.extend_to_include_end_id(end_id.id()); self.applied_vv.extend_to_include_end_id(end_id.id()); + materialized.extend_to_include_end_id(end_id.id()); } - #[inline] - pub(crate) fn checkout(&mut self, vv: &VersionVector) { - self._checkout(vv, false); - } - - fn _checkout(&mut self, vv: &VersionVector, on_diff_status: bool) { - // tracing::info!("Checkout to {:?} from {:?}", vv, self.current_vv); + /// Checkout by applying directed peer spans. + /// + /// Forward spans use the normal `[start, end)` representation. Retreat spans + /// must use `CounterSpan`'s reversed representation for the same covered ids. 
+ fn apply_peer_spans( + &mut self, + spans: &[IdSpan], + on_diff_status: bool, + coverage: Option<&PeerSpanCoverage>, + ) { + debug_assert_no_mixed_peer_directions(spans); if on_diff_status { self.rope.clear_diff_status(); } - let current_vv = std::mem::take(&mut self.current_vv); - let (retreat, forward) = current_vv.diff_iter(vv); + let filtered_spans = filter_spans_by_coverage(spans, coverage); + #[cfg(feature = "test_utils")] + crate::diff_calc::profiling::record_richtext_tracker_span_filter( + spans.len(), + filtered_spans.len(), + ); let mut updates = Vec::new(); - for span in retreat { + for &span in filtered_spans.iter().filter(|span| span.is_reversed()) { for c in self.id_to_cursor.iter(span) { match c { id_to_cursor::IterCursor::Insert { leaf, id_span } => { @@ -427,16 +759,10 @@ impl Tracker { } } - for span in forward { + for &span in filtered_spans.iter().filter(|span| !span.is_reversed()) { self.forward(span, &mut updates); } - if !on_diff_status { - self.current_vv = vv.clone(); - } else { - self.current_vv = current_vv; - } - self.batch_update(updates, on_diff_status); } @@ -526,16 +852,16 @@ impl Tracker { } #[allow(unused)] - pub(crate) fn check(&self) { + pub(crate) fn check(&self, materialized: &TrackerMaterializedVersion) { if !cfg!(debug_assertions) { return; } - self.check_vv_correctness(); + self.check_vv_correctness(materialized); self.check_id_to_cursor_insertions_correctness(); } - fn check_vv_correctness(&self) { + fn check_vv_correctness(&self, materialized: &TrackerMaterializedVersion) { if !cfg!(debug_assertions) { return; } @@ -548,9 +874,9 @@ impl Tracker { let id_span = span.id_span(); assert!(self.all_vv().includes_id(id_span.id_last())); if span.status.future { - assert!(!self.current_vv.includes_id(id_span.id_start())); + assert!(!materialized.includes_id(id_span.id_start())); } else { - assert!(self.current_vv.includes_id(id_span.id_last())); + assert!(materialized.includes_id(id_span.id_last())); } } } @@ -619,78 +945,314 @@ 
impl Tracker { } // #[tracing::instrument(skip(self), level = "info")] + #[cfg(test)] pub(crate) fn diff( &mut self, + materialized: &mut TrackerMaterializedVersion, from: &VersionVector, to: &VersionVector, ) -> impl Iterator + '_ { // tracing::info!("Init: {:#?}, ", &self); - self._checkout(from, false); - self._checkout(to, true); + materialized.checkout_to_version_without_coverage(self, from); + materialized.checkout_diff_status_to_version_without_coverage(self, to); // self.id_to_cursor.diagnose(); // tracing::trace!("Trace::diff {:#?}, ", &self); self.rope.get_diff() } + + pub(crate) fn diff_with_coverage( + &mut self, + materialized: &mut TrackerMaterializedVersion, + from: &VersionVector, + to: &VersionVector, + coverage: &PeerSpanCoverage, + ) -> impl Iterator + '_ { + materialized.checkout_to_version(self, from, coverage); + materialized.checkout_diff_status_to_version(self, to, coverage); + + self.rope.get_diff() + } +} + +fn reversed_span(mut span: IdSpan) -> IdSpan { + span.reverse(); + span +} + +fn filter_spans_by_coverage( + spans: &[IdSpan], + coverage: Option<&PeerSpanCoverage>, +) -> SmallVec<[IdSpan; 4]> { + match coverage { + Some(coverage) => spans + .iter() + .filter_map(|span| intersect_span_with_coverage(*span, coverage)) + .collect(), + None => spans.iter().copied().collect(), + } +} + +fn intersect_span_with_coverage(span: IdSpan, coverage: &PeerSpanCoverage) -> Option { + let coverage = coverage.get(&span.peer)?; + let start = span.counter.min().max(coverage.min()); + let end = span.counter.norm_end().min(coverage.norm_end()); + if start >= end { + return None; + } + + let mut ans = IdSpan::new(span.peer, start, end); + if span.is_reversed() { + ans.reverse(); + } + Some(ans) +} + +#[cfg(debug_assertions)] +fn debug_assert_no_mixed_peer_directions(spans: &[IdSpan]) { + for (index, span) in spans.iter().enumerate() { + for other in &spans[index + 1..] 
{ + if span.peer == other.peer { + debug_assert_eq!(span.is_reversed(), other.is_reversed()); + } + } + } } +#[cfg(not(debug_assertions))] +fn debug_assert_no_mixed_peer_directions(_spans: &[IdSpan]) {} + #[cfg(test)] mod test { - use crate::{container::richtext::RichtextChunk, vv}; + use crate::{ + container::richtext::RichtextChunk, + version::{CausalVersion, ImVersionVector}, + vv, + }; use generic_btree::rle::HasLength; use super::*; use std::time::Instant; + fn tracker() -> (Tracker, TrackerMaterializedVersion) { + (Tracker::new(), TrackerMaterializedVersion::default()) + } + + fn insert_text( + tracker: &mut Tracker, + materialized: &mut TrackerMaterializedVersion, + id: IdFull, + pos: usize, + text: std::ops::Range, + ) { + tracker.insert(materialized, id, pos, RichtextChunk::new_text(text)); + } + + fn delete_text( + tracker: &mut Tracker, + materialized: &mut TrackerMaterializedVersion, + op_id: ID, + target_start_id: ID, + pos: usize, + len: usize, + reverse: bool, + ) { + tracker.delete(materialized, op_id, target_start_id, pos, len, reverse); + } + #[test] fn test_len() { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..2)); + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..2); assert_eq!(t.rope.len(), 2); - t.checkout(&Default::default()); + materialized.checkout_to_version_without_coverage(&mut t, &Default::default()); assert_eq!(t.rope.len(), 0); - t.insert(IdFull::new(2, 0, 0), 0, RichtextChunk::new_text(2..4)); + insert_text(&mut t, &mut materialized, IdFull::new(2, 0, 0), 0, 2..4); let v = vv!(1 => 2, 2 => 2); - t.checkout(&v); + materialized.checkout_to_version_without_coverage(&mut t, &v); assert_eq!(&t.applied_vv, &v); assert_eq!(t.rope.len(), 4); } + #[test] + fn checkout_causal_single_frontier_retreats_other_peers() { + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(2, 0, 0), 0, 0..2); + 
insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 2, 2..4); + assert_eq!(t.rope.len(), 4); + + let base = ImVersionVector::new(); + materialized.checkout_to_causal_without_coverage(&mut t, CausalVersion::new(&base, 1, 2)); + + assert_eq!(t.rope.len(), 2); + assert_eq!(materialized.as_vv(), &vv!(1 => 2)); + } + + #[test] + fn checkout_peer_spans_uses_reversed_span_boundaries() { + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..4); + insert_text(&mut t, &mut materialized, IdFull::new(2, 0, 4), 4, 4..6); + assert_eq!(t.rope.len(), 6); + assert_eq!(materialized.as_vv(), &vv!(1 => 4, 2 => 2)); + + let retreat_peer_2 = reversed_span(IdSpan::new(2, 0, 2)); + materialized.checkout_peer_spans_without_coverage(&mut t, &[retreat_peer_2]); + + assert_eq!(t.rope.len(), 4); + assert_eq!(materialized.as_vv(), &vv!(1 => 4)); + + materialized.checkout_peer_spans_without_coverage(&mut t, &[IdSpan::new(2, 0, 2)]); + + assert_eq!(t.rope.len(), 6); + assert_eq!(materialized.as_vv(), &vv!(1 => 4, 2 => 2)); + } + + #[test] + fn span_coverage_intersection_preserves_direction() { + let mut coverage = PeerSpanCoverage::default(); + coverage.insert(1, CounterSpan::new(3, 6)); + + assert_eq!( + intersect_span_with_coverage(IdSpan::new(1, 0, 10), &coverage), + Some(IdSpan::new(1, 3, 6)) + ); + + let reversed = reversed_span(IdSpan::new(1, 0, 10)); + let expected = reversed_span(IdSpan::new(1, 3, 6)); + assert_eq!( + intersect_span_with_coverage(reversed, &coverage), + Some(expected) + ); + } + + #[test] + fn coverage_filtered_checkout_keeps_materialized_projection_local() { + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..4); + assert_eq!(materialized.as_vv(), &vv!(1 => 4)); + + let mut coverage = PeerSpanCoverage::default(); + coverage.insert(1, CounterSpan::new(0, 4)); + materialized.checkout_peer_spans( + &mut t, + &[reversed_span(IdSpan::new(2, 0, 
5))], + Some(&coverage), + ); + + assert_eq!(t.rope.len(), 4); + assert_eq!(materialized.as_vv(), &vv!(1 => 4)); + } + + #[test] + fn coverage_filtered_diff_matches_unfiltered_for_delete_span() { + fn tracker_with_delete() -> (Tracker, TrackerMaterializedVersion) { + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..10); + delete_text( + &mut t, + &mut materialized, + ID::new(2, 0), + ID::NONE_ID, + 0, + 10, + true, + ); + (t, materialized) + } + + let from = vv!(1 => 10); + let to = vv!(1 => 10, 2 => 10); + let (mut unfiltered, mut unfiltered_materialized) = tracker_with_delete(); + let (mut filtered, mut filtered_materialized) = tracker_with_delete(); + + let mut coverage = PeerSpanCoverage::default(); + coverage.insert(1, CounterSpan::new(0, 10)); + coverage.insert(2, CounterSpan::new(0, 10)); + + let unfiltered_delta = unfiltered + .diff(&mut unfiltered_materialized, &from, &to) + .collect::>(); + let filtered_delta = filtered + .diff_with_coverage(&mut filtered_materialized, &from, &to, &coverage) + .collect::>(); + + assert_eq!(filtered_delta, unfiltered_delta); + assert_eq!(filtered_materialized, unfiltered_materialized); + assert_eq!(filtered.rope.len(), unfiltered.rope.len()); + } + + #[test] + fn diff_status_checkout_preserves_stable_materialized_version() { + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..2); + insert_text(&mut t, &mut materialized, IdFull::new(2, 0, 2), 2, 2..4); + + let mut coverage = PeerSpanCoverage::default(); + coverage.insert(1, CounterSpan::new(0, 2)); + coverage.insert(2, CounterSpan::new(0, 2)); + let from = vv!(1 => 2); + let to = vv!(1 => 2, 2 => 2); + + materialized.checkout_to_version(&mut t, &from, &coverage); + let stable_from = materialized.clone(); + materialized.checkout_diff_status_to_version(&mut t, &to, &coverage); + + assert_eq!(materialized, stable_from); + 
materialized.debug_assert_matches_version_projection(&from, &coverage); + } + #[test] fn test_retreat_and_forward_delete() { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..10)); - t.delete(ID::new(2, 0), ID::NONE_ID, 0, 10, true); - t.checkout(&vv!(1 => 10, 2=>5)); + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..10); + delete_text( + &mut t, + &mut materialized, + ID::new(2, 0), + ID::NONE_ID, + 0, + 10, + true, + ); + materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>5)); assert_eq!(t.rope.len(), 5); - t.checkout(&vv!(1 => 10, 2=>0)); + materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>0)); assert_eq!(t.rope.len(), 10); - t.checkout(&vv!(1 => 10, 2=>10)); + materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>10)); assert_eq!(t.rope.len(), 0); - t.checkout(&vv!(1 => 10, 2=>0)); + materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>0)); assert_eq!(t.rope.len(), 10); } #[test] fn repeated_tail_splits_keep_id_to_cursor_consistent() { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..300)); + let (mut t, mut materialized) = tracker(); + insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..300); for (i, pos) in [100, 201, 252, 278].into_iter().enumerate() { let op_id = IdFull::new(2, i as Counter, i as Lamport); let start = 1000 + i as u32; - t.insert(op_id, pos, RichtextChunk::new_text(start..start + 1)); + insert_text(&mut t, &mut materialized, op_id, pos, start..start + 1); } - t.check(); + t.check(&materialized); } #[test] fn test_checkout_in_doc_with_del_span() { - let mut t = Tracker::new(); - t.insert(IdFull::new(1, 0, 0), 0, RichtextChunk::new_text(0..10)); - t.delete(ID::new(2, 0), ID::NONE_ID, 0, 10, false); - t.checkout(&vv!(1 => 10, 2=>4)); + let (mut t, mut materialized) = tracker(); + 
insert_text(&mut t, &mut materialized, IdFull::new(1, 0, 0), 0, 0..10); + delete_text( + &mut t, + &mut materialized, + ID::new(2, 0), + ID::NONE_ID, + 0, + 10, + false, + ); + materialized.checkout_to_version_without_coverage(&mut t, &vv!(1 => 10, 2=>4)); let v: Vec = t.rope.tree().iter().copied().collect(); assert_eq!(v.len(), 2); assert!(!v[0].is_activated()); @@ -714,8 +1276,9 @@ mod test { let doc_len = CHUNK_LEN * fragments; - let mut t = Tracker::new(); + let (mut t, mut materialized) = tracker(); t.insert( + &mut materialized, IdFull::new(PEER_A, 0, 0), 0, RichtextChunk::new_text(0..doc_len as u32), @@ -731,14 +1294,14 @@ mod test { let chunk = RichtextChunk::new_text( (doc_len as u32 + i as u32)..(doc_len as u32 + i as u32 + 1), ); - t.insert(op_id, pos, chunk); + t.insert(&mut materialized, op_id, pos, chunk); } let elapsed = start.elapsed(); let before_vv = vv!(PEER_A => doc_len as Counter); let after_vv = vv!(PEER_A => doc_len as Counter, PEER_B => (fragments - 1) as Counter); let diff_start = Instant::now(); - let diff_len = t.diff(&before_vv, &after_vv).count(); + let diff_len = t.diff(&mut materialized, &before_vv, &after_vv).count(); let diff_elapsed = diff_start.elapsed(); assert_eq!(t.rope.tree().iter().count(), 1 + 2 * (fragments - 1)); println!( @@ -762,7 +1325,8 @@ mod test { let doc_len = CHUNK_LEN * fragments; let mut t = Tracker::new_with_unknown(); - t.checkout(&vv!()); + let mut materialized = TrackerMaterializedVersion::default(); + materialized.checkout_to_version_without_coverage(&mut t, &VersionVector::new()); t.id_to_cursor.diagnose(); let start = Instant::now(); @@ -772,7 +1336,7 @@ mod test { let chunk = RichtextChunk::new_text( (doc_len as u32 + i as u32)..(doc_len as u32 + i as u32 + 1), ); - t.insert(op_id, pos, chunk); + t.insert(&mut materialized, op_id, pos, chunk); } let elapsed = start.elapsed(); diff --git a/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs 
b/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs index 6c3c901c5..27f5340c2 100644 --- a/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs +++ b/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs @@ -136,84 +136,111 @@ impl CrdtRope { let mut insert_pos = start.cursor; if !in_between.is_empty() { + #[cfg(feature = "test_utils")] + let future_scan_start = std::time::Instant::now(); + #[cfg(feature = "test_utils")] + let future_scan_visited = in_between.len(); // find insert pos - let mut scanning = false; - let mut visited: SmallVec<[IdSpan; 4]> = Default::default(); - for (other_leaf, other_elem) in in_between.iter() { - // tracing::info!("Visiting {}", &other_elem.id); - let other_origin_left = other_elem.origin_left; - if other_origin_left != content.origin_left - && other_origin_left - .map(|left| visited.iter().all(|x| !x.contains_id(left.to_id()))) - .unwrap_or(true) - { - // The other_elem's origin_left must be at the left side of content's origin_left. - // So the content must be at the left side of other_elem. 
- - // tracing::info!("Break because the node's origin_left is at the left side of new_elem's origin left"); - break; + if in_between.iter().all(|(_, other_elem)| { + other_elem.origin_left == content.origin_left + && other_elem.origin_right == content.origin_right + }) { + debug_assert!(in_between + .windows(2) + .all(|window| window[0].1.id.peer <= window[1].1.id.peer)); + let insert_index = in_between + .partition_point(|(_, other_elem)| other_elem.id.peer <= content.id.peer); + if insert_index > 0 { + let (other_leaf, other_elem) = in_between[insert_index - 1]; + insert_pos = Cursor { + leaf: other_leaf, + offset: other_elem.rle_len(), + }; } + } else { + let mut scanning = false; + let mut visited: SmallVec<[IdSpan; 4]> = Default::default(); + for (other_leaf, other_elem) in in_between.iter() { + // tracing::info!("Visiting {}", &other_elem.id); + let other_origin_left = other_elem.origin_left; + if other_origin_left != content.origin_left + && other_origin_left + .map(|left| visited.iter().all(|x| !x.contains_id(left.to_id()))) + .unwrap_or(true) + { + // The other_elem's origin_left must be at the left side of content's origin_left. + // So the content must be at the left side of other_elem. 
+ + // tracing::info!("Break because the node's origin_left is at the left side of new_elem's origin left"); + break; + } - visited.push(IdSpan::new( - other_elem.id.peer, - other_elem.id.counter, - other_elem.id.counter + other_elem.rle_len() as Counter, - )); - - if content.origin_left == other_origin_left { - if other_elem.origin_right == content.origin_right { - // tracing::info!("Same right parent"); - // Same right parent - if other_elem.id.peer > content.id.peer { - // tracing::info!("Break on larger peer"); - break; + visited.push(IdSpan::new( + other_elem.id.peer, + other_elem.id.counter, + other_elem.id.counter + other_elem.rle_len() as Counter, + )); + + if content.origin_left == other_origin_left { + if other_elem.origin_right == content.origin_right { + // tracing::info!("Same right parent"); + // Same right parent + if other_elem.id.peer > content.id.peer { + // tracing::info!("Break on larger peer"); + break; + } else { + scanning = false; + } } else { - scanning = false; - } - } else { - // tracing::info!("Different right parent"); - // Different right parent, we need to compare the right parents' position - - let other_parent_right_idx = - if let Some(other_origin_right) = other_elem.origin_right { - let elem_idx = find_elem(other_origin_right.to_id()); - let elem = self.tree.get_elem(elem_idx).unwrap(); - // It must be the start of the elem - assert_eq!(elem.id.id(), other_origin_right.to_id()); - if elem.origin_left == content.origin_left { - Some(elem_idx) + // tracing::info!("Different right parent"); + // Different right parent, we need to compare the right parents' position + + let other_parent_right_idx = + if let Some(other_origin_right) = other_elem.origin_right { + let elem_idx = find_elem(other_origin_right.to_id()); + let elem = self.tree.get_elem(elem_idx).unwrap(); + // It must be the start of the elem + assert_eq!(elem.id.id(), other_origin_right.to_id()); + if elem.origin_left == content.origin_left { + Some(elem_idx) + } else { + 
None + } } else { None - } - } else { - None - }; + }; - match self.cmp_pos(other_parent_right_idx, parent_right_leaf) { - Ordering::Less => { - // tracing::info!("Less"); - scanning = true; - } - Ordering::Equal if other_elem.id.peer > content.id.peer => { - // tracing::info!("Break on eq"); - break; - } - _ => { - // tracing::info!("Scanning"); - scanning = false; + match self.cmp_pos(other_parent_right_idx, parent_right_leaf) { + Ordering::Less => { + // tracing::info!("Less"); + scanning = true; + } + Ordering::Equal if other_elem.id.peer > content.id.peer => { + // tracing::info!("Break on eq"); + break; + } + _ => { + // tracing::info!("Scanning"); + scanning = false; + } } } } - } - if !scanning { - insert_pos = Cursor { - leaf: *other_leaf, - offset: other_elem.rle_len(), - }; - // tracing::info!("updating insert pos {:?}", &insert_pos); + if !scanning { + insert_pos = Cursor { + leaf: *other_leaf, + offset: other_elem.rle_len(), + }; + // tracing::info!("updating insert pos {:?}", &insert_pos); + } } } + #[cfg(feature = "test_utils")] + crate::diff_calc::profiling::record_richtext_insert_future_scan( + future_scan_start.elapsed(), + future_scan_visited, + ); } // tracing::info!("Inserting at {:?}", insert_pos); @@ -717,6 +744,38 @@ mod test { span } + fn leaf_of(rope: &CrdtRope, id: ID) -> LeafIndex { + for iter in rope.tree.iter_range(..) { + if iter.elem.id_span().contains_id(id) { + return iter.cursor().leaf; + } + } + + panic!("cannot find leaf for {id:?}") + } + + fn leaf_lookup(rope: &CrdtRope) -> Vec<(IdSpan, LeafIndex)> { + rope.tree + .iter_range(..) 
+ .map(|iter| (iter.elem.id_span(), iter.cursor().leaf)) + .collect() + } + + fn lookup_leaf(lookup: &[(IdSpan, LeafIndex)], id: ID) -> LeafIndex { + lookup + .iter() + .find_map(|(span, leaf)| span.contains_id(id).then_some(*leaf)) + .unwrap_or_else(|| panic!("cannot find leaf for {id:?}")) + } + + fn future_peers(rope: &CrdtRope) -> Vec { + rope.tree + .iter() + .filter(|span| span.status.future) + .map(|span| span.id.peer) + .collect() + } + #[test] fn len_test() { let mut rope = CrdtRope::new(); @@ -852,6 +911,75 @@ mod test { assert_eq!(split.len(), 0); } + #[test] + fn same_parent_future_spans_keep_peer_order() { + let mut rope = CrdtRope::new(); + rope.insert(0, span(0, 0..10), |_| panic!()); + for peer in [5, 3, 7, 4] { + rope.insert(5, future_span(peer, peer * 10..peer * 10 + 1), |_| panic!()); + } + + assert_eq!(future_peers(&rope), vec![3, 4, 5, 7]); + for span in rope.tree.iter().filter(|span| span.status.future) { + assert_eq!(span.origin_left, Some(CompactId::new(0, 4))); + assert_eq!(span.origin_right, Some(CompactId::new(0, 5))); + } + } + + #[test] + fn same_parent_future_spans_keep_order_after_retreat_forward() { + let mut rope = CrdtRope::new(); + rope.insert(0, span(0, 0..10), |_| panic!()); + rope.insert(5, future_span(5, 50..51), |_| panic!()); + rope.insert(5, future_span(3, 30..31), |_| panic!()); + assert_eq!(future_peers(&rope), vec![3, 5]); + + let leaf = leaf_of(&rope, ID::new(3, 0)); + rope.update( + vec![LeafUpdate { + leaf, + id_span: IdSpan::new(3, 0, 1), + set_future: Some(false), + delete_times_diff: 0, + }], + false, + ); + assert_eq!(future_peers(&rope), vec![5]); + + let leaf = leaf_of(&rope, ID::new(3, 0)); + rope.update( + vec![LeafUpdate { + leaf, + id_span: IdSpan::new(3, 0, 1), + set_future: Some(true), + delete_times_diff: 0, + }], + false, + ); + rope.insert(5, future_span(4, 40..41), |_| panic!()); + + assert_eq!(future_peers(&rope), vec![3, 4, 5]); + } + + #[test] + fn 
mixed_right_parent_future_spans_fall_back_to_general_ordering() { + let mut rope = CrdtRope::new(); + rope.insert(0, span(0, 0..10), |_| panic!()); + rope.insert(5, future_span(2, 20..21), |_| panic!()); + rope.insert(5, span(9, 90..91), |_| panic!()); + + let lookup = leaf_lookup(&rope); + let inserted = rope.insert(5, future_span(3, 30..31), |id| lookup_leaf(&lookup, id)); + assert_eq!(inserted.content.origin_left, Some(CompactId::new(0, 4))); + assert_eq!(inserted.content.origin_right, Some(CompactId::new(9, 0))); + assert_eq!(future_peers(&rope), vec![2, 3]); + + rope.delete(ID::new(10, 0), 5, 1, false, &mut |_| {}); + let lookup = leaf_lookup(&rope); + rope.insert(5, future_span(4, 40..41), |id| lookup_leaf(&lookup, id)); + assert_eq!(future_peers(&rope), vec![2, 3, 4]); + } + #[test] fn checkout() { let mut rope = CrdtRope::new(); diff --git a/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs b/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs index d716069a3..7d7d16d1f 100644 --- a/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs +++ b/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs @@ -200,11 +200,15 @@ impl IdToCursor { } pub fn iter(&self, mut iter_id_span: IdSpan) -> impl Iterator + '_ { + #[cfg(feature = "test_utils")] + crate::diff_calc::profiling::record_richtext_id_to_cursor_iter_call(); iter_id_span.normalize_(); let list = self.map.get(&iter_id_span.peer).unwrap_or(&EMPTY_VEC); // Index in the list let mut index = 0; let mut insert_set_iter: Option>> = None; + #[cfg(feature = "test_utils")] + let mut yielded = false; if !list.is_empty() { index = match list.binary_search_by_key(&iter_id_span.counter.start, |x| x.counter) { @@ -215,6 +219,10 @@ impl IdToCursor { std::iter::from_fn(move || loop { if index >= list.len() { + #[cfg(feature = "test_utils")] + if !yielded { + crate::diff_calc::profiling::record_richtext_id_to_cursor_empty_iter(); + } return None; } @@ -227,12 
+235,20 @@ impl IdToCursor { continue; }; + #[cfg(feature = "test_utils")] + { + yielded = true; + } return Some(next); } let f = &list[index]; let iter_counter = f.counter; if iter_counter >= iter_id_span.counter.end { + #[cfg(feature = "test_utils")] + if !yielded { + crate::diff_calc::profiling::record_richtext_id_to_cursor_empty_iter(); + } return None; } @@ -262,11 +278,19 @@ impl IdToCursor { continue; } + #[cfg(feature = "test_utils")] + { + yielded = true; + } return Some(IterCursor::Delete(span.slice(from as usize, to as usize))); } Cursor::Move { from, to } => { index += 1; let op_id = ID::new(iter_id_span.peer, f.counter); + #[cfg(feature = "test_utils")] + { + yielded = true; + } return Some(IterCursor::Move { from_id: *from, to_leaf: *to, diff --git a/crates/loro-internal/src/diff/diff_impl.rs b/crates/loro-internal/src/diff/diff_impl.rs index aef44cd5b..79a7a9b07 100644 --- a/crates/loro-internal/src/diff/diff_impl.rs +++ b/crates/loro-internal/src/diff/diff_impl.rs @@ -15,6 +15,7 @@ //! The implementation of this algorithm is based on the implementation by //! Brandon Williams. use crate::change::get_sys_timestamp; +use loro_common::ContainerID; use rustc_hash::FxHashMap; use std::cmp::Ordering; use std::collections::BinaryHeap; @@ -44,6 +45,10 @@ impl Default for UpdateOptions { pub enum UpdateTimeoutError { #[error("Timeout")] Timeout, + #[error( + "The container {container} is deleted. You cannot apply the op on a deleted container." 
+ )] + ContainerDeleted { container: Box }, } /// Utility function to check if a range is empty that works on older rust versions diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index 71bb4c2c5..dbc7fbe5e 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -12,7 +12,8 @@ use itertools::Itertools; use enum_dispatch::enum_dispatch; use loro_common::{ - CompactIdLp, ContainerID, Counter, HasCounterSpan, IdFull, IdLp, IdSpan, LoroValue, PeerID, ID, + CompactIdLp, ContainerID, Counter, CounterSpan, HasCounterSpan, IdFull, IdLp, IdSpan, + LoroValue, PeerID, ID, }; use loro_delta::DeltaRope; use rustc_hash::{FxHashMap, FxHashSet}; @@ -26,7 +27,8 @@ use crate::{ list::list_op::InnerListOp, richtext::{ richtext_state::{RichtextStateChunk, TextChunk}, - AnchorType, CrdtRopeDelta, RichtextChunk, RichtextChunkValue, RichtextTracker, StyleOp, + AnchorType, CrdtRopeDelta, PeerSpanCoverage, RichtextChunk, RichtextChunkValue, + RichtextTracker, StyleOp, TrackerMaterializedVersion, }, }, cursor::AbsolutePosition, @@ -36,7 +38,7 @@ use crate::{ event::{DiffVariant, InternalDiff}, op::{InnerContent, RichOp, SliceRange, SliceWithId}, span::{HasId, HasLamport}, - version::Frontiers, + version::{CausalVersion, Frontiers}, InternalString, VersionVector, }; @@ -46,6 +48,132 @@ use self::unknown::UnknownDiffCalculator; use super::{event::InternalContainerDiff, oplog::OpLog}; +#[cfg(feature = "test_utils")] +pub(crate) mod profiling { + use std::{cell::RefCell, time::Duration}; + + #[derive(Debug, Clone, Copy, Default)] + pub(crate) struct DiffCalcProfile { + pub richtext_tracker_checkout: Duration, + pub richtext_tracker_diff: Duration, + pub richtext_delta_build: Duration, + pub richtext_insert_future_scan: Duration, + pub causal_vv_materialize: Duration, + pub richtext_tracker_checkout_count: u64, + pub richtext_tracker_diff_count: u64, + pub richtext_delta_build_count: u64, + pub 
richtext_insert_future_scan_count: u64, + pub richtext_insert_future_scan_visited: u64, + pub richtext_insert_future_scan_max_visited: usize, + pub causal_vv_materialize_count: u64, + pub max_causal_vv_width: usize, + pub richtext_tracker_span_filter_count: u64, + pub richtext_tracker_span_count: u64, + pub richtext_tracker_filtered_span_count: u64, + pub richtext_tracker_skipped_span_count: u64, + pub richtext_tracker_max_span_count: usize, + pub richtext_tracker_max_filtered_span_count: usize, + pub richtext_id_to_cursor_iter_count: u64, + pub richtext_id_to_cursor_empty_iter_count: u64, + } + + thread_local! { + static PROFILE: RefCell> = const { RefCell::new(None) }; + } + + pub(crate) fn begin() { + PROFILE.with(|profile| { + *profile.borrow_mut() = Some(DiffCalcProfile::default()); + }); + } + + pub(crate) fn finish() -> DiffCalcProfile { + PROFILE.with(|profile| profile.borrow_mut().take().unwrap_or_default()) + } + + pub(crate) fn record_richtext_tracker_checkout(duration: Duration) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_tracker_checkout += duration; + profile.richtext_tracker_checkout_count += 1; + } + }); + } + + pub(crate) fn record_richtext_tracker_diff(duration: Duration) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_tracker_diff += duration; + profile.richtext_tracker_diff_count += 1; + } + }); + } + + pub(crate) fn record_richtext_delta_build(duration: Duration) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_delta_build += duration; + profile.richtext_delta_build_count += 1; + } + }); + } + + pub(crate) fn record_richtext_insert_future_scan(duration: Duration, visited: usize) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_insert_future_scan += duration; + profile.richtext_insert_future_scan_count += 1; + 
profile.richtext_insert_future_scan_visited += visited as u64; + profile.richtext_insert_future_scan_max_visited = + profile.richtext_insert_future_scan_max_visited.max(visited); + } + }); + } + + pub(crate) fn record_causal_vv_materialize(duration: Duration, width: usize) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.causal_vv_materialize += duration; + profile.causal_vv_materialize_count += 1; + profile.max_causal_vv_width = profile.max_causal_vv_width.max(width); + } + }); + } + + pub(crate) fn record_richtext_tracker_span_filter(input: usize, filtered: usize) { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_tracker_span_filter_count += 1; + profile.richtext_tracker_span_count += input as u64; + profile.richtext_tracker_filtered_span_count += filtered as u64; + profile.richtext_tracker_skipped_span_count += + input.saturating_sub(filtered) as u64; + profile.richtext_tracker_max_span_count = + profile.richtext_tracker_max_span_count.max(input); + profile.richtext_tracker_max_filtered_span_count = profile + .richtext_tracker_max_filtered_span_count + .max(filtered); + } + }); + } + + pub(crate) fn record_richtext_id_to_cursor_iter_call() { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_id_to_cursor_iter_count += 1; + } + }); + } + + pub(crate) fn record_richtext_id_to_cursor_empty_iter() { + PROFILE.with(|profile| { + if let Some(profile) = profile.borrow_mut().as_mut() { + profile.richtext_id_to_cursor_empty_iter_count += 1; + } + }); + } +} + /// Calculate the diff between two versions. given [OpLog][super::oplog::OpLog] /// and [AppState][super::state::AppState]. 
/// @@ -172,7 +300,7 @@ impl DiffCalculator { let affected_set = { loro_common::debug!("LCA: {:?} mode={:?}", &lca, diff_mode); let mut started_set = FxHashSet::default(); - for (change, (start_counter, end_counter), vv) in iter { + for (change, (start_counter, end_counter), base_vv, _base_frontiers) in iter { let iter_start = change .ops .binary_search_by(|op| op.ctr_last().cmp(&start_counter)) @@ -205,8 +333,7 @@ impl DiffCalculator { op = stack_sliced_op.as_ref().unwrap(); } - let vv = &mut vv.borrow_mut(); - vv.extend_to_include_end_id(ID::new(change.peer(), op.counter)); + let causal_vv = CausalVersion::new(&base_vv, change.peer(), op.counter); let container = op.container; let depth = oplog.arena.get_depth(container); let (old_depth, calculator) = self.get_or_create_calc(container, depth); @@ -221,14 +348,21 @@ impl DiffCalculator { calculator.start_tracking(oplog, &lca, diff_mode); } - if visited.contains(&op.container) { + // Movable-list move replay needs the before-op causal VV for + // history-cache last_pos lookups. The tracker's materialized + // version is only a container projection and may not include + // causal deps that inserted the moved element, so it cannot + // replace CausalVersion here. 
+ let should_reuse_container_checkout = visited.contains(&op.container) + && !matches!(calculator, ContainerDiffCalculator::MovableList(_)); + if should_reuse_container_checkout { // don't checkout if we have already checked out this container in this round calculator.apply_change(oplog, RichOp::new_by_change(&change, op), None); } else { calculator.apply_change( oplog, RichOp::new_by_change(&change, op), - Some(vv), + Some(causal_vv), ); visited.insert(container); } @@ -348,7 +482,7 @@ impl DiffCalculator { .or_insert_with(|| match idx.get_type() { crate::ContainerType::Text => ( depth, - ContainerDiffCalculator::Richtext(RichtextDiffCalculator::new()), + ContainerDiffCalculator::Richtext(RichtextDiffCalculator::new(idx)), ), crate::ContainerType::Map => ( depth, @@ -389,12 +523,7 @@ impl DiffCalculator { #[enum_dispatch] pub(crate) trait DiffCalculatorTrait { fn start_tracking(&mut self, oplog: &OpLog, vv: &crate::VersionVector, mode: DiffMode); - fn apply_change( - &mut self, - oplog: &OpLog, - op: crate::op::RichOp, - vv: Option<&crate::VersionVector>, - ); + fn apply_change(&mut self, oplog: &OpLog, op: crate::op::RichOp, vv: Option>); fn calculate_diff( &mut self, idx: ContainerIdx, @@ -451,7 +580,7 @@ impl DiffCalculatorTrait for MapDiffCalculator { &mut self, _oplog: &crate::OpLog, op: crate::op::RichOp, - _vv: Option<&crate::VersionVector>, + _vv: Option>, ) { if matches!(self.current_mode, DiffMode::Checkout) { // We need to use history cache anyway @@ -554,8 +683,12 @@ use rle::{HasLength as _, Sliceable}; #[derive(Default)] pub(crate) struct ListDiffCalculator { - start_vv: VersionVector, + start_vv: Box, + // Stable version currently materialized in `tracker`. After each checkout + // diff calculation it must equal the coverage-local projection of `from`. 
+ materialized: TrackerMaterializedVersion, tracker: Box, + coverage: Box, } impl ListDiffCalculator { @@ -582,28 +715,46 @@ impl std::fmt::Debug for ListDiffCalculator { impl DiffCalculatorTrait for ListDiffCalculator { fn start_tracking(&mut self, _oplog: &OpLog, vv: &crate::VersionVector, _mode: DiffMode) { - if !vv.includes_vv(&self.start_vv) || !self.tracker.all_vv().includes_vv(vv) { + if !version_includes_covered_start(vv, self.start_vv.as_ref(), self.coverage.as_ref()) + || !tracker_has_covered_ops(&self.tracker, vv, self.coverage.as_ref()) + { *self.tracker = RichtextTracker::new_with_unknown(); - self.start_vv = vv.clone(); + *self.start_vv = vv.clone(); + self.coverage.clear(); + self.materialized + .reset_to_version_projection(vv, self.coverage.as_ref()); } - self.tracker.checkout(vv); + richtext_tracker_checkout_with_coverage( + &mut self.tracker, + &mut self.materialized, + vv, + self.coverage.as_ref(), + ); + self.materialized + .debug_assert_matches_version_projection(vv, self.coverage.as_ref()); } fn apply_change( &mut self, _oplog: &OpLog, op: crate::op::RichOp, - vv: Option<&crate::VersionVector>, + vv: Option>, ) { if let Some(vv) = vv { - self.tracker.checkout(vv); + richtext_tracker_checkout_causal_with_coverage( + &mut self.tracker, + &mut self.materialized, + vv, + self.coverage.as_ref(), + ); } match &op.op().content { crate::op::InnerContent::List(l) => match l { InnerListOp::Insert { slice, pos } => { self.tracker.insert( + &mut self.materialized, op.id_full(), *pos, RichtextChunk::new_text(slice.0.clone()), @@ -611,6 +762,7 @@ impl DiffCalculatorTrait for ListDiffCalculator { } InnerListOp::Delete(del) => { self.tracker.delete( + &mut self.materialized, op.id_start(), del.id_start, del.start() as usize, @@ -622,6 +774,8 @@ impl DiffCalculatorTrait for ListDiffCalculator { }, _ => unreachable!(), } + + record_op_coverage(self.coverage.as_mut(), &op); } fn finish_this_round(&mut self) {} @@ -634,7 +788,13 @@ impl DiffCalculatorTrait for 
ListDiffCalculator { mut on_new_container: impl FnMut(&ContainerID), ) -> (InternalDiff, DiffMode) { let mut delta = Delta::new(); - for item in self.tracker.diff(info.from_vv, info.to_vv) { + let diff_iter = self.tracker.diff_with_coverage( + &mut self.materialized, + info.from_vv, + info.to_vv, + self.coverage.as_ref(), + ); + for item in diff_iter { match item { CrdtRopeDelta::Retain(len) => { delta = delta.retain(len); @@ -744,16 +904,19 @@ impl DiffCalculatorTrait for ListDiffCalculator { } } - debug_assert_eq!(acc_len, len as usize); + debug_assert!(acc_len <= len as usize); delta } + self.materialized + .debug_assert_matches_version_projection(info.from_vv, self.coverage.as_ref()); (InternalDiff::ListRaw(delta), DiffMode::Checkout) } } #[derive(Debug)] pub(crate) struct RichtextDiffCalculator { + container_idx: ContainerIdx, mode: Box, } @@ -761,9 +924,11 @@ pub(crate) struct RichtextDiffCalculator { enum RichtextCalcMode { Crdt { tracker: Box, + materialized: TrackerMaterializedVersion, /// (op, end_pos) styles: Vec<(StyleOp, usize)>, start_vv: VersionVector, + coverage: PeerSpanCoverage, }, Linear { diff: DeltaRope, @@ -772,12 +937,15 @@ enum RichtextCalcMode { } impl RichtextDiffCalculator { - pub fn new() -> Self { + pub fn new(container_idx: ContainerIdx) -> Self { Self { + container_idx, mode: Box::new(RichtextCalcMode::Crdt { tracker: Box::new(RichtextTracker::new_with_unknown()), + materialized: TrackerMaterializedVersion::default(), styles: Vec::new(), start_vv: VersionVector::new(), + coverage: PeerSpanCoverage::default(), }), } } @@ -795,10 +963,135 @@ impl RichtextDiffCalculator { } } +#[cfg(feature = "test_utils")] +fn richtext_tracker_checkout_with_coverage( + tracker: &mut RichtextTracker, + materialized: &mut TrackerMaterializedVersion, + vv: &VersionVector, + coverage: &PeerSpanCoverage, +) { + let start = std::time::Instant::now(); + materialized.checkout_to_version(tracker, vv, coverage); + 
profiling::record_richtext_tracker_checkout(start.elapsed()); +} + +#[cfg(feature = "test_utils")] +fn richtext_tracker_checkout_causal_with_coverage( + tracker: &mut RichtextTracker, + materialized: &mut TrackerMaterializedVersion, + vv: CausalVersion<'_>, + coverage: &PeerSpanCoverage, +) { + let start = std::time::Instant::now(); + materialized.checkout_to_causal(tracker, vv, coverage); + profiling::record_richtext_tracker_checkout(start.elapsed()); +} + +#[cfg(not(feature = "test_utils"))] +fn richtext_tracker_checkout_with_coverage( + tracker: &mut RichtextTracker, + materialized: &mut TrackerMaterializedVersion, + vv: &VersionVector, + coverage: &PeerSpanCoverage, +) { + materialized.checkout_to_version(tracker, vv, coverage); +} + +#[cfg(not(feature = "test_utils"))] +fn richtext_tracker_checkout_causal_with_coverage( + tracker: &mut RichtextTracker, + materialized: &mut TrackerMaterializedVersion, + vv: CausalVersion<'_>, + coverage: &PeerSpanCoverage, +) { + materialized.checkout_to_causal(tracker, vv, coverage); +} + +fn seed_coverage_from_state_chunks(coverage: &mut PeerSpanCoverage, chunks: &[RichtextStateChunk]) { + coverage.clear(); + for chunk in chunks { + let RichtextStateChunk::Text(text) = chunk else { + continue; + }; + let id = text.id(); + record_coverage_span( + coverage, + IdSpan::new( + id.peer, + id.counter, + id.counter + text.unicode_len() as Counter, + ), + ); + } +} + +fn record_op_coverage(coverage: &mut PeerSpanCoverage, op: &crate::op::RichOp<'_>) { + record_coverage_span( + coverage, + IdSpan::new( + op.peer, + op.counter(), + op.counter() + op.atom_len() as Counter, + ), + ); +} + +fn record_coverage_span(coverage: &mut PeerSpanCoverage, span: IdSpan) { + if span.peer == PeerID::MAX || span.atom_len() == 0 { + return; + } + + let start = span.counter.min(); + let end = span.counter.norm_end(); + coverage + .entry(span.peer) + .and_modify(|coverage| { + let start = coverage.min().min(start); + let end = 
coverage.norm_end().max(end); + *coverage = CounterSpan::new(start, end); + }) + .or_insert_with(|| CounterSpan::new(start, end)); +} + +fn version_includes_covered_start( + target: &VersionVector, + start: &VersionVector, + _coverage: &PeerSpanCoverage, +) -> bool { + // `start_vv` is the lower bound used when the reusable tracker was last + // rebuilt from unknown. Even though tracker checkout only applies covered + // spans, the tracker sequence can contain positional anchors that are needed + // to replay later covered ops. Reusing it before this full lower bound risks + // preserving anchors from a future version. + target.includes_vv(start) +} + +fn tracker_has_covered_ops( + tracker: &RichtextTracker, + target: &VersionVector, + _coverage: &PeerSpanCoverage, +) -> bool { + // Coverage is only a filter for tracker checkout work; it is not a proof + // that every earlier op affecting positions has been materialized. Reusing + // a tracker across a causal gap can make later positional ops, such as text + // deletes or movable-list moves, apply against the wrong local sequence. 
+ tracker.all_vv().includes_vv(target) +} + +fn materialize_causal_version(vv: CausalVersion<'_>) -> VersionVector { + let mut version = VersionVector::from_im_vv(vv.base()); + let peer_end = vv.peer_end(); + if peer_end > version.get(&vv.peer()).copied().unwrap_or(0) { + version.insert(vv.peer(), peer_end); + } + + version +} + impl DiffCalculatorTrait for RichtextDiffCalculator { fn start_tracking( &mut self, - _oplog: &super::oplog::OpLog, + oplog: &super::oplog::OpLog, vv: &crate::VersionVector, mode: DiffMode, ) { @@ -819,16 +1112,49 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { match &mut *self.mode { RichtextCalcMode::Crdt { tracker, + materialized, styles, start_vv, + coverage, } => { - if !vv.includes_vv(start_vv) || !tracker.all_vv().includes_vv(vv) { + let shallow_root_vv = oplog.dag().frontiers_to_vv(oplog.shallow_since_frontiers()); + if shallow_root_vv.as_ref() == Some(vv) { + let chunks = oplog + .with_history_cache(|h| h.text_chunks_at_shallow_root(self.container_idx)); + if let Some(chunks) = chunks { + let mut seeded_styles = Vec::new(); + if let Some(seeded_tracker) = + RichtextTracker::new_from_state_chunks(&chunks, &mut seeded_styles) + { + **tracker = seeded_tracker; + *styles = seeded_styles; + *start_vv = vv.clone(); + seed_coverage_from_state_chunks(coverage, &chunks); + materialized.reset_to_version_projection(vv, coverage); + richtext_tracker_checkout_with_coverage( + tracker, + materialized, + vv, + coverage, + ); + materialized.debug_assert_matches_version_projection(vv, coverage); + return; + } + } + } + + if !version_includes_covered_start(vv, start_vv, coverage) + || !tracker_has_covered_ops(tracker, vv, coverage) + { **tracker = RichtextTracker::new_with_unknown(); styles.clear(); *start_vv = vv.clone(); + coverage.clear(); + materialized.reset_to_version_projection(vv, coverage); } - tracker.checkout(vv); + richtext_tracker_checkout_with_coverage(tracker, materialized, vv, coverage); + 
materialized.debug_assert_matches_version_projection(vv, coverage); } RichtextCalcMode::Linear { .. } => {} } @@ -838,7 +1164,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { &mut self, oplog: &super::oplog::OpLog, op: crate::op::RichOp, - vv: Option<&crate::VersionVector>, + vv: Option>, ) { match &mut *self.mode { RichtextCalcMode::Linear { @@ -937,11 +1263,18 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { }, RichtextCalcMode::Crdt { tracker, + materialized, styles, start_vv: _, + coverage, } => { if let Some(vv) = vv { - tracker.checkout(vv); + richtext_tracker_checkout_causal_with_coverage( + tracker, + materialized, + vv, + coverage, + ); } match &op.raw_op().content { crate::op::InnerContent::List(l) => match l { @@ -957,6 +1290,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { pos, } => { tracker.insert( + materialized, op.id_full(), *pos as usize, RichtextChunk::new_text(*unicode_start..*unicode_start + *len), @@ -964,6 +1298,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { } InnerListOp::Delete(del) => { tracker.delete( + materialized, op.id_start(), del.id_start, del.start() as usize, @@ -992,6 +1327,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { *end as usize, )); tracker.insert( + materialized, op.id_full(), *start as usize, RichtextChunk::new_style_anchor(style_id as u32, AnchorType::Start), @@ -1005,6 +1341,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { let style_id = styles.len() - pos - 1; let (_start_op, end_pos) = &styles[style_id]; tracker.insert( + materialized, op.id_full(), // need to shift 1 because we insert the start style anchor before this pos *end_pos + 1, @@ -1045,6 +1382,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { )); let style_id = styles.len() - 1; tracker.insert( + materialized, op.id_full(), // need to shift 1 because we insert the start style anchor before this pos *end as usize + 1, @@ -1058,6 +1396,7 @@ impl DiffCalculatorTrait for 
RichtextDiffCalculator { }, _ => unreachable!(), } + record_op_coverage(coverage, &op); } } } @@ -1075,10 +1414,22 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { DiffMode::Linear, ), RichtextCalcMode::Crdt { - tracker, styles, .. + tracker, + materialized, + styles, + coverage, + .. } => { let mut delta = DeltaRope::new(); - for item in tracker.diff(info.from_vv, info.to_vv) { + #[cfg(feature = "test_utils")] + let tracker_diff_start = std::time::Instant::now(); + let diff_iter = + tracker.diff_with_coverage(materialized, info.from_vv, info.to_vv, coverage); + #[cfg(feature = "test_utils")] + profiling::record_richtext_tracker_diff(tracker_diff_start.elapsed()); + #[cfg(feature = "test_utils")] + let delta_build_start = std::time::Instant::now(); + for item in diff_iter { match item { CrdtRopeDelta::Retain(len) => { delta.push_retain(len, ()); @@ -1158,7 +1509,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { } } - debug_assert_eq!(acc_len, len as usize); + debug_assert!(acc_len <= len as usize); } RichtextChunkValue::MoveAnchor => unreachable!(), }, @@ -1167,7 +1518,10 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { } } } + #[cfg(feature = "test_utils")] + profiling::record_richtext_delta_build(delta_build_start.elapsed()); + materialized.debug_assert_matches_version_projection(info.from_vv, coverage); (InternalDiff::RichtextRaw(delta), DiffMode::Checkout) } } @@ -1202,12 +1556,26 @@ struct MovableListInner { impl DiffCalculatorTrait for MovableListDiffCalculator { fn start_tracking(&mut self, _oplog: &OpLog, vv: &crate::VersionVector, mode: DiffMode) { - if !vv.includes_vv(&self.list.start_vv) || !self.list.tracker.all_vv().includes_vv(vv) { + if !version_includes_covered_start(vv, self.list.start_vv.as_ref(), &self.list.coverage) + || !tracker_has_covered_ops(&self.list.tracker, vv, &self.list.coverage) + { *self.list.tracker = RichtextTracker::new_with_unknown(); - self.list.start_vv = vv.clone(); + *self.list.start_vv = vv.clone(); 
+ self.list.coverage.clear(); + self.list + .materialized + .reset_to_version_projection(vv, &self.list.coverage); } - self.list.tracker.checkout(vv); + richtext_tracker_checkout_with_coverage( + &mut self.list.tracker, + &mut self.list.materialized, + vv, + self.list.coverage.as_ref(), + ); + self.list + .materialized + .debug_assert_matches_version_projection(vv, self.list.coverage.as_ref()); self.inner.current_mode = mode; } @@ -1215,7 +1583,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { &mut self, oplog: &OpLog, op: crate::op::RichOp, - vv: Option<&crate::VersionVector>, + vv: Option>, ) { let InnerContent::List(l) = &op.raw_op().content else { unreachable!() @@ -1300,22 +1668,33 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { { // Apply change on the list items let this = &mut self.list; + let causal_lookup_vv = vv.map(materialize_causal_version); if let Some(vv) = vv { - this.tracker.checkout(vv); + richtext_tracker_checkout_causal_with_coverage( + &mut this.tracker, + &mut this.materialized, + vv, + this.coverage.as_ref(), + ); } let real_op = op.op(); + let mut updates_tracker = false; match &real_op.content { crate::op::InnerContent::List(l) => match l { InnerListOp::Insert { slice, pos } => { + updates_tracker = true; this.tracker.insert( + &mut this.materialized, op.id_full(), *pos, RichtextChunk::new_text(slice.0.clone()), ); } InnerListOp::Delete(del) => { + updates_tracker = true; this.tracker.delete( + &mut this.materialized, op.id_start(), del.id_start, del.start() as usize, @@ -1324,20 +1703,37 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { ); } InnerListOp::Move { from, elem_id, to } => { + updates_tracker = true; self.inner.move_id_to_elem_id.insert(op.id(), *elem_id); - if !this.tracker.current_vv().includes_id(op.id()) { + if !this.materialized.includes_id(op.id()) { let last_pos = if is_checkout { // TODO: PERF: this lookup can be optimized oplog.with_history_cache(|h| { let list = 
&h.get_checkout_index().movable_list; + let lookup_vv = causal_lookup_vv + .as_ref() + .unwrap_or(this.materialized.as_vv()); + // Invariant: a valid move's elem_id must have a previous + // position in its before-op causal VV. The original insert + // causally precedes the move in full history; shallow docs + // seed visible root elements into the checkout history cache. + // Missing last_pos means either the op log or the replay VV is + // inconsistent, so failing fast is better than corrupting state. list.last_pos( *elem_id, - this.tracker.current_vv(), + lookup_vv, // TODO: PERF: Provide the lamport of to version Lamport::MAX, oplog, ) - .unwrap() + .unwrap_or_else(|| { + panic!( + "missing previous movable-list position: move_op={:?}, elem_id={:?}, lookup_vv={:?}", + op.id(), + elem_id, + lookup_vv + ) + }) .id() }) } else { @@ -1355,6 +1751,7 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { FAKE_ID }; this.tracker.move_item( + &mut this.materialized, op.id_full(), last_pos, *from as usize, @@ -1371,6 +1768,9 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { }, _ => unreachable!(), } + if updates_tracker { + record_op_coverage(this.coverage.as_mut(), &op); + } }; } @@ -1463,9 +1863,17 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { return false; }; // TODO: PERF: Provide the lamport of to version + // Invariant: if an element has a position at to_vv, it must also + // have a value at to_vv. Full history falls back to the original + // insert value; shallow roots record position and value together. 
let value = checkout_index .last_value(id, info.to_vv, Lamport::MAX, oplog) - .unwrap(); + .unwrap_or_else(|| { + panic!( + "missing movable-list value for positioned element: elem_id={:?}, to_vv={:?}", + id, info.to_vv + ) + }); // TODO: PERF: Provide the lamport of to version let old_pos = checkout_index.last_pos(id, info.from_vv, Lamport::MAX, oplog); // TODO: PERF: Provide the lamport of to version @@ -1520,7 +1928,10 @@ impl MovableListDiffCalculator { #[test] fn test_size() { - let text = RichtextDiffCalculator::new(); + let text = RichtextDiffCalculator::new(ContainerIdx::from_index_and_type( + 0, + loro_common::ContainerType::Text, + )); let size = std::mem::size_of_val(&text); assert!(size < 50, "RichtextDiffCalculator size: {}", size); let list = MovableListDiffCalculator::new(ContainerIdx::from_index_and_type( diff --git a/crates/loro-internal/src/diff_calc/counter.rs b/crates/loro-internal/src/diff_calc/counter.rs index bf0663b54..9c1627c91 100644 --- a/crates/loro-internal/src/diff_calc/counter.rs +++ b/crates/loro-internal/src/diff_calc/counter.rs @@ -2,7 +2,7 @@ use std::collections::BTreeMap; use loro_common::{ContainerID, ID}; -use crate::{container::idx::ContainerIdx, event::InternalDiff, OpLog}; +use crate::{container::idx::ContainerIdx, event::InternalDiff, version::CausalVersion, OpLog}; use super::{DiffCalcVersionInfo, DiffCalculatorTrait, DiffMode}; @@ -26,7 +26,7 @@ impl DiffCalculatorTrait for CounterDiffCalculator { &mut self, _oplog: &OpLog, op: crate::op::RichOp, - _vv: Option<&crate::VersionVector>, + _vv: Option>, ) { let id = op.id(); self.ops.insert( diff --git a/crates/loro-internal/src/diff_calc/tree.rs b/crates/loro-internal/src/diff_calc/tree.rs index 5dc93eb52..f76dc4601 100644 --- a/crates/loro-internal/src/diff_calc/tree.rs +++ b/crates/loro-internal/src/diff_calc/tree.rs @@ -7,11 +7,10 @@ use rustc_hash::FxHashMap; use crate::{ container::{idx::ContainerIdx, tree::tree_op::TreeOp}, - dag::DagUtils, delta::{TreeDelta, 
TreeDeltaItem, TreeInternalDiff}, event::InternalDiff, state::TreeParentId, - version::Frontiers, + version::{CausalVersion, Frontiers}, OpLog, VersionVector, }; @@ -52,7 +51,7 @@ impl DiffCalculatorTrait for TreeDiffCalculator { &mut self, _oplog: &OpLog, op: crate::op::RichOp, - _vv: Option<&crate::VersionVector>, + _vv: Option>, ) { match &mut self.mode { TreeDiffCalculatorMode::Crdt => {} @@ -238,9 +237,23 @@ impl TreeDiffCalculator { let _e = s.enter(); let to_frontiers = info.to_frontiers; let from_frontiers = info.from_frontiers; - let (common_ancestors, _mode) = - oplog.dag.find_common_ancestor(from_frontiers, to_frontiers); - let lca_vv = oplog.dag.frontiers_to_vv(&common_ancestors).unwrap(); + let (mut common_ancestors, _mode) = oplog.find_common_ancestor_for_diff( + info.from_vv, + from_frontiers, + info.to_vv, + to_frontiers, + ); + let mut lca_vv = oplog.dag.frontiers_to_vv(&common_ancestors); + if lca_vv.is_none() { + if info.to_vv.includes_vv(info.from_vv) { + common_ancestors = from_frontiers.clone(); + lca_vv = oplog.dag.frontiers_to_vv(&common_ancestors); + } else if info.from_vv.includes_vv(info.to_vv) { + common_ancestors = to_frontiers.clone(); + lca_vv = oplog.dag.frontiers_to_vv(&common_ancestors); + } + } + let lca_vv = lca_vv.expect("tree diff LCA should be representable in the current DAG"); let lca_frontiers = common_ancestors; let to_max_lamport = self.get_max_lamport_by_frontiers(to_frontiers, oplog); let lca_min_lamport = self.get_min_lamport_by_frontiers(&lca_frontiers, oplog); diff --git a/crates/loro-internal/src/diff_calc/unknown.rs b/crates/loro-internal/src/diff_calc/unknown.rs index 25a6fbff3..0532b4caa 100644 --- a/crates/loro-internal/src/diff_calc/unknown.rs +++ b/crates/loro-internal/src/diff_calc/unknown.rs @@ -1,6 +1,6 @@ use loro_common::ContainerID; -use crate::{container::idx::ContainerIdx, event::InternalDiff, OpLog}; +use crate::{container::idx::ContainerIdx, event::InternalDiff, version::CausalVersion, OpLog}; use 
super::{DiffCalcVersionInfo, DiffCalculatorTrait, DiffMode}; @@ -14,7 +14,7 @@ impl DiffCalculatorTrait for UnknownDiffCalculator { &mut self, _oplog: &OpLog, _op: crate::op::RichOp, - _vv: Option<&crate::VersionVector>, + _vv: Option>, ) { } diff --git a/crates/loro-internal/src/encoding/fast_snapshot.rs b/crates/loro-internal/src/encoding/fast_snapshot.rs index 2ebf60a71..3ab8f6ae6 100644 --- a/crates/loro-internal/src/encoding/fast_snapshot.rs +++ b/crates/loro-internal/src/encoding/fast_snapshot.rs @@ -227,6 +227,7 @@ pub(crate) fn decode_snapshot_inner( } else { ensure_cov::notify_cov("shallow_snapshot::dont_need_calc"); state_frontiers = oplog.frontiers().clone(); + state.cache_current_as_shallow_latest(state_frontiers.clone()); } } diff --git a/crates/loro-internal/src/encoding/json_schema.rs b/crates/loro-internal/src/encoding/json_schema.rs index 2f846c313..183fc4fd1 100644 --- a/crates/loro-internal/src/encoding/json_schema.rs +++ b/crates/loro-internal/src/encoding/json_schema.rs @@ -564,6 +564,7 @@ fn decode_changes(json: JsonSchema, arena: &SharedArena) -> LoroResult = RleVec::new(); let mut expected_counter = id.counter; if json_ops.is_empty() { @@ -601,6 +602,53 @@ fn decode_changes(json: JsonSchema, arena: &SharedArena) -> LoroResult LoroResult<()> { + for (i, op) in ops.iter().enumerate() { + let JsonOpContent::Text(text) = &op.content else { + continue; + }; + + match text { + json::TextOp::Mark { .. 
} => { + let Some(next) = ops.get(i + 1) else { + return Err(LoroError::DecodeError( + "text mark must be immediately followed by mark end".into(), + )); + }; + + if next.container != op.container + || !matches!(&next.content, JsonOpContent::Text(json::TextOp::MarkEnd)) + { + return Err(LoroError::DecodeError( + "text mark must be immediately followed by mark end".into(), + )); + } + } + json::TextOp::MarkEnd => { + let Some(prev) = i.checked_sub(1).and_then(|i| ops.get(i)) else { + return Err(LoroError::DecodeError( + "text mark end must immediately follow text mark".into(), + )); + }; + + if prev.container != op.container + || !matches!( + &prev.content, + JsonOpContent::Text(json::TextOp::Mark { .. }) + ) + { + return Err(LoroError::DecodeError( + "text mark end must immediately follow text mark".into(), + )); + } + } + _ => {} + } + } + + Ok(()) +} + fn validate_json_frontiers(frontiers: &Frontiers) -> LoroResult<()> { for id in frontiers.iter() { validate_json_id_counter(id, "start version id")?; @@ -824,13 +872,21 @@ fn decode_op(op: json::JsonOp, arena: &SharedArena, peers: &Option>) style_key, style_value, info, - } => InnerContent::List(InnerListOp::StyleStart { - start, - end, - key: style_key.into(), - value: style_value, - info: TextStyleInfoFlag::from_byte(info), - }), + } => { + if start >= end { + return Err(LoroError::DecodeError( + "text mark start must be less than end".into(), + )); + } + + InnerContent::List(InnerListOp::StyleStart { + start, + end, + key: style_key.into(), + value: style_value, + info: TextStyleInfoFlag::from_byte(info), + }) + } json::TextOp::MarkEnd => InnerContent::List(InnerListOp::StyleEnd), }, _ => { diff --git a/crates/loro-internal/src/encoding/outdated_encode_reordered.rs b/crates/loro-internal/src/encoding/outdated_encode_reordered.rs index e2f2c504c..5ae40d9bb 100644 --- a/crates/loro-internal/src/encoding/outdated_encode_reordered.rs +++ b/crates/loro-internal/src/encoding/outdated_encode_reordered.rs @@ -51,18 
+51,26 @@ pub(crate) fn import_changes_to_oplog( continue; } - if oplog.dag.is_before_shallow_root(&change.deps) { + let deps_are_before_shallow_root = oplog.dag.is_before_shallow_root(&change.deps); + let deps_start_at_shallow_root = !change.deps.is_empty() + && change + .deps + .iter() + .all(|dep| oplog.shallow_since_frontiers().contains(&dep)); + if deps_are_before_shallow_root && !deps_start_at_shallow_root { changes_before_shallow_root.push(change); continue; } latest_ids.push(change.id_last()); // calc lamport or pending if its deps are not satisfied - match oplog.dag.get_change_lamport_from_deps(&change.deps) { - Some(lamport) => change.lamport = lamport, - None => { - pending_changes.push(change); - continue; + if !deps_are_before_shallow_root { + match oplog.dag.get_change_lamport_from_deps(&change.deps) { + Some(lamport) => change.lamport = lamport, + None => { + pending_changes.push(change); + continue; + } } } diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index ac2f4fee2..b23d85101 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -9,7 +9,7 @@ use crate::{ dag::DagUtils, encoding::fast_snapshot::{_encode_snapshot, Snapshot}, state::container_store::FRONTIERS_KEY, - version::{Frontiers, VersionVector}, + version::{shrink_frontiers, Frontiers, VersionVector}, LoroDoc, }; @@ -33,6 +33,7 @@ pub(crate) fn export_shallow_snapshot_inner( doc: &LoroDoc, start_from: &Frontiers, ) -> Result<(Snapshot, Frontiers), LoroEncodeError> { + let requested_start_from_len = start_from.len(); let oplog = doc.oplog().lock(); let start_from = calc_shallow_doc_start(&oplog, start_from); let mut start_vv = frontiers_to_vv_for_export(&oplog, &start_from, "export_shallow_snapshot")?; @@ -41,26 +42,6 @@ pub(crate) fn export_shallow_snapshot_inner( start_vv.insert(id.peer, id.counter); } - #[cfg(debug_assertions)] - { - use 
crate::dag::Dag; - if !start_from.is_empty() { - assert!(start_from.len() == 1); - let id = start_from.as_single().unwrap(); - let node = oplog.dag.get(id).unwrap(); - if id.counter == node.cnt { - let vv = oplog.dag().frontiers_to_vv(&node.deps).unwrap(); - assert_eq!(vv, start_vv); - } else { - let vv = oplog - .dag() - .frontiers_to_vv(&Frontiers::from(id.inc(-1))) - .unwrap(); - assert_eq!(vv, start_vv); - } - } - } - loro_common::debug!( "start version vv={:?} frontiers={:?}", &start_vv, @@ -105,7 +86,10 @@ pub(crate) fn export_shallow_snapshot_inner( drop(state); doc._checkout_without_emitting(&latest_frontiers, false, false) .map_err(LoroEncodeError::from)?; - let state_bytes = if ops_num > MAX_OPS_NUM_TO_ENCODE_WITHOUT_LATEST_STATE { + let should_encode_latest_state = requested_start_from_len > 1 + || start_from.len() > 1 + || ops_num > MAX_OPS_NUM_TO_ENCODE_WITHOUT_LATEST_STATE; + let state_bytes = if should_encode_latest_state { let mut state = doc.app_state().lock(); state.ensure_all_alive_containers(); state.store.encode(); @@ -155,9 +139,11 @@ pub(crate) fn export_state_only_snapshot( w: &mut W, ) -> Result { let oplog = doc.oplog().lock(); - let start_from = calc_shallow_doc_start(&oplog, target_frontiers); - let mut start_vv = + let target_frontiers = normalize_state_only_target_frontiers(&oplog, target_frontiers); + let start_from = calc_state_only_doc_start(&oplog, &target_frontiers); + let start_inclusive_vv = frontiers_to_vv_for_export(&oplog, &start_from, "export_state_only_snapshot")?; + let mut start_vv = start_inclusive_vv.clone(); for id in start_from.iter() { // we need to include the ops in start_from, this can make things easier start_vv.insert(id.peer, id.counter); @@ -169,10 +155,13 @@ pub(crate) fn export_state_only_snapshot( &start_from, ); - let to_vv = frontiers_to_vv_for_export(&oplog, target_frontiers, "export_state_only_snapshot")?; + let mut to_vv = + frontiers_to_vv_for_export(&oplog, &target_frontiers, 
"export_state_only_snapshot")?; + to_vv.merge(&start_inclusive_vv); + let to_frontiers = oplog.dag().vv_to_frontiers(&to_vv); let oplog_bytes = - oplog.export_change_store_in_range(&start_vv, &start_from, &to_vv, target_frontiers); + oplog.export_change_store_in_range(&start_vv, &start_from, &to_vv, &to_frontiers); let state_frontiers = doc.state_frontiers(); let is_attached = !doc.is_detached(); drop(oplog); @@ -244,9 +233,66 @@ fn restore_export_doc_state( /// It should be the LCA of the user given version and the latest version. /// Otherwise, users cannot replay the history from the initial version till the latest version. fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { + let frontiers = shrink_frontiers_preserving_shallow_root(oplog, frontiers); + calc_shallow_doc_start_from(oplog, frontiers) +} + +fn calc_state_only_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { + let shrunk = shrink_frontiers(frontiers, oplog.dag()).unwrap_or_else(|_| frontiers.clone()); + if shrunk == *frontiers { + // A canonical state-only target should become the shallow root itself. + // Lowering a concurrent target to its LCA would require replaying ops back + // to the target and can lose the exact target-state boundary. + advance_style_start_frontiers(oplog, frontiers.clone()) + } else { + // Non-canonical targets are used to spell "this shallow root plus later + // frontiers"; preserve that explicit root boundary. 
+ calc_shallow_doc_start_from(oplog, frontiers.clone()) + } +} + +fn normalize_state_only_target_frontiers(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers { + if oplog.is_shallow() { + shrink_frontiers_preserving_shallow_root(oplog, frontiers) + } else { + frontiers.clone() + } +} + +fn shrink_frontiers_preserving_shallow_root( + oplog: &crate::OpLog, + frontiers: &Frontiers, +) -> Frontiers { + if oplog.is_shallow() && frontiers_eq_unordered(frontiers, oplog.shallow_since_frontiers()) { + return oplog.shallow_since_frontiers().clone(); + } + + let shrunk = shrink_frontiers(frontiers, oplog.dag()).unwrap_or_else(|_| frontiers.clone()); + if oplog.is_shallow() + && oplog.dag().is_before_shallow_root(&shrunk) + && !oplog.dag().is_before_shallow_root(frontiers) + { + frontiers.clone() + } else { + shrunk + } +} + +fn frontiers_eq_unordered(a: &Frontiers, b: &Frontiers) -> bool { + a.len() == b.len() && a.iter().all(|id| b.contains(&id)) +} + +fn calc_shallow_doc_start_from(oplog: &crate::OpLog, frontiers: Frontiers) -> Frontiers { + if !oplog.shallow_since_vv().is_empty() { + // The target frontiers have already been checked by the caller. On a + // shallow doc, searching for a lower GCA can walk into trimmed history. + // Keep the requested boundary. + return advance_style_start_frontiers(oplog, frontiers); + } + // Find the LCA of the given frontiers by iteratively pairwise GCA. // This converges to a single frontier or empty if there is no common ancestor. 
- let mut current = frontiers.clone(); + let mut current = frontiers; while current.len() > 1 { let ids: Vec = current.iter().collect(); let mut next = Frontiers::new(); @@ -256,24 +302,35 @@ fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Fronti let (gca, _) = oplog .dag() .find_common_ancestor(&Frontiers::from(ids[i]), &Frontiers::from(ids[i + 1])); - for id in gca.iter() { - next.push(id); + if gca.is_empty() { + next.push(ids[i]); + next.push(ids[i + 1]); + } else { + for id in gca.iter() { + next.push(id); + } } } else { next.push(ids[i]); } i += 2; } - if next == current { - // Cannot converge further (pairwise GCAs are the nodes themselves). - // Fall back to empty frontiers, meaning export full history. - return Frontiers::default(); + if next.is_empty() || next == current { + // Cannot converge further (no non-empty GCA, or pairwise GCAs are + // the nodes themselves). + // Keep the multi-frontier start so the shallow root still represents + // the complete boundary instead of falling back to full history. + break; } current = next; } + advance_style_start_frontiers(oplog, current) +} + +fn advance_style_start_frontiers(oplog: &crate::OpLog, frontiers: Frontiers) -> Frontiers { let mut ans = Frontiers::new(); - for id in current.iter() { + for id in frontiers.iter() { let mut processed = false; if let Some(op) = oplog.get_op_that_includes(id) { if let crate::op::InnerContent::List(InnerListOp::StyleStart { .. 
}) = &op.content { @@ -320,10 +377,11 @@ pub(crate) fn encode_snapshot_at( "encode_snapshot_at: state is unexpectedly still in a transaction", )); } - let Some(oplog_bytes) = oplog.fork_changes_up_to(frontiers) else { + let target_frontiers = state.frontiers.clone(); + let Some(oplog_bytes) = oplog.fork_changes_up_to(&target_frontiers) else { break 'block Err(LoroEncodeError::FrontiersNotFound(format!( "frontiers: {:?} when export in SnapshotAt mode", - frontiers + target_frontiers ))); }; diff --git a/crates/loro-internal/src/handler.rs b/crates/loro-internal/src/handler.rs index d48fa785a..2b549519f 100644 --- a/crates/loro-internal/src/handler.rs +++ b/crates/loro-internal/src/handler.rs @@ -8,7 +8,7 @@ use crate::{ }, cursor::{Cursor, Side}, delta::{DeltaItem, Meta, StyleMeta, TreeExternalDiff}, - diff::{diff, diff_impl::UpdateTimeoutError, OperateProxy}, + diff::{diff, OperateProxy}, event::{Diff, TextDiff, TextDiffItem, TextMeta}, op::ListSlice, state::{IndexType, State, TreeParentId}, @@ -28,7 +28,7 @@ use serde::{Deserialize, Serialize}; use std::{borrow::Cow, cmp::Reverse, collections::BinaryHeap, fmt::Debug, ops::Deref, sync::Arc}; use tracing::{error, instrument}; -pub use crate::diff::diff_impl::UpdateOptions; +pub use crate::diff::diff_impl::{UpdateOptions, UpdateTimeoutError}; pub use tree::TreeHandler; mod movable_list_apply_delta; mod tree; @@ -2536,6 +2536,7 @@ impl TextHandler { } pub fn update(&self, text: &str, options: UpdateOptions) -> Result<(), UpdateTimeoutError> { + self.ensure_not_deleted_for_update()?; let old_str = self.to_string(); let new = text.chars().map(|x| x as u32).collect::>(); let old = old_str.chars().map(|x| x as u32).collect::>(); @@ -2553,6 +2554,7 @@ impl TextHandler { text: &str, options: UpdateOptions, ) -> Result<(), UpdateTimeoutError> { + self.ensure_not_deleted_for_update()?; let hook = text_update::DiffHookForLine::new(self, text); let old_lines = hook.get_old_arr().to_vec(); let new_lines = 
hook.get_new_arr().to_vec(); @@ -2564,6 +2566,16 @@ impl TextHandler { ) } + fn ensure_not_deleted_for_update(&self) -> Result<(), UpdateTimeoutError> { + if self.is_deleted() { + return Err(UpdateTimeoutError::ContainerDeleted { + container: Box::new(self.id()), + }); + } + + Ok(()) + } + #[allow(clippy::inherent_to_string)] pub fn to_string(&self) -> String { match &self.inner { diff --git a/crates/loro-internal/src/history_cache.rs b/crates/loro-internal/src/history_cache.rs index c75a24fb8..3ec741df1 100644 --- a/crates/loro-internal/src/history_cache.rs +++ b/crates/loro-internal/src/history_cache.rs @@ -346,6 +346,33 @@ impl ContainerHistoryCache { ans } + pub(crate) fn text_chunks_at_shallow_root( + &self, + idx: ContainerIdx, + ) -> Option> { + ensure_cov::notify_cov("loro_internal::history_cache::text_chunks_at_shallow_root"); + let state = self.shallow_root_state.as_ref()?; + let mut binding = state.store.lock(); + let Some(text) = binding.get_mut(idx) else { + return Some(Vec::new()); + }; + + let text_state = text + .get_state( + idx, + ContainerCreationContext { + configure: &Default::default(), + peer: 0, + }, + ) + .as_richtext_state() + .unwrap(); + + let mut ans = Vec::new(); + text_state.iter_raw(&mut |chunk| ans.push(chunk.clone())); + Some(ans) + } + pub(crate) fn find_list_chunks_in( &self, idx: ContainerIdx, diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index d52254f21..661fe1a42 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -13,7 +13,7 @@ use crate::{ IntoContainerId, }, cursor::{AbsolutePosition, CannotFindRelativePosition, Cursor, PosQueryResult}, - dag::{Dag, DagUtils}, + dag::Dag, diff_calc::DiffCalculator, encoding::{ self, decode_snapshot, export_fast_snapshot, export_fast_updates, @@ -61,6 +61,56 @@ use std::{ }; use tracing::{debug_span, info_span, instrument, warn}; +#[cfg(feature = "test_utils")] +#[derive(Debug, Clone, Copy, Default)] +pub struct 
CheckoutProfile { + pub total: std::time::Duration, + pub frontier_prepare: std::time::Duration, + pub frontiers_to_vv: std::time::Duration, + pub diff_calc: std::time::Duration, + pub state_apply: std::time::Duration, + pub emit_events: std::time::Duration, + pub richtext_tracker_checkout: std::time::Duration, + pub richtext_tracker_diff: std::time::Duration, + pub richtext_delta_build: std::time::Duration, + pub richtext_insert_future_scan: std::time::Duration, + pub causal_vv_materialize: std::time::Duration, + pub diff_container_count: usize, + pub from_frontiers_len: usize, + pub to_frontiers_len: usize, + pub from_vv_len: usize, + pub to_vv_len: usize, + pub richtext_tracker_checkout_count: u64, + pub richtext_tracker_diff_count: u64, + pub richtext_delta_build_count: u64, + pub richtext_insert_future_scan_count: u64, + pub richtext_insert_future_scan_visited: u64, + pub richtext_insert_future_scan_max_visited: usize, + pub causal_vv_materialize_count: u64, + pub max_causal_vv_width: usize, + pub richtext_tracker_span_filter_count: u64, + pub richtext_tracker_span_count: u64, + pub richtext_tracker_filtered_span_count: u64, + pub richtext_tracker_skipped_span_count: u64, + pub richtext_tracker_max_span_count: usize, + pub richtext_tracker_max_filtered_span_count: usize, + pub richtext_id_to_cursor_iter_count: u64, + pub richtext_id_to_cursor_empty_iter_count: u64, + pub recording_events: bool, + pub forward_diff_calculator: bool, +} + +#[cfg(feature = "test_utils")] +#[derive(Debug, Clone, Copy, Default)] +pub struct TextStateProfile { + pub richtext_tree_node_count: usize, + pub richtext_chunk_count: usize, + pub text_chunk_count: usize, + pub style_anchor_count: usize, + pub style_range_tree_node_count: usize, + pub style_range_chunk_count: usize, +} + impl Default for LoroDoc { fn default() -> Self { Self::new() @@ -624,6 +674,7 @@ impl LoroDoc { self.import_changes_and_apply_delta_to_state_if_needed( |oplog| encoding::decode_oplog_changes(oplog, parsed), 
origin, + false, ) // let new_doc = LoroDoc::new(); @@ -635,6 +686,7 @@ impl LoroDoc { EncodeMode::FastUpdates => self.import_changes_and_apply_delta_to_state_if_needed( |oplog| encoding::decode_oplog_changes(oplog, parsed), origin, + false, ), EncodeMode::Auto => { unreachable!() @@ -719,6 +771,7 @@ impl LoroDoc { &self, decode_changes: impl FnOnce(&mut OpLog) -> Result, LoroError>, origin: InternalString, + force_state_apply_rollback: bool, ) -> Result { let mut oplog = self.oplog.lock(); let arena_checkpoint = oplog.arena.checkpoint_for_rollback(); @@ -767,7 +820,7 @@ impl LoroDoc { let old_vv = oplog.vv().clone(); let old_frontiers = oplog.frontiers().clone(); - let rollback_enabled = preflight.needs_state_apply_rollback; + let rollback_enabled = force_state_apply_rollback || preflight.needs_state_apply_rollback; if rollback_enabled { oplog.begin_import_rollback_with_arena(arena_checkpoint); } @@ -841,6 +894,7 @@ impl LoroDoc { let result = self.import_changes_and_apply_delta_to_state_if_needed( |oplog| crate::encoding::json_schema::decode_json_changes(json, &oplog.arena), Default::default(), + true, ); self.emit_events(); result @@ -1577,6 +1631,66 @@ impl LoroDoc { result } + #[cfg(feature = "test_utils")] + pub fn checkout_with_profile(&self, frontiers: &Frontiers) -> LoroResult { + let total_start = std::time::Instant::now(); + let was_detached = self.is_detached(); + let (options, guard) = self.implicit_commit_then_stop(); + let mut result = self._checkout_without_emitting_profile(frontiers, true, true); + if let Ok(profile) = result.as_mut() { + let emit_start = std::time::Instant::now(); + self.emit_events(); + profile.emit_events = emit_start.elapsed(); + } + drop(guard); + if self.config.detached_editing() { + if result.is_ok() { + self.renew_peer_id(); + } + self.renew_txn_if_auto_commit(options); + } else if result.is_err() { + if !was_detached { + self.renew_txn_if_auto_commit(options); + } + } else if !self.is_detached() { + 
self.renew_txn_if_auto_commit(options); + } + + if let Ok(profile) = result.as_mut() { + profile.total = total_start.elapsed(); + } + + result + } + + #[cfg(feature = "test_utils")] + pub fn text_state_profile(&self, name: &str) -> Option { + let id = ContainerID::new_root(name, ContainerType::Text); + let idx = self.arena.id_to_idx(&id)?; + let mut state = self.state.lock(); + let ( + richtext_tree_node_count, + richtext_chunk_count, + text_chunk_count, + style_anchor_count, + style_range_tree_node_count, + style_range_chunk_count, + ) = state.with_state_mut(idx, |state| { + state + .as_richtext_state_mut() + .map(|state| state.debug_counts()) + })?; + + Some(TextStateProfile { + richtext_tree_node_count, + richtext_chunk_count, + text_chunk_count, + style_anchor_count, + style_range_tree_node_count, + style_range_chunk_count, + }) + } + /// NOTE: The caller of this method should ensure the txn is locked and set to None #[instrument(level = "info", skip(self))] pub(crate) fn _checkout_without_emitting( @@ -1610,7 +1724,8 @@ impl LoroDoc { } let frontiers = if to_shrink_frontiers { - shrink_frontiers(frontiers, &oplog.dag).map_err(LoroError::FrontiersNotFound)? + shrink_frontiers_for_checkout(&oplog, frontiers) + .map_err(LoroError::FrontiersNotFound)? 
} else { frontiers.clone() }; @@ -1620,12 +1735,25 @@ impl LoroDoc { } let mut state = self.state.lock(); - let mut calc = self.diff_calculator.lock(); for i in frontiers.iter() { if !oplog.dag.contains(i) { return Err(LoroError::FrontiersNotFound(i)); } } + if !to_commit_then_renew || !state.is_recording() { + let shallow_root = oplog.shallow_since_frontiers(); + let is_shallow_root = frontiers.len() == shallow_root.len() + && frontiers.iter().all(|id| shallow_root.contains(&id)); + if is_shallow_root && state.restore_to_shallow_root() { + self.set_detached(true); + return Ok(()); + } + + if state.restore_to_shallow_latest(&frontiers) { + self.set_detached(true); + return Ok(()); + } + } let before = oplog.dag.frontiers_to_vv(&state.frontiers).ok_or_else(|| { LoroError::NotFoundError( @@ -1643,8 +1771,14 @@ impl LoroDoc { }; self.set_detached(true); - let (diff, diff_mode) = - calc.calc_diff_internal(&oplog, &before, &state.frontiers, after, &frontiers, None); + let use_forward_diff_calculator = should_use_forward_diff_calculator(&before, after); + let (diff, diff_mode) = if use_forward_diff_calculator { + let mut calc = DiffCalculator::new(false); + calc.calc_diff_internal(&oplog, &before, &state.frontiers, after, &frontiers, None) + } else { + let mut calc = self.diff_calculator.lock(); + calc.calc_diff_internal(&oplog, &before, &state.frontiers, after, &frontiers, None) + }; state.apply_diff( InternalDocDiff { origin: "checkout".into(), @@ -1658,6 +1792,155 @@ impl LoroDoc { Ok(()) } + #[cfg(feature = "test_utils")] + fn _checkout_without_emitting_profile( + &self, + frontiers: &Frontiers, + to_shrink_frontiers: bool, + to_commit_then_renew: bool, + ) -> Result { + let mut profile = CheckoutProfile::default(); + let prepare_start = std::time::Instant::now(); + if !self.txn.is_locked() { + return Err(LoroError::TransactionError( + "checkout requires the transaction mutex to be held" + .to_string() + .into_boxed_str(), + )); + } + let from_frontiers = 
self.state_frontiers(); + profile.from_frontiers_len = from_frontiers.len(); + profile.to_frontiers_len = frontiers.len(); + loro_common::info!( + "checkout from={:?} to={:?} cur_vv={:?}", + from_frontiers, + frontiers, + self.oplog_vv() + ); + + if &from_frontiers == frontiers { + profile.frontier_prepare = prepare_start.elapsed(); + return Ok(profile); + } + + let oplog = self.oplog.lock(); + if oplog.dag.is_before_shallow_root(frontiers) { + return Err(LoroError::SwitchToVersionBeforeShallowRoot); + } + + let frontiers = if to_shrink_frontiers { + shrink_frontiers_for_checkout(&oplog, frontiers) + .map_err(LoroError::FrontiersNotFound)? + } else { + frontiers.clone() + }; + profile.to_frontiers_len = frontiers.len(); + + if from_frontiers == frontiers { + profile.frontier_prepare = prepare_start.elapsed(); + return Ok(profile); + } + + let mut state = self.state.lock(); + for i in frontiers.iter() { + if !oplog.dag.contains(i) { + return Err(LoroError::FrontiersNotFound(i)); + } + } + profile.frontier_prepare = prepare_start.elapsed(); + if !to_commit_then_renew || !state.is_recording() { + let shallow_root = oplog.shallow_since_frontiers(); + let is_shallow_root = frontiers.len() == shallow_root.len() + && frontiers.iter().all(|id| shallow_root.contains(&id)); + if is_shallow_root && state.restore_to_shallow_root() { + self.set_detached(true); + return Ok(profile); + } + + if state.restore_to_shallow_latest(&frontiers) { + self.set_detached(true); + return Ok(profile); + } + } + + let vv_start = std::time::Instant::now(); + let before = oplog.dag.frontiers_to_vv(&state.frontiers).ok_or_else(|| { + LoroError::NotFoundError( + format!( + "Cannot find the current state version {:?}", + state.frontiers + ) + .into_boxed_str(), + ) + })?; + let Some(after) = &oplog.dag.frontiers_to_vv(&frontiers) else { + return Err(LoroError::NotFoundError( + format!("Cannot find the specified version {:?}", frontiers).into_boxed_str(), + )); + }; + profile.frontiers_to_vv = 
vv_start.elapsed(); + profile.from_vv_len = before.len(); + profile.to_vv_len = after.len(); + profile.recording_events = state.is_recording(); + + self.set_detached(true); + let diff_start = std::time::Instant::now(); + crate::diff_calc::profiling::begin(); + profile.forward_diff_calculator = should_use_forward_diff_calculator(&before, after); + let (diff, diff_mode) = if profile.forward_diff_calculator { + let mut calc = DiffCalculator::new(false); + calc.calc_diff_internal(&oplog, &before, &state.frontiers, after, &frontiers, None) + } else { + let mut calc = self.diff_calculator.lock(); + calc.calc_diff_internal(&oplog, &before, &state.frontiers, after, &frontiers, None) + }; + let diff_profile = crate::diff_calc::profiling::finish(); + profile.diff_calc = diff_start.elapsed(); + profile.richtext_tracker_checkout = diff_profile.richtext_tracker_checkout; + profile.richtext_tracker_diff = diff_profile.richtext_tracker_diff; + profile.richtext_delta_build = diff_profile.richtext_delta_build; + profile.richtext_insert_future_scan = diff_profile.richtext_insert_future_scan; + profile.causal_vv_materialize = diff_profile.causal_vv_materialize; + profile.richtext_tracker_checkout_count = diff_profile.richtext_tracker_checkout_count; + profile.richtext_tracker_diff_count = diff_profile.richtext_tracker_diff_count; + profile.richtext_delta_build_count = diff_profile.richtext_delta_build_count; + profile.richtext_insert_future_scan_count = diff_profile.richtext_insert_future_scan_count; + profile.richtext_insert_future_scan_visited = + diff_profile.richtext_insert_future_scan_visited; + profile.richtext_insert_future_scan_max_visited = + diff_profile.richtext_insert_future_scan_max_visited; + profile.causal_vv_materialize_count = diff_profile.causal_vv_materialize_count; + profile.max_causal_vv_width = diff_profile.max_causal_vv_width; + profile.richtext_tracker_span_filter_count = + diff_profile.richtext_tracker_span_filter_count; + profile.richtext_tracker_span_count 
= diff_profile.richtext_tracker_span_count; + profile.richtext_tracker_filtered_span_count = + diff_profile.richtext_tracker_filtered_span_count; + profile.richtext_tracker_skipped_span_count = + diff_profile.richtext_tracker_skipped_span_count; + profile.richtext_tracker_max_span_count = diff_profile.richtext_tracker_max_span_count; + profile.richtext_tracker_max_filtered_span_count = + diff_profile.richtext_tracker_max_filtered_span_count; + profile.richtext_id_to_cursor_iter_count = diff_profile.richtext_id_to_cursor_iter_count; + profile.richtext_id_to_cursor_empty_iter_count = + diff_profile.richtext_id_to_cursor_empty_iter_count; + profile.diff_container_count = diff.len(); + + let apply_start = std::time::Instant::now(); + state.apply_diff( + InternalDocDiff { + origin: "checkout".into(), + diff: Cow::Owned(diff), + by: EventTriggerKind::Checkout, + new_version: Cow::Owned(frontiers.clone()), + }, + diff_mode, + )?; + profile.state_apply = apply_start.elapsed(); + + Ok(profile) + } + #[inline] pub fn vv_to_frontiers(&self, vv: &VersionVector) -> Frontiers { self.oplog.lock().dag.vv_to_frontiers(vv) @@ -1729,23 +2012,24 @@ impl LoroDoc { // 5. Compare the states of the new document and the current document. // Step 1: Export the initial state from the GC snapshot. + let shallow_root = self.shallow_since_frontiers(); let initial_snapshot = self - .export(ExportMode::state_only(Some( - &self.shallow_since_frontiers(), - ))) + .export(ExportMode::state_only(Some(&shallow_root))) .unwrap(); // Step 2: Create a new document and import the initial snapshot. let doc = LoroDoc::new(); doc.import(&initial_snapshot).unwrap(); - self.checkout(&self.shallow_since_frontiers()).unwrap(); + self.checkout(&shallow_root).unwrap(); assert_eq!(self.get_deep_value(), doc.get_deep_value()); - // Step 3: Export updates since the shallow start version vector to the current version. 
- let updates = self.export(ExportMode::all_updates()).unwrap(); + // Step 3: Export updates after the complete shallow root state. + let shallow_root_vv = self.frontiers_to_vv(&shallow_root).unwrap(); + let updates = self.export(ExportMode::updates(&shallow_root_vv)).unwrap(); // Step 4: Import these updates into the new document. doc.import(&updates).unwrap(); + doc.checkout_to_latest(); self.checkout_to_latest(); // Step 5: Checkout to the current state's frontiers and compare the states. @@ -2051,7 +2335,28 @@ impl LoroDoc { #[inline] pub fn find_id_spans_between(&self, from: &Frontiers, to: &Frontiers) -> VersionVectorDiff { - self.oplog().lock().dag.find_path(from, to) + let oplog = self.oplog().lock(); + let frontiers_to_vv = |frontiers: &Frontiers, side: &str| { + if let Some(vv) = oplog.dag.frontiers_to_vv(frontiers) { + return vv; + } + + if oplog.dag.is_before_shallow_root(frontiers) { + return oplog + .dag + .frontiers_to_vv(oplog.dag.shallow_since_frontiers()) + .expect("shallow root frontiers should be included by the document history"); + } + + panic!("{side} frontiers should be included by the document history"); + }; + let from_vv = frontiers_to_vv(from, "from"); + let to_vv = frontiers_to_vv(to, "to"); + + VersionVectorDiff { + retreat: from_vv.sub_vec(&to_vv), + forward: to_vv.sub_vec(&from_vv), + } } /// Subscribe to the first commit from a peer. 
Operations performed on the `LoroDoc` within this callback @@ -2128,66 +2433,69 @@ impl LoroDoc { ) -> Result<(), ChangeTravelError> { let (options, guard) = self.implicit_commit_then_stop(); drop(guard); - struct PendingNode(ChangeMeta); - impl PartialEq for PendingNode { - fn eq(&self, other: &Self) -> bool { - self.0.lamport_last() == other.0.lamport_last() && self.0.id.peer == other.0.id.peer + let ans = 'travel: { + struct PendingNode(ChangeMeta); + impl PartialEq for PendingNode { + fn eq(&self, other: &Self) -> bool { + self.0.lamport_last() == other.0.lamport_last() + && self.0.id.peer == other.0.id.peer + } } - } - impl Eq for PendingNode {} - impl PartialOrd for PendingNode { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) + impl Eq for PendingNode {} + impl PartialOrd for PendingNode { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } - } - impl Ord for PendingNode { - fn cmp(&self, other: &Self) -> Ordering { - self.0 - .lamport_last() - .cmp(&other.0.lamport_last()) - .then_with(|| self.0.id.peer.cmp(&other.0.id.peer)) + impl Ord for PendingNode { + fn cmp(&self, other: &Self) -> Ordering { + self.0 + .lamport_last() + .cmp(&other.0.lamport_last()) + .then_with(|| self.0.id.peer.cmp(&other.0.id.peer)) + } } - } - for id in ids { - let op_log = &self.oplog().lock(); - if !op_log.vv().includes_id(*id) { - return Err(ChangeTravelError::TargetIdNotFound(*id)); - } - if op_log.dag.shallow_since_vv().includes_id(*id) { - return Err(ChangeTravelError::TargetVersionNotIncluded); + for id in ids { + let op_log = &self.oplog().lock(); + if !op_log.vv().includes_id(*id) { + break 'travel Err(ChangeTravelError::TargetIdNotFound(*id)); + } + if op_log.dag.shallow_since_vv().includes_id(*id) { + break 'travel Err(ChangeTravelError::TargetVersionNotIncluded); + } } - } - let mut visited = FxHashSet::default(); - let mut pending: BinaryHeap = BinaryHeap::new(); - for id in ids { - 
pending.push(PendingNode(ChangeMeta::from_change( - &self.oplog().lock().get_change_at(*id).unwrap(), - ))); - } - while let Some(PendingNode(node)) = pending.pop() { - let deps = node.deps.clone(); - if f(node).is_break() { - break; + let mut visited = FxHashSet::default(); + let mut pending: BinaryHeap = BinaryHeap::new(); + for id in ids { + pending.push(PendingNode(ChangeMeta::from_change( + &self.oplog().lock().get_change_at(*id).unwrap(), + ))); } - - for dep in deps.iter() { - let Some(dep_node) = self.oplog().lock().get_change_at(dep) else { - continue; - }; - if visited.contains(&dep_node.id) { - continue; + while let Some(PendingNode(node)) = pending.pop() { + let deps = node.deps.clone(); + if f(node).is_break() { + break; } - visited.insert(dep_node.id); - pending.push(PendingNode(ChangeMeta::from_change(&dep_node))); + for dep in deps.iter() { + let Some(dep_node) = self.oplog().lock().get_change_at(dep) else { + continue; + }; + if visited.contains(&dep_node.id) { + continue; + } + + visited.insert(dep_node.id); + pending.push(PendingNode(ChangeMeta::from_change(&dep_node))); + } } - } - let ans = Ok(()); + Ok(()) + }; self.renew_txn_if_auto_commit(options); ans } @@ -2295,6 +2603,30 @@ fn find_last_delete_op(oplog: &OpLog, id: ID, idx: ContainerIdx) -> Option { best.map(|(_, op_id)| op_id) } +fn should_use_forward_diff_calculator(before: &VersionVector, after: &VersionVector) -> bool { + matches!(before.partial_cmp(after), Some(Ordering::Less)) +} + +fn shrink_frontiers_for_checkout(oplog: &OpLog, frontiers: &Frontiers) -> Result { + if oplog.is_shallow() && frontiers_eq_unordered(frontiers, oplog.shallow_since_frontiers()) { + return Ok(oplog.shallow_since_frontiers().clone()); + } + + let shrunk = shrink_frontiers(frontiers, &oplog.dag)?; + if oplog.is_shallow() + && oplog.dag.is_before_shallow_root(&shrunk) + && !oplog.dag.is_before_shallow_root(frontiers) + { + Ok(frontiers.clone()) + } else { + Ok(shrunk) + } +} + +fn frontiers_eq_unordered(a: 
&Frontiers, b: &Frontiers) -> bool { + a.len() == b.len() && a.iter().all(|id| b.contains(&id)) +} + #[derive(Debug)] pub struct CommitWhenDrop<'a> { doc: &'a LoroDoc, @@ -2401,6 +2733,7 @@ mod test { cursor::PosType, encoding::json_schema::json::{JsonOpContent, JsonSchema, ListOp}, encoding::{fast_snapshot::EMPTY_MARK, EncodeMode}, + handler::HandlerTrait, loro::ExportMode, version::{Frontiers, VersionVector}, LoroDoc, ToJson, TreeParentId, @@ -2872,6 +3205,125 @@ mod test { } } + #[test] + fn text_checkout_wide_causal_multi_peer() { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + text.insert(0, "base", PosType::Unicode).unwrap(); + doc.commit_then_renew(); + + let mut frontiers = vec![doc.oplog_frontiers()]; + let mut expected = vec!["base".to_string()]; + let mut len = 4; + for peer in 0..32 { + let snapshot = doc.export(ExportMode::snapshot()).unwrap(); + let base_vv = doc.oplog_vv(); + let peer_doc = LoroDoc::new_auto_commit(); + peer_doc.import(&snapshot).unwrap(); + peer_doc.set_peer_id(peer + 2).unwrap(); + let peer_text = peer_doc.get_text("text"); + peer_text.insert(len, "x", PosType::Unicode).unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + len += 1; + frontiers.push(doc.oplog_frontiers()); + expected.push(format!("base{}", "x".repeat(peer as usize + 1))); + } + + for idx in (0..frontiers.len()).rev() { + doc.checkout(&frontiers[idx]).unwrap(); + assert_eq!( + text.get_value().as_string().unwrap().as_str(), + expected[idx] + ); + } + + for idx in 0..frontiers.len() { + doc.checkout(&frontiers[idx]).unwrap(); + assert_eq!( + text.get_value().as_string().unwrap().as_str(), + expected[idx] + ); + } + } + + #[test] + fn checkout_same_deps_same_position_frontiers_text_consistent() { + let base_doc = LoroDoc::new_auto_commit(); + base_doc.set_peer_id(1).unwrap(); + let base_text = 
base_doc.get_text("text"); + base_text.insert(0, "base", PosType::Unicode).unwrap(); + base_doc.commit_then_renew(); + let snapshot = base_doc.export(ExportMode::snapshot()).unwrap(); + let base_vv = base_doc.oplog_vv(); + let base_frontiers = base_doc.oplog_frontiers(); + + let doc = LoroDoc::new_auto_commit(); + doc.import(&snapshot).unwrap(); + let text = doc.get_text("text"); + for peer in 0..32 { + let peer_doc = LoroDoc::new_auto_commit(); + peer_doc.import(&snapshot).unwrap(); + peer_doc.set_peer_id(peer + 2).unwrap(); + let peer_text = peer_doc.get_text("text"); + peer_text.insert(0, "x", PosType::Unicode).unwrap(); + peer_doc.commit_then_renew(); + let update = peer_doc.export(ExportMode::updates(&base_vv)).unwrap(); + doc.import(&update).unwrap(); + } + + let latest_frontiers = doc.oplog_frontiers(); + assert_eq!(latest_frontiers.len(), 32); + let expected = text.get_value().as_string().unwrap().to_string(); + + doc.checkout(&base_frontiers).unwrap(); + assert_eq!(text.get_value().as_string().unwrap().as_str(), "base"); + + doc.checkout(&latest_frontiers).unwrap(); + assert_eq!( + text.get_value().as_string().unwrap().as_str(), + expected.as_str() + ); + doc.check_state_diff_calc_consistency_slow(); + } + + #[test] + fn checkout_to_latest_linear_text_state_consistent() { + let doc = LoroDoc::new_auto_commit(); + doc.set_peer_id(1).unwrap(); + let text = doc.get_text("text"); + text.insert(0, "base", PosType::Unicode).unwrap(); + doc.commit_then_renew(); + + let mut frontiers = vec![doc.oplog_frontiers()]; + let mut expected = vec!["base".to_string()]; + for _ in 0..24 { + let pos = text.get_value().as_string().unwrap().chars().count(); + text.insert(pos, "x", PosType::Unicode).unwrap(); + doc.commit_then_renew(); + frontiers.push(doc.oplog_frontiers()); + expected.push(format!("base{}", "x".repeat(expected.len()))); + } + + let old_idx = 7; + doc.checkout(&frontiers[old_idx]).unwrap(); + assert!(doc.is_detached()); + assert_eq!( + 
text.get_value().as_string().unwrap().as_str(), + expected[old_idx] + ); + + doc.checkout_to_latest(); + assert!(!doc.is_detached()); + assert_eq!( + text.get_value().as_string().unwrap().as_str(), + expected.last().unwrap() + ); + doc.check_state_diff_calc_consistency_slow(); + } + #[test] fn import_batch_err_181() { let a = LoroDoc::new_auto_commit(); diff --git a/crates/loro-internal/src/oplog.rs b/crates/loro-internal/src/oplog.rs index a1e415b81..2e670adf4 100644 --- a/crates/loro-internal/src/oplog.rs +++ b/crates/loro-internal/src/oplog.rs @@ -5,9 +5,7 @@ mod pending_changes; use crate::sync::{AtomicUsize, Mutex}; use bytes::Bytes; use std::borrow::Cow; -use std::cell::RefCell; use std::cmp::Ordering; -use std::rc::Rc; use std::sync::Arc; use tracing::trace_span; @@ -28,7 +26,7 @@ use crate::span::{HasCounterSpan, HasLamportSpan}; use crate::version::{Frontiers, ImVersionVector, VersionVector}; use crate::LoroError; use change_store::{BlockOpRef, ChangeStoreRollback}; -use loro_common::{ContainerType, HasIdSpan, IdLp, IdSpan}; +use loro_common::{HasIdSpan, IdLp, IdSpan}; use rle::{HasLength, RleVec, Sliceable}; use smallvec::SmallVec; @@ -233,26 +231,32 @@ impl OpLog { continue; } - if self.dag.is_before_shallow_root(&change.deps) { + let deps_are_before_shallow_root = self.dag.is_before_shallow_root(&change.deps); + let deps_start_at_shallow_root = !change.deps.is_empty() + && change + .deps + .iter() + .all(|dep| self.shallow_since_frontiers().contains(&dep)); + if deps_are_before_shallow_root && !deps_start_at_shallow_root { ans.has_deps_before_shallow_root = true; continue; } - if self - .dag - .get_change_lamport_from_deps(&change.deps) - .is_none() + if !deps_are_before_shallow_root + && self + .dag + .get_change_lamport_from_deps(&change.deps) + .is_none() { continue; } ans.applies_to_dag = true; - if change.ops.iter().any(|op| { - matches!( - op.container.get_type(), - ContainerType::List | ContainerType::Tree - ) - }) { + if change + .ops + .iter() 
+ .any(|op| op.container.get_type().may_need_state_apply_rollback()) + { ans.needs_state_apply_rollback = true; } } @@ -562,7 +566,7 @@ impl OpLog { /// iterates over all changes between LCA(common ancestors) to the merged version of (`from` and `to`) causally /// - /// Tht iterator will include a version vector when the change is applied + /// The iterator includes the causal base version and frontiers before each change is applied. /// /// returns: (common_ancestor_vv, iterator) /// @@ -584,36 +588,39 @@ impl OpLog { Item = ( BlockChangeRef, (Counter, Counter), - Rc>, + ImVersionVector, + Frontiers, ), > + '_, ) { let mut merged_vv = from.clone(); merged_vv.merge(to); loro_common::debug!("to_frontiers={:?} vv={:?}", &to_frontiers, to); - let (common_ancestors, mut diff_mode) = - self.dag.find_common_ancestor(from_frontiers, to_frontiers); - if diff_mode == DiffMode::Checkout && to > from { - diff_mode = DiffMode::Import; - } - - let common_ancestors_vv = self.dag.frontiers_to_vv(&common_ancestors).unwrap(); + let (common_ancestors, diff_mode) = + self.find_common_ancestor_for_diff(from, from_frontiers, to, to_frontiers); + let common_ancestors_vv = self + .dag + .frontiers_to_vv(&common_ancestors) + .expect("common ancestors should be representable in the current DAG"); // go from lca to merged_vv let diff = common_ancestors_vv.diff(&merged_vv).forward; let mut iter = self.dag.iter_causal(common_ancestors, diff); let mut node = iter.next(); let mut cur_cnt = 0; - let vv = Rc::new(RefCell::new(VersionVector::default())); ( common_ancestors_vv.clone(), diff_mode, std::iter::from_fn(move || { if let Some(inner) = &node { - let mut inner_vv = vv.borrow_mut(); - // FIXME: PERF: it looks slow for large vv, like 10000+ entries - inner_vv.clear(); - self.dag.ensure_vv_for(&inner.data); - inner_vv.extend_to_include_vv(inner.data.vv.get().unwrap().iter()); + #[cfg(feature = "test_utils")] + let vv_prepare_start = std::time::Instant::now(); + let base_vv = 
self.dag.ensure_vv_for(&inner.data); + #[cfg(feature = "test_utils")] + crate::diff_calc::profiling::record_causal_vv_materialize( + vv_prepare_start.elapsed(), + base_vv.len(), + ); + let base_frontiers = inner.data.deps.clone(); let peer = inner.data.peer; let cnt = inner .data @@ -631,9 +638,7 @@ impl OpLog { cur_cnt = 0; } - inner_vv.extend_to_include_end_id(change.id); - - Some((change, (cnt, dag_node_end), vv.clone())) + Some((change, (cnt, dag_node_end), base_vv, base_frontiers)) } else { None } @@ -641,6 +646,43 @@ impl OpLog { ) } + pub(crate) fn find_common_ancestor_for_diff( + &self, + from: &VersionVector, + from_frontiers: &Frontiers, + to: &VersionVector, + to_frontiers: &Frontiers, + ) -> (Frontiers, DiffMode) { + let shallow_root_frontiers = self.dag.shallow_since_frontiers(); + if !shallow_root_frontiers.is_empty() { + if from_frontiers == shallow_root_frontiers && to.includes_vv(from) { + return (from_frontiers.clone(), DiffMode::Import); + } + + if to_frontiers == shallow_root_frontiers && from.includes_vv(to) { + return (to_frontiers.clone(), DiffMode::Checkout); + } + } + + let (mut common_ancestors, mut diff_mode) = + self.dag.find_common_ancestor(from_frontiers, to_frontiers); + if diff_mode == DiffMode::Checkout && to > from { + diff_mode = DiffMode::Import; + } + + if self.dag.frontiers_to_vv(&common_ancestors).is_none() { + if to.includes_vv(from) { + common_ancestors = from_frontiers.clone(); + diff_mode = DiffMode::Import; + } else if from.includes_vv(to) { + common_ancestors = to_frontiers.clone(); + diff_mode = DiffMode::Checkout; + } + } + + (common_ancestors, diff_mode) + } + pub fn len_changes(&self) -> usize { self.change_store.change_num() } diff --git a/crates/loro-internal/src/oplog/loro_dag.rs b/crates/loro-internal/src/oplog/loro_dag.rs index d71a08e5d..43947b9ca 100644 --- a/crates/loro-internal/src/oplog/loro_dag.rs +++ b/crates/loro-internal/src/oplog/loro_dag.rs @@ -635,11 +635,16 @@ impl AppDag { self.frontiers = 
v.frontiers; if let Some((vv, f)) = v.start_version { if !f.is_empty() { - assert!(f.len() == 1); - let id = f.as_single().unwrap(); - let node = self.get(id).unwrap(); - assert!(node.cnt == id.counter); - self.shallow_root_frontiers_deps = node.deps.clone(); + let deps: Frontiers = vv + .iter() + .filter_map(|(&peer, &counter)| { + (counter > 0).then_some(ID::new(peer, counter - 1)) + }) + .collect(); + for id in f.iter() { + self.get(id).unwrap(); + } + self.shallow_root_frontiers_deps = deps; } self.shallow_since_frontiers = f; self.shallow_since_vv = ImVersionVector::from_vv(&vv); @@ -779,18 +784,34 @@ impl AppDag { return true; } - if deps.iter().any(|x| self.shallow_since_vv.includes_id(x)) { - return true; + let Some(vv) = self.frontiers_to_vv(deps) else { + return deps.iter().any(|id| { + self.shallow_since_vv.includes_id(id) || self.shallow_since_frontiers.contains(&id) + }); + }; + + self.vv_is_before_shallow_root(&vv) + } + + fn vv_is_before_shallow_root(&self, vv: &VersionVector) -> bool { + if self.shallow_since_vv.is_empty() { + return false; } - if deps + if self + .shallow_since_vv .iter() - .any(|x| self.shallow_since_frontiers.contains(&x)) + .any(|(&peer, &counter)| vv.get(&peer).copied().unwrap_or(0) < counter) { - return deps != &self.shallow_since_frontiers; + return true; } - false + // The shallow boundary can be a multi-frontier root. A target at that + // boundary must include every root frontier; a proper subset is not a + // representable state in the shallow history. 
+ self.shallow_since_frontiers + .iter() + .any(|id| !vv.includes_id(id)) } /// Travel the ancestors of the given id, and call the callback for each node @@ -1074,6 +1095,10 @@ impl AppDag { } else { let mut all_deps_processed = true; for id in top_node.deps.iter() { + if self.shallow_since_vv.includes_id(id) { + continue; + } + let node = self.get(id).expect("deps should be in the dag"); if node.vv.get().is_none() { if all_deps_processed { @@ -1090,6 +1115,15 @@ impl AppDag { } for id in top_node.deps.iter() { + if self.shallow_since_vv.includes_id(id) { + if ans_vv.is_empty() { + ans_vv = self.shallow_since_vv.clone(); + } else { + ans_vv.extend_to_include_vv(self.shallow_since_vv.iter()); + } + continue; + } + let node = self.get(id).expect("deps should be in the dag"); let dep_vv = node.vv.get().unwrap(); if ans_vv.is_empty() { @@ -1151,7 +1185,7 @@ impl AppDag { pub fn frontiers_to_vv(&self, frontiers: &Frontiers) -> Option { if frontiers == &self.shallow_root_frontiers_deps { let vv = VersionVector::from_im_vv(&self.shallow_since_vv); - return Some(vv); + return (!self.vv_is_before_shallow_root(&vv)).then_some(vv); } let mut vv: VersionVector = Default::default(); @@ -1162,6 +1196,10 @@ impl AppDag { vv.extend_to_include_last_id(id); } + if self.vv_is_before_shallow_root(&vv) { + return None; + } + Some(vv) } @@ -1195,6 +1233,13 @@ impl AppDag { } pub fn im_vv_to_frontiers(&self, vv: &ImVersionVector) -> Frontiers { + if !self.shallow_since_vv.is_empty() { + let version = VersionVector::from_im_vv(vv); + if self.vv_is_before_shallow_root(&version) { + return self.shallow_since_frontiers.clone(); + } + } + if vv.is_empty() { return Default::default(); } @@ -1226,6 +1271,10 @@ impl AppDag { } pub fn vv_to_frontiers(&self, vv: &VersionVector) -> Frontiers { + if self.vv_is_before_shallow_root(vv) { + return self.shallow_since_frontiers.clone(); + } + if vv.is_empty() { return Default::default(); } @@ -1292,7 +1341,7 @@ impl AppDag { pub fn 
cmp_with_frontiers(&self, other: &Frontiers) -> Ordering { if &self.frontiers == other { Ordering::Equal - } else if other.iter().all(|id| self.vv.includes_id(id)) { + } else if self.frontiers_to_vv(other).is_some() { Ordering::Greater } else { Ordering::Less diff --git a/crates/loro-internal/src/oplog/pending_changes.rs b/crates/loro-internal/src/oplog/pending_changes.rs index 8d84e2b37..84ad1d9c1 100644 --- a/crates/loro-internal/src/oplog/pending_changes.rs +++ b/crates/loro-internal/src/oplog/pending_changes.rs @@ -5,9 +5,7 @@ use crate::{ version::{ImVersionVector, VersionRange}, OpLog, VersionVector, }; -use loro_common::{ - ContainerType, Counter, CounterSpan, HasCounterSpan, HasIdSpan, LoroResult, PeerID, ID, -}; +use loro_common::{Counter, CounterSpan, HasCounterSpan, HasIdSpan, LoroResult, PeerID, ID}; use rustc_hash::FxHashMap; #[derive(Debug, Clone)] @@ -40,12 +38,10 @@ impl PendingChanges { self.changes.values().any(|tree| { tree.values().any(|changes| { changes.iter().any(|change| { - change.ops.iter().any(|op| { - matches!( - op.container.get_type(), - ContainerType::List | ContainerType::Tree - ) - }) + change + .ops + .iter() + .any(|op| op.container.get_type().may_need_state_apply_rollback()) }) }) }) diff --git a/crates/loro-internal/src/state.rs b/crates/loro-internal/src/state.rs index 7b590c4ab..de7cb800b 100644 --- a/crates/loro-internal/src/state.rs +++ b/crates/loro-internal/src/state.rs @@ -625,13 +625,10 @@ impl DocState { return Err(LoroError::internal("state apply failpoint")); } } - match diff_mode { - DiffMode::Checkout => { - self.dead_containers_cache.clear(); - } - _ => { - self.dead_containers_cache.clear_alive(); - } + if diff.by.is_checkout() || diff_mode == DiffMode::Checkout { + self.dead_containers_cache.clear(); + } else { + self.dead_containers_cache.clear_alive(); } self.pre_txn(diff.origin.clone(), diff.by); @@ -1932,6 +1929,30 @@ impl DocState { pub(crate) fn shallow_root_store(&self) -> Option<&Arc> { 
self.store.shallow_root_store() } + + pub(crate) fn restore_to_shallow_root(&mut self) -> bool { + let Some(frontiers) = self.store.restore_to_shallow_root() else { + return false; + }; + + self.frontiers = frontiers; + self.dead_containers_cache.clear(); + true + } + + pub(crate) fn cache_current_as_shallow_latest(&mut self, frontiers: Frontiers) { + self.store.cache_current_as_shallow_latest(frontiers); + } + + pub(crate) fn restore_to_shallow_latest(&mut self, frontiers: &Frontiers) -> bool { + if !self.store.restore_to_shallow_latest(frontiers) { + return false; + } + + self.frontiers = frontiers.clone(); + self.dead_containers_cache.clear(); + true + } } fn create_state_(idx: ContainerIdx, config: &Configure, peer: u64) -> State { diff --git a/crates/loro-internal/src/state/container_store.rs b/crates/loro-internal/src/state/container_store.rs index 3dc4255ce..6d70aa127 100644 --- a/crates/loro-internal/src/state/container_store.rs +++ b/crates/loro-internal/src/state/container_store.rs @@ -41,6 +41,7 @@ pub(crate) struct ContainerStore { arena: SharedArena, store: InnerStore, shallow_root_store: Option>, + shallow_latest_store: Option>, conf: Configure, peer: Arc, } @@ -76,6 +77,7 @@ impl ContainerStore { arena, conf, shallow_root_store: None, + shallow_latest_store: None, peer, } } @@ -194,6 +196,37 @@ impl ContainerStore { Some(shallow_root_kv.export()) } + pub(crate) fn restore_to_shallow_root(&mut self) -> Option { + let shallow_root = self.shallow_root_store.as_ref()?; + self.store = shallow_root + .store + .lock() + .fork(self.arena.clone(), &self.conf); + Some(shallow_root.shallow_root_frontiers.clone()) + } + + pub(crate) fn cache_current_as_shallow_latest(&mut self, frontiers: Frontiers) { + self.shallow_latest_store = Some(Arc::new(GcStore { + shallow_root_frontiers: frontiers, + store: Mutex::new(self.store.fork(self.arena.clone(), &self.conf)), + })); + } + + pub(crate) fn restore_to_shallow_latest(&mut self, frontiers: &Frontiers) -> bool { + let 
Some(shallow_latest) = self.shallow_latest_store.as_ref() else { + return false; + }; + if &shallow_latest.shallow_root_frontiers != frontiers { + return false; + } + + self.store = shallow_latest + .store + .lock() + .fork(self.arena.clone(), &self.conf); + true + } + pub(crate) fn decode(&mut self, bytes: Bytes) -> LoroResult> { self.store.decode(bytes) } @@ -314,6 +347,7 @@ impl ContainerStore { conf: config, peer, shallow_root_store: None, + shallow_latest_store: None, } } diff --git a/crates/loro-internal/src/state/richtext_state.rs b/crates/loro-internal/src/state/richtext_state.rs index a5e40dd87..4bec7762c 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -1,6 +1,6 @@ use generic_btree::{rle::HasLength, rle::Sliceable as _, Cursor}; use loro_common::{ContainerID, InternalString, LoroError, LoroResult, LoroValue, ID}; -use loro_delta::{DeltaRope, DeltaRopeBuilder}; +use loro_delta::{delta_trait::DeltaAttr, DeltaRope, DeltaRopeBuilder}; use rustc_hash::{FxHashMap, FxHashSet}; use smallvec::SmallVec; use std::ops::Range; @@ -44,6 +44,66 @@ struct Pos { event_index: usize, } +fn flush_pending_style_delta(style_delta: &mut TextDiff, pending_delta: &mut TextDiff) { + if !pending_delta.is_empty() { + style_delta.compose(pending_delta); + *pending_delta = TextDiff::new(); + } +} + +fn try_append_retain_only_style_delta( + pending_delta: &mut TextDiff, + pending_len: &mut usize, + delta: &TextDiff, +) -> bool { + // Adjacent, non-overlapping style retains can be composed once as a batch. + // Overlapping deltas are flushed to preserve the original compose order. + let mut index = 0; + let mut first_styled_start = None; + for item in delta.iter() { + match item { + loro_delta::DeltaItem::Retain { len, attr } => { + if !attr.attr_is_empty() { + first_styled_start.get_or_insert(index); + } + index += len; + } + loro_delta::DeltaItem::Replace { .. 
} => return false, + } + } + + let Some(first_styled_start) = first_styled_start else { + return true; + }; + + if first_styled_start < *pending_len { + return false; + } + + index = 0; + for item in delta.iter() { + let loro_delta::DeltaItem::Retain { len, attr } = item else { + unreachable!("non-retain style deltas are rejected in the first pass") + }; + if !attr.attr_is_empty() { + if index < *pending_len { + return false; + } + + if index > *pending_len { + pending_delta.push_retain(index - *pending_len, Default::default()); + *pending_len = index; + } + + pending_delta.push_retain(*len, attr.clone()); + *pending_len += len; + } + index += len; + } + + true +} + impl RichtextState { #[inline] pub fn new(idx: ContainerIdx, config: Arc>) -> Self { @@ -150,6 +210,11 @@ impl RichtextState { } } + #[cfg(feature = "test_utils")] + pub(crate) fn debug_counts(&mut self) -> (usize, usize, usize, usize, usize, usize) { + self.state.get_mut().debug_counts() + } + fn get_style_start( &mut self, style_starts: &mut FxHashMap, Pos>, @@ -566,9 +631,20 @@ impl ContainerState for RichtextState { } } + let mut pending_style_delta = TextDiff::new(); + let mut pending_style_delta_len = 0; for s in new_style_deltas { - style_delta.compose(&s); + if !try_append_retain_only_style_delta( + &mut pending_style_delta, + &mut pending_style_delta_len, + &s, + ) { + flush_pending_style_delta(&mut style_delta, &mut pending_style_delta); + pending_style_delta_len = 0; + style_delta.compose(&s); + } } + flush_pending_style_delta(&mut style_delta, &mut pending_style_delta); // self.check_consistency_between_content_and_style_ranges(); ans.compose(&style_delta); Diff::Text(ans) @@ -591,57 +667,54 @@ impl ContainerState for RichtextState { // Rebuilding avoids repeated BTree queries and mutations when the delta is very "choppy" // (many small edit spans), but it allocates and clones chunks, so it can be slower for // small deltas. 
Use a cheap cost model to enable it only when it's likely beneficial. - let should_fast_apply = { - #[inline] - fn ilog2_ceil(x: usize) -> usize { - debug_assert!(x > 0); - (usize::BITS - (x - 1).leading_zeros()) as usize - } + #[inline] + fn ilog2_ceil(x: usize) -> usize { + debug_assert!(x > 0); + (usize::BITS - (x - 1).leading_zeros()) as usize + } - let state = self.state.get_mut(); - if state.has_styles() { - false - } else { - // `edit_actions` approximates how many BTree mutations the incremental path will do: - // each Replace with delete>0 becomes a drain, and each Replace with value>0 becomes an insert. - let mut edit_actions: usize = 0; - let mut is_plain_text_delta = true; - for span in richtext.iter() { - match span { - loro_delta::DeltaItem::Retain { .. } => {} - loro_delta::DeltaItem::Replace { value, delete, .. } => { - if *delete > 0 { - edit_actions += 1; - } - if value.rle_len() > 0 { - if !matches!(value, RichtextStateChunk::Text(_)) { - is_plain_text_delta = false; - break; - } - edit_actions += 1; - } + // `edit_actions` approximates how many BTree mutations the incremental path will do: + // each Replace with delete>0 becomes a drain, and each Replace with value>0 becomes an insert. + let mut edit_actions: usize = 0; + let mut is_plain_text_delta = true; + for span in richtext.iter() { + match span { + loro_delta::DeltaItem::Retain { .. } => {} + loro_delta::DeltaItem::Replace { value, delete, .. 
} => { + if *delete > 0 { + edit_actions += 1; + } + if value.rle_len() > 0 { + if !matches!(value, RichtextStateChunk::Text(_)) { + is_plain_text_delta = false; + break; } + edit_actions += 1; } } - - if !is_plain_text_delta || edit_actions == 0 { - false - } else { - let content_nodes = state.content_node_len().max(1); - let log_n = ilog2_ceil(content_nodes + 1).max(1); - let incremental_score = edit_actions.saturating_mul(log_n); - let rebuild_score = content_nodes.saturating_add(edit_actions); - - let old_len = richtext.old_len().max(1); - let avg_action_span = old_len / edit_actions; - // A very rough proxy for "choppiness": many edit actions with small average span. - // The thresholds are intentionally conservative to avoid rebuilding for small or - // localized deltas. - let is_choppy = edit_actions >= 256 && avg_action_span <= 32; - - is_choppy && incremental_score >= rebuild_score.saturating_mul(4) - } } + } + + let state_has_styles = self.state.get_mut().has_styles(); + let use_plain_text_no_event_path = + !state_has_styles && is_plain_text_delta && edit_actions > 0; + let should_fast_apply = if use_plain_text_no_event_path { + let state = self.state.get_mut(); + let content_nodes = state.content_node_len().max(1); + let log_n = ilog2_ceil(content_nodes + 1).max(1); + let incremental_score = edit_actions.saturating_mul(log_n); + let rebuild_score = content_nodes.saturating_add(edit_actions); + + let old_len = richtext.old_len().max(1); + let avg_action_span = old_len / edit_actions; + // A very rough proxy for "choppiness": many edit actions with small average span. + // The thresholds are intentionally conservative to avoid rebuilding for small or + // localized deltas. 
+ let is_choppy = edit_actions >= 256 && avg_action_span <= 32; + + is_choppy && incremental_score >= rebuild_score.saturating_mul(4) + } else { + false }; if should_fast_apply { @@ -739,18 +812,30 @@ impl ContainerState for RichtextState { loro_delta::DeltaItem::Replace { value, delete, .. } => { if *delete > 0 { // Deletions - self.state - .get_mut() - .drain_by_entity_index(entity_index, *delete, None); + if use_plain_text_no_event_path { + self.state + .get_mut() + .drain_plain_text_by_entity_index(entity_index, *delete); + } else { + self.state + .get_mut() + .drain_by_entity_index(entity_index, *delete, None); + } } if value.rle_len() > 0 { // Insertions match value { RichtextStateChunk::Text(s) => { - self.state.get_mut().insert_elem_at_entity_index( - entity_index, - RichtextStateChunk::Text(s.clone()), - ); + if use_plain_text_no_event_path { + self.state + .get_mut() + .insert_text_chunk_at_entity_index(entity_index, s.clone()); + } else { + self.state.get_mut().insert_elem_at_entity_index( + entity_index, + RichtextStateChunk::Text(s.clone()), + ); + } } RichtextStateChunk::Style { style, anchor_type } => { self.state.get_mut().insert_elem_at_entity_index( @@ -807,6 +892,48 @@ impl ContainerState for RichtextState { Ok(()) } + fn validate_diff(&self, diff: &InternalDiff) -> LoroResult<()> { + let InternalDiff::RichtextRaw(delta) = diff else { + unreachable!() + }; + + let mut cursor = 0usize; + let mut projected = self.len_entity(); + for span in delta.iter() { + match span { + loro_delta::DeltaItem::Retain { len, .. } => { + cursor += len; + if cursor > projected { + return Err(LoroError::internal(format!( + "text diff retains {cursor} entities but state only has {projected}", + ))); + } + } + loro_delta::DeltaItem::Replace { value, delete, .. 
} => { + if cursor + delete > projected { + return Err(LoroError::internal(format!( + "text diff deletes {delete} entities at {cursor} but state only has {projected}", + ))); + } + + projected -= delete; + let len = value.rle_len(); + if len > 0 { + if cursor > projected { + return Err(LoroError::internal(format!( + "text diff inserts at {cursor} but state only has {projected}", + ))); + } + cursor += len; + projected += len; + } + } + } + } + + Ok(()) + } + fn apply_local_op(&mut self, r_op: &RawOp, op: &Op) -> LoroResult { self.update_version(); match &op.content { diff --git a/crates/loro-internal/src/version.rs b/crates/loro-internal/src/version.rs index 1c6ea17a4..331895ce3 100644 --- a/crates/loro-internal/src/version.rs +++ b/crates/loro-internal/src/version.rs @@ -2,6 +2,7 @@ mod frontiers; pub use frontiers::Frontiers; use crate::{ + dag::Dag, id::{Counter, ID}, oplog::AppDag, span::{CounterSpan, IdSpan}, @@ -162,6 +163,60 @@ impl VersionRange { #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct ImVersionVector(im::HashMap); +/// A lightweight causal version used while replaying changes in causal order. +/// +/// It represents `base` plus the current peer advanced to at least `peer_end`. +/// Version vector counters are exclusive upper bounds, so replaying an op at +/// counter `c` uses `CausalVersion(base, peer, c)` as the before-op version: +/// all deps and earlier same-peer ops are included, but the op at `c` is not. +/// This avoids rebuilding a full mutable [VersionVector] for every replayed DAG +/// node/op in checkout diff calculation. 
+#[derive(Clone, Copy, Debug)] +pub(crate) struct CausalVersion<'a> { + base: &'a ImVersionVector, + peer: PeerID, + peer_end: Counter, +} + +impl<'a> CausalVersion<'a> { + #[inline] + pub(crate) fn new(base: &'a ImVersionVector, peer: PeerID, peer_end: Counter) -> Self { + Self { + base, + peer, + peer_end, + } + } + + #[inline] + #[allow(dead_code)] + pub(crate) fn base(&self) -> &'a ImVersionVector { + self.base + } + + #[inline] + #[allow(dead_code)] + pub(crate) fn peer(&self) -> PeerID { + self.peer + } + + #[inline] + #[allow(dead_code)] + pub(crate) fn peer_end(&self) -> Counter { + self.peer_end + } + + #[inline] + pub(crate) fn end_for_peer(&self, peer: PeerID) -> Counter { + let base_end = self.base.get(&peer).copied().unwrap_or(0); + if peer == self.peer { + base_end.max(self.peer_end) + } else { + base_end + } + } +} + #[inline] fn normalize_vv_counter(counter: Counter) -> Counter { counter.max(0) @@ -982,6 +1037,13 @@ impl VersionVector { pub fn shrink_frontiers(last_ids: &Frontiers, dag: &AppDag) -> Result { // it only keep the ids of ops that are concurrent to each other + if !last_ids.is_empty() && dag.is_before_shallow_root(last_ids) { + return Err(last_ids + .iter() + .next() + .expect("non-empty frontiers should have at least one id")); + } + if last_ids.len() <= 1 { return Ok(last_ids.clone()); } @@ -1005,6 +1067,30 @@ pub fn shrink_frontiers(last_ids: &Frontiers, dag: &AppDag) -> Result 1 { + let first_id = last_ids[0].id(); + let Some(first_node) = dag.get(first_id) else { + return Err(first_id); + }; + let first_deps = first_node.deps.clone(); + let mut all_share_deps = true; + for id in &last_ids[1..] 
{ + let frontier = id.id(); + let Some(node) = dag.get(frontier) else { + return Err(frontier); + }; + if node.deps != first_deps { + all_share_deps = false; + break; + } + } + + if all_share_deps { + last_ids.sort_by_key(|x| x.lamport); + return Ok(last_ids.into_iter().rev().map(|x| x.id()).collect()); + } + } + let mut frontiers = Vec::new(); // Iterate from the greatest lamport to the smallest last_ids.sort_by_key(|x| x.lamport); diff --git a/crates/loro-internal/src/version/frontiers.rs b/crates/loro-internal/src/version/frontiers.rs index 9c26268fa..2f2412a0b 100644 --- a/crates/loro-internal/src/version/frontiers.rs +++ b/crates/loro-internal/src/version/frontiers.rs @@ -335,33 +335,13 @@ impl Frontiers { } impl From<&[ID]> for Frontiers { fn from(ids: &[ID]) -> Self { - match ids.len() { - 0 => Frontiers::None, - 1 => Frontiers::ID(ids[0]), - _ => { - let mut map = InternalMap::new(); - for &id in ids { - map.insert(id); - } - Frontiers::Map(map) - } - } + ids.iter().copied().collect() } } impl From> for Frontiers { fn from(ids: Vec) -> Self { - match ids.len() { - 0 => Frontiers::None, - 1 => Frontiers::ID(ids[0]), - _ => { - let mut map = InternalMap::new(); - for id in ids { - map.insert(id); - } - Frontiers::Map(map) - } - } + ids.into_iter().collect() } } @@ -392,33 +372,13 @@ impl From> for Frontiers { impl From<[ID; N]> for Frontiers { fn from(value: [ID; N]) -> Self { - match N { - 0 => Frontiers::None, - 1 => Frontiers::ID(value[0]), - _ => { - let mut map = InternalMap::new(); - for id in value { - map.insert(id); - } - Frontiers::Map(map) - } - } + value.into_iter().collect() } } impl From<&Vec> for Frontiers { fn from(ids: &Vec) -> Self { - match ids.len() { - 0 => Frontiers::None, - 1 => Frontiers::ID(ids[0]), - _ => { - let mut map = InternalMap::new(); - for id in ids { - map.insert(*id); - } - Frontiers::Map(map) - } - } + ids.iter().copied().collect() } } diff --git a/crates/loro-wasm/src/lib.rs b/crates/loro-wasm/src/lib.rs index 
8797f20eb..764d2b4f0 100644 --- a/crates/loro-wasm/src/lib.rs +++ b/crates/loro-wasm/src/lib.rs @@ -20,7 +20,7 @@ use loro_internal::{ event::Index, handler::{ Handler, ListHandler, MapHandler, TextDelta, TextHandler, TreeHandler, UpdateOptions, - ValueOrHandler, + UpdateTimeoutError, ValueOrHandler, }, id::{Counter, PeerID, TreeID, ID}, loro::{CommitOptions, ExportMode}, @@ -2490,6 +2490,13 @@ fn convert_container_path_to_js_value(path: &[(ContainerID, Index)]) -> JsContai v.into() } +fn update_error_to_js(e: UpdateTimeoutError) -> JsValue { + match e { + UpdateTimeoutError::Timeout => JsError::new("Update timeout").into(), + err => JsError::new(&err.to_string()).into(), + } +} + /// The handler of a text container. It supports rich text CRDT. /// /// Learn more at https://loro.dev/docs/tutorial/text @@ -2604,7 +2611,7 @@ impl LoroText { }; self.handler .update(text, options) - .map_err(|_| JsError::new("Update timeout").into()) + .map_err(update_error_to_js) } /// Update the current text to the target text, the difference is calculated line by line. @@ -2634,7 +2641,7 @@ impl LoroText { }; self.handler .update_by_line(text, options) - .map_err(|_| JsError::new("Update timeout").into()) + .map_err(update_error_to_js) } /// Insert the string at the given index (utf-16 index). 
diff --git a/crates/loro/tests/commit_message_test.rs b/crates/loro/tests/commit_message_test.rs index ecf16899b..d143baf18 100644 --- a/crates/loro/tests/commit_message_test.rs +++ b/crates/loro/tests/commit_message_test.rs @@ -1,4 +1,5 @@ -use loro::{CommitOptions, LoroDoc, VersionVector, ID}; +use loro::{ChangeTravelError, CommitOptions, LoroDoc, VersionVector, ID}; +use std::ops::ControlFlow; #[test] fn explicit_empty_commit_swallow_options() { @@ -51,6 +52,32 @@ fn implicit_empty_commit_preserves_options() { assert_eq!(second_change.timestamp(), 200); } +#[test] +fn failed_travel_change_ancestors_preserves_next_commit_options() { + let doc = LoroDoc::new(); + doc.set_peer_id(1).unwrap(); + + doc.set_next_commit_message("after failed travel"); + doc.set_next_commit_timestamp(42); + + let mut noop = |_| ControlFlow::Continue(()); + let err = doc + .travel_change_ancestors(&[ID::new(999, 0)], &mut noop) + .unwrap_err(); + assert!(matches!( + err, + ChangeTravelError::TargetIdNotFound(id) if id == ID::new(999, 0) + )); + + let text = doc.get_text("text"); + text.insert(0, "x").unwrap(); + doc.commit(); + + let change = doc.get_change(ID::new(1, 0)).unwrap(); + assert_eq!(change.message(), "after failed travel"); + assert_eq!(change.timestamp(), 42); +} + #[test] fn test_commit_message() { let doc = LoroDoc::new(); diff --git a/crates/loro/tests/contracts/version_frontiers.rs b/crates/loro/tests/contracts/version_frontiers.rs index baaa2496a..d0f30d191 100644 --- a/crates/loro/tests/contracts/version_frontiers.rs +++ b/crates/loro/tests/contracts/version_frontiers.rs @@ -32,6 +32,21 @@ fn sorted_ids(frontiers: &Frontiers) -> Vec<(u64, i32)> { ids } +#[test] +fn frontiers_constructors_canonicalize_same_peer_ids() { + let ids = [ID::new(1, 0), ID::new(1, 1)]; + let cases = [ + ("array", Frontiers::from(ids)), + ("vec", Frontiers::from(ids.to_vec())), + ("slice", Frontiers::from(ids.as_slice())), + ]; + + for (name, frontiers) in cases { + assert_eq!(frontiers.len(), 
1, "{name}"); + assert_eq!(frontiers.as_single(), Some(ID::new(1, 1)), "{name}"); + } +} + fn sorted_spans(spans: I) -> Vec<(u64, i32, i32)> where I: IntoIterator, @@ -406,6 +421,8 @@ fn frontiers_contracts_follow_semantics() -> anyhow::Result<()> { .expect("foreign frontiers should remain unchanged"), foreign.state_frontiers() ); + let foreign_vv = foreign.frontiers_to_vv(&foreign.state_frontiers()).unwrap(); + assert_eq!(doc.vv_to_frontiers(&foreign_vv), foreign.state_frontiers()); let minimized = doc .minimize_frontiers(&doc_frontiers) diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 14cc55079..765dbbbde 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -4,7 +4,224 @@ use std::{ }; use super::gen_action; -use loro::{cursor::CannotFindRelativePosition, ExportMode, Frontiers, LoroDoc, ID}; +use loro::{cursor::CannotFindRelativePosition, ExportMode, Frontiers, LoroDoc, VersionVector, ID}; + +fn multi_frontier_shallow_snapshot() -> anyhow::Result<(Vec, Frontiers, loro::LoroValue)> { + let doc = LoroDoc::new(); + doc.set_detached_editing(true); + + doc.set_peer_id(1)?; + doc.get_text("left").insert(0, "left")?; + doc.commit(); + let left = doc.state_frontiers(); + + doc.checkout(&Frontiers::default())?; + doc.set_peer_id(2)?; + doc.get_text("right").insert(0, "right")?; + doc.commit(); + let right = doc.state_frontiers(); + + let mut shallow_root = left.clone(); + shallow_root.merge_with_greater(&right); + let shallow_root = doc + .minimize_frontiers(&shallow_root) + .expect("frontiers should be reachable"); + doc.checkout(&shallow_root)?; + let expected = doc.get_deep_value(); + + let bytes = doc.export(ExportMode::shallow_snapshot(&shallow_root))?; + Ok((bytes, shallow_root, expected)) +} + +fn three_frontier_shallow_snapshot() -> anyhow::Result<(Vec, Frontiers, loro::LoroValue)> { + 
let doc = LoroDoc::new(); + doc.set_detached_editing(true); + + let mut root = Frontiers::default(); + for peer in 1..=3 { + doc.checkout(&Frontiers::default())?; + doc.set_peer_id(peer)?; + doc.get_text(format!("text_{peer}")) + .insert(0, &format!("value_{peer}"))?; + doc.commit(); + root.merge_with_greater(&doc.state_frontiers()); + } + + let root = doc + .minimize_frontiers(&root) + .expect("frontiers should be reachable"); + assert_eq!(root.len(), 3); + doc.checkout(&root)?; + let expected = doc.get_deep_value(); + + let bytes = doc.export(ExportMode::shallow_snapshot(&root))?; + Ok((bytes, root, expected)) +} + +#[test] +fn import_three_frontier_shallow_root_snapshot_does_not_crash() -> anyhow::Result<()> { + const CHILD_ENV: &str = "LORO_IMPORT_THREE_FRONTIER_SHALLOW_ROOT_CHILD"; + const TEST_NAME: &str = + "integration_test::shallow_snapshot_test::import_three_frontier_shallow_root_snapshot_does_not_crash"; + + if std::env::var_os(CHILD_ENV).is_some() { + return import_three_frontier_shallow_root_snapshot_does_not_crash_inner(); + } + + let output = std::process::Command::new(std::env::current_exe()?) 
+ .arg("--exact") + .arg(TEST_NAME) + .arg("--nocapture") + .env(CHILD_ENV, "1") + .output()?; + + assert!( + output.status.success(), + "importing a three-frontier shallow root snapshot should not crash\nstatus: {}\nstdout:\n{}\nstderr:\n{}", + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + Ok(()) +} + +fn import_three_frontier_shallow_root_snapshot_does_not_crash_inner() -> anyhow::Result<()> { + let (bytes, shallow_root, expected) = three_frontier_shallow_snapshot()?; + let meta = LoroDoc::decode_import_blob_meta(&bytes, false)?; + assert_eq!(meta.start_frontiers, shallow_root); + let imported = LoroDoc::new(); + imported.import(&bytes)?; + + assert!(imported.is_shallow()); + assert_eq!(imported.shallow_since_frontiers(), shallow_root); + assert_eq!(imported.get_deep_value(), expected); + imported.check_state_correctness_slow(); + Ok(()) +} + +#[test] +fn import_random_multi_frontier_shallow_snapshot_does_not_crash() -> anyhow::Result<()> { + const CHILD_ENV: &str = "LORO_IMPORT_RANDOM_MULTI_FRONTIER_SHALLOW_CHILD"; + const TEST_NAME: &str = + "integration_test::shallow_snapshot_test::import_random_multi_frontier_shallow_snapshot_does_not_crash"; + + if std::env::var_os(CHILD_ENV).is_some() { + return import_random_multi_frontier_shallow_snapshot_does_not_crash_inner(); + } + + let output = std::process::Command::new(std::env::current_exe()?) 
+ .arg("--exact") + .arg(TEST_NAME) + .arg("--nocapture") + .env(CHILD_ENV, "1") + .output()?; + + assert!( + output.status.success(), + "importing random multi-frontier shallow snapshots should not crash\nstatus: {}\nstdout:\n{}\nstderr:\n{}", + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + Ok(()) +} + +fn import_random_multi_frontier_shallow_snapshot_does_not_crash_inner() -> anyhow::Result<()> { + use rand::{Rng, SeedableRng}; + + let doc = LoroDoc::new(); + doc.set_detached_editing(true); + doc.set_change_merge_interval(0); + let text = doc.get_text("text"); + let list = doc.get_list("list"); + let map = doc.get_map("map"); + let mut rng = rand::rngs::StdRng::seed_from_u64(0x5a11_0cab); + let mut recorded = vec![(doc.state_frontiers(), doc.get_deep_value())]; + + for step in 0..80 { + let base_idx = rng.gen_range(0..recorded.len()); + doc.checkout(&recorded[base_idx].0)?; + doc.set_peer_id((step + 10) as u64)?; + + for _ in 0..rng.gen_range(1..=3) { + match rng.gen_range(0..8) { + 0 | 1 => { + let pos = rng.gen_range(0..=text.len_unicode()); + text.insert(pos, ["a", "b", "中"][rng.gen_range(0..3)])?; + } + 2 => { + if text.len_unicode() > 0 { + text.delete(rng.gen_range(0..text.len_unicode()), 1)?; + } + } + 3 => { + let pos = rng.gen_range(0..=list.len()); + list.insert(pos, step as i32)?; + } + 4 => { + if !list.is_empty() { + list.delete(rng.gen_range(0..list.len()), 1)?; + } + } + 5 | 6 => { + map.insert(&format!("k{}", rng.gen_range(0..12)), step as i32)?; + } + _ => {} + } + } + + doc.commit(); + recorded.push((doc.state_frontiers(), doc.get_deep_value())); + } + + doc.checkout_to_latest(); + let mut checked = 0; + for _ in 0..120 { + let mut target = Frontiers::default(); + for _ in 0..rng.gen_range(2..=5) { + let id = recorded[rng.gen_range(0..recorded.len())].0.iter().next(); + if let Some(id) = id { + target.push(id); + } + } + + let Ok(target) = doc.minimize_frontiers(&target) else { + 
continue; + }; + if target.is_empty() || target.len() < 2 { + continue; + } + + let latest_frontiers = doc.oplog_frontiers(); + doc.checkout(&latest_frontiers)?; + let latest_value = doc.get_deep_value(); + + doc.checkout(&target)?; + let bytes = doc.export(ExportMode::shallow_snapshot(&target))?; + let meta = LoroDoc::decode_import_blob_meta(&bytes, false)?; + doc.checkout(&meta.start_frontiers)?; + let shallow_root_value = doc.get_deep_value(); + doc.checkout(&target)?; + + let imported = LoroDoc::new(); + imported.import(&bytes)?; + assert_eq!(imported.shallow_since_frontiers(), meta.start_frontiers); + assert_eq!(imported.get_deep_value(), latest_value); + let root_state_only = + imported.export(ExportMode::state_only(Some(&meta.start_frontiers)))?; + let root_doc = LoroDoc::new(); + root_doc.import(&root_state_only)?; + imported.checkout(&meta.start_frontiers)?; + assert_eq!(imported.get_deep_value(), shallow_root_value); + assert_eq!(root_doc.get_deep_value(), shallow_root_value); + imported.checkout_to_latest(); + assert_eq!(imported.get_deep_value(), latest_value); + checked += 1; + } + + assert!(checked > 0); + Ok(()) +} #[test] fn state_only_at_concurrent_frontiers_excludes_later_ops() -> anyhow::Result<()> { @@ -40,6 +257,501 @@ fn state_only_at_concurrent_frontiers_excludes_later_ops() -> anyhow::Result<()> Ok(()) } +#[test] +fn state_only_import_allows_frontiers_that_include_shallow_root() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_peer_id(1)?; + doc.set_change_merge_interval(0); + + let text = doc.get_text("text"); + text.insert(0, "root")?; + doc.commit(); + let shallow_root = doc.state_frontiers(); + + doc.set_peer_id(2)?; + text.insert(text.len_unicode(), " latest")?; + doc.commit(); + let latest = doc.state_frontiers(); + let expected = doc.get_deep_value(); + + let target = Frontiers::from([ + shallow_root.as_single().unwrap(), + latest.as_single().unwrap(), + ]); + let bytes = 
doc.export(ExportMode::state_only(Some(&target)))?; + let new_doc = LoroDoc::new(); + new_doc.import(&bytes)?; + + assert!(new_doc.is_shallow()); + assert_eq!(new_doc.shallow_since_frontiers(), shallow_root); + assert_eq!(new_doc.oplog_frontiers(), latest); + assert_eq!(new_doc.get_deep_value(), expected); + new_doc.check_state_correctness_slow(); + Ok(()) +} + +#[test] +fn checkout_subset_of_multi_frontier_shallow_root_should_error() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_detached_editing(true); + + doc.set_peer_id(1)?; + doc.get_text("left").insert(0, "left")?; + doc.commit(); + let left = doc.state_frontiers(); + + doc.checkout(&Frontiers::default())?; + doc.set_peer_id(2)?; + doc.get_text("right").insert(0, "right")?; + doc.commit(); + let right = doc.state_frontiers(); + + let mut shallow_root = left.clone(); + shallow_root.merge_with_greater(&right); + let shallow_root = doc + .minimize_frontiers(&shallow_root) + .expect("frontiers should be reachable"); + assert_eq!(shallow_root.len(), 2); + + doc.checkout(&shallow_root)?; + let bytes = doc.export(ExportMode::shallow_snapshot(&shallow_root))?; + let shallow_doc = LoroDoc::new(); + shallow_doc.import(&bytes)?; + + let subset = Frontiers::from([shallow_root.iter().next().unwrap()]); + assert!(shallow_doc.checkout(&subset).is_err()); + Ok(()) +} + +#[test] +fn frontiers_to_vv_rejects_unrepresentable_shallow_root_versions() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let shallow_doc = LoroDoc::new(); + shallow_doc.import(&bytes)?; + + let subset = Frontiers::from([shallow_root.iter().next().unwrap()]); + assert!(shallow_doc.frontiers_to_vv(&Frontiers::default()).is_none()); + assert!(shallow_doc.frontiers_to_vv(&subset).is_none()); + assert!(shallow_doc + .cmp_frontiers(&Frontiers::default(), &shallow_root) + .is_err()); + assert!(shallow_doc.cmp_frontiers(&subset, &shallow_root).is_err()); + 
assert!(shallow_doc.minimize_frontiers(&subset).is_err()); + assert_eq!( + shallow_doc.cmp_with_frontiers(&Frontiers::default()), + std::cmp::Ordering::Less + ); + assert_eq!( + shallow_doc.cmp_with_frontiers(&subset), + std::cmp::Ordering::Less + ); + + let shallow_root_vv = shallow_doc + .frontiers_to_vv(&shallow_root) + .expect("complete shallow root should be included"); + assert_eq!( + shallow_doc.vv_to_frontiers(&VersionVector::default()), + shallow_root + ); + assert_eq!(shallow_doc.vv_to_frontiers(&shallow_root_vv), shallow_root); + let mut subset_vv = VersionVector::new(); + subset_vv.set_last(subset.as_single().unwrap()); + assert_eq!(shallow_doc.vv_to_frontiers(&subset_vv), shallow_root); + assert_eq!( + shallow_doc + .cmp_frontiers(&shallow_root, &shallow_root) + .expect("complete shallow root should be comparable"), + Some(std::cmp::Ordering::Equal) + ); + Ok(()) +} + +#[test] +fn frontiers_to_vv_rejects_shallow_root_deps() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_peer_id(1)?; + doc.get_text("text").insert(0, "abcdef")?; + doc.commit(); + + let shallow_root = Frontiers::from_id(ID::new(1, 3)); + let before_root = Frontiers::from_id(ID::new(1, 2)); + let bytes = doc.export(ExportMode::shallow_snapshot(&shallow_root))?; + let shallow_doc = LoroDoc::new(); + shallow_doc.import(&bytes)?; + + assert_eq!(shallow_doc.shallow_since_frontiers(), shallow_root); + assert!(shallow_doc.checkout(&before_root).is_err()); + assert!(shallow_doc + .export(ExportMode::shallow_snapshot(&before_root)) + .is_err()); + assert!(shallow_doc + .export(ExportMode::state_only(Some(&before_root))) + .is_err()); + assert!(shallow_doc.frontiers_to_vv(&before_root).is_none()); + assert!(shallow_doc + .cmp_frontiers(&before_root, &shallow_root) + .is_err()); + assert!(shallow_doc.minimize_frontiers(&before_root).is_err()); + assert_eq!( + shallow_doc.cmp_with_frontiers(&before_root), + std::cmp::Ordering::Less + ); + Ok(()) +} + +#[test] +fn 
frontiers_to_vv_rejects_empty_deps_before_initial_shallow_root() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_peer_id(1)?; + doc.get_text("text").insert(0, "a")?; + doc.commit(); + let shallow_root = doc.state_frontiers(); + + let bytes = doc.export(ExportMode::shallow_snapshot(&shallow_root))?; + let shallow_doc = LoroDoc::new(); + shallow_doc.import(&bytes)?; + + assert_eq!(shallow_doc.shallow_since_frontiers(), shallow_root); + assert!(shallow_doc.frontiers_to_vv(&Frontiers::default()).is_none()); + assert!(shallow_doc.checkout(&Frontiers::default()).is_err()); + assert!(shallow_doc + .export(ExportMode::state_only(Some(&Frontiers::default()))) + .is_err()); + Ok(()) +} + +#[test] +fn reexport_multi_frontier_shallow_root_snapshot_imports() -> anyhow::Result<()> { + let (bytes, shallow_root, expected) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + + let reexported = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + imported.export(ExportMode::shallow_snapshot(&shallow_root)) + })) { + Ok(result) => result?, + Err(_) => { + std::mem::forget(imported); + panic!("re-exporting a multi-frontier shallow root snapshot should not panic"); + } + }; + let imported_again = LoroDoc::new(); + imported_again.import(&reexported)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.shallow_since_frontiers(), shallow_root); + assert_eq!(imported_again.get_deep_value(), expected); + Ok(()) +} + +#[test] +fn snapshot_export_preserves_multi_frontier_shallow_root() -> anyhow::Result<()> { + let (bytes, shallow_root, expected) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + + let snapshot = imported.export(ExportMode::Snapshot)?; + let imported_again = LoroDoc::new(); + imported_again.import(&snapshot)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.shallow_since_frontiers(), shallow_root); + 
assert_eq!(imported_again.get_deep_value(), expected); + Ok(()) +} + +#[test] +fn state_only_export_preserves_multi_frontier_shallow_root() -> anyhow::Result<()> { + let (bytes, shallow_root, expected) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + + let state_only = imported.export(ExportMode::state_only(Some(&shallow_root)))?; + let imported_again = LoroDoc::new(); + imported_again.import(&state_only)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.shallow_since_frontiers(), shallow_root); + assert_eq!(imported_again.get_deep_value(), expected); + Ok(()) +} + +#[test] +fn state_only_multi_frontier_shallow_root_can_accept_local_edits() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + let state_only = imported.export(ExportMode::state_only(Some(&shallow_root)))?; + + let edited = LoroDoc::new(); + edited.import(&state_only)?; + edited.set_peer_id(3)?; + edited.get_text("tail").insert(0, "tail")?; + edited.commit(); + + assert!(edited.is_shallow()); + assert_eq!(edited.shallow_since_frontiers(), shallow_root); + assert_eq!(edited.get_text("tail").to_string(), "tail"); + Ok(()) +} + +#[test] +fn state_correctness_check_handles_multi_frontier_shallow_root() -> anyhow::Result<()> { + let (bytes, _, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + + imported.check_state_correctness_slow(); + Ok(()) +} + +#[test] +fn reexport_shallow_snapshot_with_redundant_root_frontier_imports() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + imported.set_detached_editing(true); + + imported.checkout(&shallow_root)?; + imported.set_peer_id(3)?; + imported.get_text("tail").insert(0, "tail")?; + imported.commit(); + let tail = 
imported.state_frontiers(); + let expected = imported.get_deep_value(); + + let mut redundant_target = tail.clone(); + redundant_target.push(shallow_root.iter().next().unwrap()); + let minimized_target = imported + .minimize_frontiers(&redundant_target) + .expect("target should be reachable"); + assert_ne!(minimized_target, redundant_target); + + let snapshot = imported.export(ExportMode::shallow_snapshot(&redundant_target))?; + let imported_again = LoroDoc::new(); + imported_again.import(&snapshot)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.shallow_since_frontiers(), minimized_target); + assert_eq!(imported_again.get_deep_value(), expected); + assert!(imported_again.frontiers_to_vv(&minimized_target).is_some()); + Ok(()) +} + +#[test] +fn state_only_from_shallow_doc_normalizes_redundant_target_frontiers() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + imported.set_detached_editing(true); + + imported.checkout(&shallow_root)?; + imported.set_peer_id(3)?; + imported.get_text("tail").insert(0, "tail")?; + imported.commit(); + let tail = imported.state_frontiers(); + let expected = imported.get_deep_value(); + + let mut redundant_target = tail.clone(); + redundant_target.push(shallow_root.iter().next().unwrap()); + let minimized_target = imported + .minimize_frontiers(&redundant_target) + .expect("target should be reachable"); + assert_eq!(minimized_target, tail); + assert_ne!(minimized_target, redundant_target); + + let state_only = imported.export(ExportMode::state_only(Some(&redundant_target)))?; + let imported_again = LoroDoc::new(); + imported_again.import(&state_only)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.shallow_since_frontiers(), minimized_target); + assert_eq!(imported_again.get_deep_value(), expected); + imported_again.check_state_correctness_slow(); + Ok(()) +} + +#[test] +fn 
find_id_spans_between_normalizes_redundant_shallow_doc_frontiers() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + imported.set_detached_editing(true); + + imported.checkout(&shallow_root)?; + imported.set_peer_id(3)?; + imported.get_text("tail").insert(0, "tail")?; + imported.commit(); + let tail = imported.state_frontiers(); + + let mut redundant_target = tail.clone(); + redundant_target.push(shallow_root.iter().next().unwrap()); + let minimized_target = imported + .minimize_frontiers(&redundant_target) + .expect("target should be reachable"); + assert_eq!(minimized_target, tail); + + let expected = imported.find_id_spans_between(&shallow_root, &tail); + let actual = imported.find_id_spans_between(&shallow_root, &redundant_target); + + assert_eq!(actual, expected); + assert!(actual.forward.contains_key(&3)); + Ok(()) +} + +#[test] +fn shallow_snapshot_export_normalizes_redundant_target_frontiers() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_detached_editing(true); + + doc.set_peer_id(1)?; + doc.get_text("left").insert(0, "left")?; + doc.commit(); + let left = doc.state_frontiers(); + + doc.checkout(&Frontiers::default())?; + doc.set_peer_id(2)?; + doc.get_text("right").insert(0, "right")?; + doc.commit(); + let right = doc.state_frontiers(); + + let mut root = left.clone(); + root.merge_with_greater(&right); + let root = doc + .minimize_frontiers(&root) + .expect("root should be reachable"); + doc.checkout(&root)?; + + doc.set_peer_id(3)?; + doc.get_text("tail").insert(0, "tail")?; + doc.commit(); + let tail = doc.state_frontiers(); + let expected = doc.get_deep_value(); + + let mut redundant_target = tail.clone(); + redundant_target.push(left.as_single().unwrap()); + let minimized_target = doc + .minimize_frontiers(&redundant_target) + .expect("target should be reachable"); + assert_eq!(minimized_target, tail); + 
assert_ne!(minimized_target, redundant_target); + + let snapshot = doc.export(ExportMode::shallow_snapshot(&redundant_target))?; + let imported = LoroDoc::new(); + imported.import(&snapshot)?; + + assert!(imported.is_shallow()); + assert_eq!(imported.shallow_since_frontiers(), minimized_target); + assert_eq!(imported.get_deep_value(), expected); + Ok(()) +} + +#[test] +fn shallow_doc_with_multi_frontier_root_can_export_concurrent_tail() -> anyhow::Result<()> { + let (bytes, shallow_root, _) = multi_frontier_shallow_snapshot()?; + let imported = LoroDoc::new(); + imported.import(&bytes)?; + imported.set_detached_editing(true); + + imported.checkout(&shallow_root)?; + imported.set_peer_id(3)?; + imported.get_text("tail_a").insert(0, "a")?; + imported.get_tree("tail_tree").create(None)?; + imported.commit(); + let tail_a = imported.state_frontiers(); + + imported.checkout(&shallow_root)?; + imported.set_peer_id(4)?; + imported.get_text("tail_b").insert(0, "b")?; + imported.get_tree("tail_tree").create(None)?; + imported.commit(); + let tail_b = imported.state_frontiers(); + + let mut target = tail_a; + target.merge_with_greater(&tail_b); + let target = imported + .minimize_frontiers(&target) + .expect("tail frontiers should be reachable"); + imported.checkout(&target)?; + let expected = imported.get_deep_value(); + + imported.checkout(&shallow_root)?; + imported.checkout(&target)?; + assert_eq!(imported.get_deep_value(), expected); + + let root_to_target = imported.find_id_spans_between(&shallow_root, &target); + assert!(root_to_target.retreat.is_empty()); + assert!(root_to_target.forward.contains_key(&3)); + assert!(root_to_target.forward.contains_key(&4)); + + let clamped_start_to_target = imported.find_id_spans_between(&Frontiers::default(), &target); + assert_eq!(clamped_start_to_target, root_to_target); + + let target_to_root = imported.find_id_spans_between(&target, &shallow_root); + assert!(target_to_root.forward.is_empty()); + 
assert!(target_to_root.retreat.contains_key(&3)); + assert!(target_to_root.retreat.contains_key(&4)); + + let target_to_clamped_start = imported.find_id_spans_between(&target, &Frontiers::default()); + assert_eq!(target_to_clamped_start, target_to_root); + + let tail_updates = imported.export(ExportMode::updates_in_range( + root_to_target.get_id_spans_right().collect::<Vec<_>>(), + ))?; + let updated_from_root = LoroDoc::new(); + updated_from_root.import(&bytes)?; + updated_from_root.import(&tail_updates)?; + assert_eq!(updated_from_root.get_deep_value(), expected); + + let root_vv = imported + .frontiers_to_vv(&shallow_root) + .expect("shallow root should be included"); + let target_vv = imported + .frontiers_to_vv(&target) + .expect("target should be included"); + let tail_json = imported.export_json_updates(&root_vv, &target_vv); + assert_eq!(tail_json.start_version, shallow_root); + let json_updated_from_root = LoroDoc::new(); + json_updated_from_root.import(&bytes)?; + json_updated_from_root.import_json_updates(tail_json)?; + assert_eq!(json_updated_from_root.get_deep_value(), expected); + + let all_tail_json = imported.export_json_updates(&Default::default(), &target_vv); + assert_eq!(all_tail_json.start_version, shallow_root); + let json_all_updated_from_root = LoroDoc::new(); + json_all_updated_from_root.import(&bytes)?; + json_all_updated_from_root.import_json_updates(all_tail_json)?; + assert_eq!(json_all_updated_from_root.get_deep_value(), expected); + + let bytes = imported.export(ExportMode::shallow_snapshot(&target))?; + let imported_again = LoroDoc::new(); + imported_again.import(&bytes)?; + + assert!(imported_again.is_shallow()); + assert_eq!(imported_again.get_deep_value(), expected); + + let state_only = imported.export(ExportMode::state_only(Some(&target)))?; + let state_only_imported = LoroDoc::new(); + state_only_imported.import(&state_only)?; + + assert!(state_only_imported.is_shallow()); + assert_eq!(state_only_imported.get_deep_value(), expected);
+ + let latest_state_only = imported.export(ExportMode::state_only(None))?; + let latest_state_only_imported = LoroDoc::new(); + latest_state_only_imported.import(&latest_state_only)?; + + assert!(latest_state_only_imported.is_shallow()); + assert_eq!(latest_state_only_imported.get_deep_value(), expected); + + let snapshot = imported.export(ExportMode::Snapshot)?; + let snapshot_imported = LoroDoc::new(); + snapshot_imported.import(&snapshot)?; + + assert!(snapshot_imported.is_shallow()); + assert_eq!(snapshot_imported.get_deep_value(), expected); + Ok(()) +} + #[test] fn test_gc() -> anyhow::Result<()> { let doc = LoroDoc::new(); @@ -235,6 +947,47 @@ fn test_richtext_gc() -> anyhow::Result<()> { Ok(()) } +#[test] +fn reexport_shallow_doc_at_style_start_advances_to_style_end() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.set_peer_id(1)?; + let text = doc.get_text("text"); + text.insert(0, "1")?; // 0 + text.insert(0, "2")?; // 1 + text.insert(0, "3")?; // 2 + doc.commit(); + text.mark(0..2, "bold", "value")?; // 3, 4 + text.insert(3, "456")?; // 5, 6, 7 + + let bytes = doc.export(loro::ExportMode::shallow_snapshot_since(ID::new(1, 2)))?; + let shallow_doc = LoroDoc::new(); + shallow_doc.import(&bytes)?; + + let reexported = shallow_doc.export(loro::ExportMode::shallow_snapshot_since(ID::new(1, 3)))?; + let imported = LoroDoc::new(); + imported.import(&reexported)?; + + assert_eq!( + imported.shallow_since_frontiers(), + Frontiers::from_id(ID::new(1, 4)) + ); + imported.checkout(&Frontiers::from_id(ID::new(1, 4)))?; + assert_eq!(imported.get_text("text").to_string(), "321"); + imported.checkout_to_latest(); + assert_eq!(imported.get_text("text").to_string(), "321456"); + + let style_start = Frontiers::from_id(ID::new(1, 3)); + let state_only = shallow_doc.export(ExportMode::state_only(Some(&style_start)))?; + let state_only_imported = LoroDoc::new(); + state_only_imported.import(&state_only)?; + assert_eq!( + 
state_only_imported.shallow_since_frontiers(), + Frontiers::from_id(ID::new(1, 4)) + ); + state_only_imported.check_state_correctness_slow(); + Ok(()) +} + #[test] fn import_updates_depend_on_shallow_history_should_raise_error() -> anyhow::Result<()> { let doc = LoroDoc::new(); diff --git a/crates/loro/tests/integration_test/snapshot_at_test.rs b/crates/loro/tests/integration_test/snapshot_at_test.rs index c9b1d33dd..b87e82789 100644 --- a/crates/loro/tests/integration_test/snapshot_at_test.rs +++ b/crates/loro/tests/integration_test/snapshot_at_test.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use super::gen_action; -use loro::{ExportMode, LoroDoc}; +use loro::{ExportMode, Frontiers, LoroDoc}; #[test] fn test_snapshot_at_with_multiple_actions() -> anyhow::Result<()> { @@ -65,3 +65,29 @@ fn test_fork_at_target_frontiers() -> anyhow::Result<()> { Ok(()) } + +#[test] +fn snapshot_at_normalizes_redundant_target_frontiers() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + + doc.set_peer_id(1)?; + doc.get_text("text").insert(0, "root")?; + doc.commit(); + let root = doc.state_frontiers(); + + doc.set_peer_id(2)?; + doc.get_text("text").insert(4, " latest")?; + doc.commit(); + let latest = doc.state_frontiers(); + let expected = doc.get_deep_value(); + + let target = Frontiers::from([root.as_single().unwrap(), latest.as_single().unwrap()]); + let snapshot = doc.export(ExportMode::snapshot_at(&target))?; + let imported = LoroDoc::new(); + imported.import(&snapshot)?; + + assert_eq!(imported.oplog_frontiers(), latest); + assert_eq!(imported.get_deep_value(), expected); + imported.check_state_correctness_slow(); + Ok(()) +} diff --git a/crates/loro/tests/loro_rust_test.rs b/crates/loro/tests/loro_rust_test.rs index 42df863df..b104a03a2 100644 --- a/crates/loro/tests/loro_rust_test.rs +++ b/crates/loro/tests/loro_rust_test.rs @@ -18,7 +18,7 @@ use loro::{ event::{Diff, DiffBatch, ListDiffItem}, loro_value, CommitOptions, ContainerID, ContainerTrait, ContainerType, 
ExportMode, Frontiers, FrontiersNotIncluded, IdSpan, Index, LoroDoc, LoroError, LoroList, LoroMap, LoroMapValue, - LoroStringValue, LoroText, LoroValue, ToJson, TreeParentId, + LoroStringValue, LoroText, LoroValue, ToJson, TreeParentId, UpdateTimeoutError, }; use loro_internal::{ encoding::EncodedBlobMode, fx_map, handler::TextDelta, id::ID, version_range, vv, LoroResult, @@ -1811,6 +1811,24 @@ fn perform_action_on_deleted_container_should_return_error() { assert!(text.is_deleted()); } +#[test] +#[parallel] +fn update_deleted_text_should_return_error() { + let doc = LoroDoc::new(); + let list = doc.get_movable_list("list"); + let text = list.push_container(LoroText::new()).unwrap(); + list.set(0, 1).unwrap(); + + assert!(matches!( + text.update("Hello", Default::default()), + Err(UpdateTimeoutError::ContainerDeleted { .. }) + )); + assert!(matches!( + text.update_by_line("Hello", Default::default()), + Err(UpdateTimeoutError::ContainerDeleted { .. }) + )); +} + #[test] #[parallel] fn checkout_should_reset_container_deleted_cache() { @@ -1825,6 +1843,23 @@ fn checkout_should_reset_container_deleted_cache() { assert!(!text.is_deleted()); } +#[test] +#[parallel] +fn checkout_forward_should_reset_container_deleted_cache() { + let doc = LoroDoc::new(); + let list = doc.get_movable_list("list"); + let text = list.push_container(LoroText::new()).unwrap(); + doc.commit(); + let f = doc.state_frontiers(); + + doc.checkout(&Frontiers::default()).unwrap(); + // This populates the deleted-container cache. In debug builds, is_deleted() + // recomputes and repairs stale entries; release builds return from the cache. 
+ assert!(text.is_deleted()); + doc.checkout(&f).unwrap(); + assert!(!text.is_deleted()); +} + #[test] #[parallel] fn test_fork_at_target_frontiers() { diff --git a/crates/loro/tests/mov.rs b/crates/loro/tests/mov.rs index 4a1655966..f7b9140c2 100644 --- a/crates/loro/tests/mov.rs +++ b/crates/loro/tests/mov.rs @@ -59,3 +59,42 @@ fn conflict_moves() -> Result<(), LoroError> { Ok(()) } + +#[test] +fn checkout_movable_list_multi_op_change_after_snapshot() -> Result<(), LoroError> { + let doc = LoroDoc::new(); + doc.set_peer_id(1)?; + let list = doc.get_movable_list("list"); + list.insert(0, 0)?; + list.insert(1, 1)?; + list.insert(2, 2)?; + doc.commit(); + let base = doc.state_frontiers(); + + doc.set_peer_id(2)?; + list.insert(0, 9)?; + list.mov(1, 3)?; + doc.commit(); + let latest = doc.state_frontiers(); + assert_eq!( + doc.get_deep_value().to_json_value(), + json!({ + "list": [9, 1, 2, 0] + }) + ); + + let restored = LoroDoc::new(); + restored.import(&doc.export(ExportMode::Snapshot)?)?; + restored.checkout(&base)?; + assert_eq!( + restored.get_deep_value().to_json_value(), + json!({ + "list": [0, 1, 2] + }) + ); + + restored.checkout(&latest)?; + assert_eq!(restored.get_deep_value(), doc.get_deep_value()); + + Ok(()) +} diff --git a/crates/loro/tests/panic_test.rs b/crates/loro/tests/panic_test.rs index 7e6f8bb2d..b8bd98d27 100644 --- a/crates/loro/tests/panic_test.rs +++ b/crates/loro/tests/panic_test.rs @@ -3,9 +3,11 @@ #![allow(unexpected_cfgs)] use serial_test::parallel; +use std::mem::ManuallyDrop; +use std::panic::AssertUnwindSafe; use loro::event::{Diff, DiffBatch}; -use loro::json::{JsonChange, JsonOp, JsonOpContent, JsonSchema, MapOp}; +use loro::json::{JsonChange, JsonOp, JsonOpContent, JsonSchema, MapOp, TextOp}; use loro::{CommitOptions, Container, ContainerID, ContainerType, LoroDoc, LoroList, ID}; use loro::{Frontiers, LoroValue}; @@ -252,6 +254,158 @@ fn import_json_updates_with_short_peers_array_no_longer_panics() { let _ = 
doc.import_json_updates(schema); } +#[test] +#[parallel] +fn import_json_updates_with_text_insert_out_of_bounds_should_error_without_mutating_doc() { + let src = LoroDoc::new(); + src.set_peer_id(31).unwrap(); + src.get_text("text").insert(0, "a").unwrap(); + src.commit(); + + let mut json = src.export_json_updates(&Default::default(), &src.oplog_vv()); + match &mut json.changes[0].ops[0].content { + JsonOpContent::Text(TextOp::Insert { pos, .. }) => { + *pos = 1_000; + } + other => panic!("expected text insert, got {other:?}"), + } + + let dst = LoroDoc::new(); + let result = std::panic::catch_unwind(AssertUnwindSafe(|| dst.import_json_updates(json))); + assert!( + result.is_ok(), + "malformed text JSON import should not panic" + ); + assert!( + result.unwrap().is_err(), + "malformed text JSON import unexpectedly succeeded; imported value = {:?}", + dst.get_deep_value() + ); + assert_eq!(dst.get_deep_value(), LoroValue::Map(Default::default())); +} + +#[test] +#[parallel] +fn import_json_updates_with_text_mark_empty_range_should_error_without_panic() { + let src = LoroDoc::new(); + src.set_peer_id(33).unwrap(); + let text = src.get_text("text"); + text.insert(0, "abc").unwrap(); + src.commit(); + let first = src.export_json_updates(&Default::default(), &src.oplog_vv()); + let first_vv = src.oplog_vv(); + + text.mark(0..2, "bold", true).unwrap(); + src.commit(); + let mut suffix = src.export_json_updates(&first_vv, &src.oplog_vv()); + match &mut suffix.changes[0].ops[0].content { + JsonOpContent::Text(TextOp::Mark { start, end, .. 
}) => { + *start = 2; + *end = 2; + } + other => panic!("expected text mark, got {other:?}"), + } + + let dst = ManuallyDrop::new(LoroDoc::new()); + dst.import_json_updates(first).unwrap(); + let before_vv = dst.oplog_vv(); + let before_frontiers = dst.oplog_frontiers(); + let before_value = dst.get_deep_value(); + + let result = std::panic::catch_unwind(AssertUnwindSafe(|| dst.import_json_updates(suffix))); + assert!( + result.is_ok(), + "malformed text mark JSON import should not panic" + ); + assert!( + result.unwrap().is_err(), + "malformed text mark JSON import unexpectedly succeeded; imported value = {:?}", + dst.get_deep_value() + ); + assert_eq!(dst.oplog_vv(), before_vv); + assert_eq!(dst.oplog_frontiers(), before_frontiers); + assert_eq!(dst.get_deep_value(), before_value); +} + +#[test] +#[parallel] +fn import_json_updates_with_text_mark_end_without_mark_should_error_without_panic() { + let src = LoroDoc::new(); + src.set_peer_id(34).unwrap(); + let text = src.get_text("text"); + text.insert(0, "abc").unwrap(); + src.commit(); + let first = src.export_json_updates(&Default::default(), &src.oplog_vv()); + let first_vv = src.oplog_vv(); + + text.mark(0..2, "bold", true).unwrap(); + src.commit(); + let mut suffix = src.export_json_updates(&first_vv, &src.oplog_vv()); + match &mut suffix.changes[0].ops[0].content { + content @ JsonOpContent::Text(TextOp::Mark { .. 
}) => { + *content = JsonOpContent::Text(TextOp::MarkEnd); + } + other => panic!("expected text mark, got {other:?}"), + } + + let dst = ManuallyDrop::new(LoroDoc::new()); + dst.import_json_updates(first).unwrap(); + let before_vv = dst.oplog_vv(); + let before_frontiers = dst.oplog_frontiers(); + let before_value = dst.get_deep_value(); + + let result = std::panic::catch_unwind(AssertUnwindSafe(|| dst.import_json_updates(suffix))); + assert!( + result.is_ok(), + "malformed text MarkEnd JSON import should not panic" + ); + assert!( + result.unwrap().is_err(), + "malformed text MarkEnd JSON import unexpectedly succeeded; imported value = {:?}", + dst.get_deep_value() + ); + assert_eq!(dst.oplog_vv(), before_vv); + assert_eq!(dst.oplog_frontiers(), before_frontiers); + assert_eq!(dst.get_deep_value(), before_value); +} + +#[test] +#[parallel] +fn import_json_updates_with_text_mark_end_counter_gap_should_error_without_panic() { + let src = LoroDoc::new(); + src.set_peer_id(35).unwrap(); + let text = src.get_text("text"); + text.insert(0, "abc").unwrap(); + src.commit(); + let first = src.export_json_updates(&Default::default(), &src.oplog_vv()); + let first_vv = src.oplog_vv(); + + text.mark(0..2, "bold", true).unwrap(); + src.commit(); + let mut suffix = src.export_json_updates(&first_vv, &src.oplog_vv()); + suffix.changes[0].ops[1].counter += 1; + + let dst = ManuallyDrop::new(LoroDoc::new()); + dst.import_json_updates(first).unwrap(); + let before_vv = dst.oplog_vv(); + let before_frontiers = dst.oplog_frontiers(); + let before_value = dst.get_deep_value(); + + let result = std::panic::catch_unwind(AssertUnwindSafe(|| dst.import_json_updates(suffix))); + assert!( + result.is_ok(), + "malformed text MarkEnd counter JSON import should not panic" + ); + assert!( + result.unwrap().is_err(), + "malformed text MarkEnd counter JSON import unexpectedly succeeded; imported value = {:?}", + dst.get_deep_value() + ); + assert_eq!(dst.oplog_vv(), before_vv); + 
assert_eq!(dst.oplog_frontiers(), before_frontiers); + assert_eq!(dst.get_deep_value(), before_value); +} + // --------------------------------------------------------------------------- // 9. Detached tree methods that used to panic — FIXED // --------------------------------------------------------------------------- diff --git a/plans/20260522-fast-diff-calc.md b/plans/20260522-fast-diff-calc.md new file mode 100644 index 000000000..731ae5b61 --- /dev/null +++ b/plans/20260522-fast-diff-calc.md @@ -0,0 +1,473 @@ +# Fast Diff Calc Tracker Span Routing Plan + +Date: 2026-05-22 + +## Goal + +Improve checkout diff calculation for documents with many peers and many text/list-like containers by avoiding repeated version-vector scanning and repeated empty `IdToCursor` lookups inside each container tracker. + +The target direction is to move tracker checkout APIs away from "checkout to target `VersionVector`" and toward "apply these directed counter spans". The diff calculator should compute or route the relevant spans once, then pass only container-relevant spans into each tracker. + +## Implementation Status + +Implemented on branch `feat/scale-text-checkout-perf`: + +- `c350b0e8 bench: add many text checkout scenario` +- `5c3cd62a refactor: route richtext checkout through spans` +- `91e5ceb6 perf: filter richtext checkout spans by coverage` +- `57bfd675 bench: report checkout span averages` +- `f9fb539d test: compare filtered richtext diff` + +Current implementation covers: + +- Phase 0 profiling counters for tracker spans, filtered spans, skipped spans, max/avg spans per tracker checkout, max/avg affected containers, `IdToCursor::iter` calls, and empty `IdToCursor::iter` calls. +- Phase 1 directed richtext tracker span checkout API, with existing `checkout`, `checkout_causal`, and `diff` APIs kept as adapters. +- Removal of the tracker-only `current_frontier_hint`. 
+- Phase 2 per-container coverage filtering for text/list/movable-list richtext trackers, with conservative fallback when coverage is unavailable. +- Phase 3 filtered final diff materialization through coverage-aware tracker diff. + +Benchmark notes for `multi-container/latest-to-base` with the default 1000 peers, 10000 changes, 10000 text containers, 8 large text containers, and `LORO_TEXT_CHECKOUT_PROFILE=1`: + +| Version | Time | Avg total | Avg diff calc | Avg tracker checkout | Avg tracker diff | +| --- | ---: | ---: | ---: | ---: | ---: | +| `c350b0e8` baseline | 905.53-908.52 ms | 916.002244 ms | 900.339939 ms | 412.268949 ms | 424.615097 ms | +| current | 823.02-826.74 ms | 832.607089 ms | 816.320180 ms | 467.040713 ms | 281.617226 ms | + +Current profiling counters for the same run: + +- `tracker_spans=377117000` +- `filtered_tracker_spans=123753500` +- `skipped_tracker_spans=253363500` +- `id_to_cursor_iters=123753500` +- `empty_id_to_cursor_iters=123623500` +- `tracker_span_filter_calls=520000` +- `avg_tracker_spans_per_checkout=725` +- `max_tracker_spans_per_checkout=1000` +- `avg_filtered_tracker_spans_per_checkout=237` +- `max_filtered_tracker_spans_per_checkout=1000` +- `avg_diff_containers=10000` +- `max_diff_containers=10000` + +This shows the routing is skipping about two thirds of tracker span checks in the target benchmark. The remaining empty iterator count is still high because the first implementation stores one broad coverage span per `(container, peer)`, which intentionally allows false positives. Phase 4/5 should only be considered if this remaining cost shows up in production profiles. + +## Current Architecture + +The current checkout diff flow is: + +1. `OpLog::iter_from_lca_causally()` finds the LCA between `before` and `after`, computes the merged VV, and iterates changes causally from the LCA to the merged version. +2. `DiffCalculator::calc_diff_internal()` iterates each change/op and calls the per-container calculator. +3. 
Before applying the first op for a container in each change, the container calculator asks its tracker to checkout to the causal version immediately before that op. +4. `RichtextTracker::checkout_causal(CausalVersion)` computes `retreat` and `forward` spans internally by comparing its current VV against the target causal version. +5. `RichtextTracker::_checkout_spans()` applies the resulting spans by iterating `IdToCursor`. +6. At final diff materialization, `RichtextTracker::diff(from_vv, to_vv)` again does two full tracker checkouts: first to `from`, then to `to` with diff status enabled. + +Important current details: + +- `IdToCursor` is already internally keyed by `PeerID`: `FxHashMap<PeerID, Vec<Fragment>>`. +- Empty `IdToCursor::iter(span)` is cheap for a single call, but expensive when multiplied by many containers and many checkout steps. +- `CounterSpan` already has direction semantics. `start < end` is forward, `end < start` is reversed/retreat. `content_len()` uses absolute length, and `slice()` preserves direction. +- Existing `VersionVectorDiff` uses separate `retreat` and `forward` maps. Its internal `merge()` normalizes spans, so it should not be reused as-is for a single directed-span map API. +- `current_frontier_hint` is only maintained in the tracker today. It is not used as a fast path, and a single frontier hint is not enough to prove full causal equality. + +## Main Performance Problem + +The biggest waste is not just wide VV comparison. It is that a global peer span is handed to every affected text/list-like tracker even when that container has no op in that peer/counter range. + +Example: + +```text +global delta: peer 7, 0..1_000_000 +containers: 1000 LoroText roots +container A has peer 7 ops +container B..Z have no peer 7 ops in that span +``` + +Without container routing, every tracker still checks `id_to_cursor.iter(peer 7, 0..1_000_000)`. + +The proposed cache: + +```rust +FxHashMap<ContainerIdx, FxHashMap<PeerID, CounterSpan>> +``` + +is a good first-order way to skip most of these empty checks.
+ +## Critical Semantic Split + +There are two similar but different structures. They should not be conflated. + +### 1. Persistent Container Coverage Cache + +This answers: + +> Could this container possibly have any op from this peer in this counter range? + +Recommended representation: + +```rust +type ContainerPeerCoverage = FxHashMap<ContainerIdx, FxHashMap<PeerID, CounterSpan>>; +``` + +For coverage, spans should be treated as a coarse normalized min/max range. Direction is not meaningful because coverage is independent of checkout direction. + +False positives are allowed: + +```text +container has peer 1 ops at 10 and 1000 +coverage stores 10..1001 +query 500..600 falsely says "maybe" +``` + +False negatives are not allowed. + +### 2. Per-Checkout Directed Delta + +This answers: + +> From this tracker's current visibility state to the target visibility state, which peer/counter spans should be forwarded or retreated? + +Recommended representation: + +```rust +type DirectedPeerSpans = FxHashMap<PeerID, CounterSpan>; +``` + +Here `CounterSpan` direction is meaningful: + +```rust +CounterSpan::new(10, 20) // forward 10..20 +CounterSpan::new(20, 10) // retreat 20..10 +``` + +For a single transition, a given peer should only have one direction. If implementation ever needs both directions for the same peer, that means it is combining multiple transitions and must flush or split the delta. + +## Proposed Design + +### New Internal Types + +Start with explicit names even if they are just type aliases initially: + +```rust +type PeerSpanMap = FxHashMap<PeerID, CounterSpan>; + +struct ContainerOpCoverage { + by_container: FxHashMap<ContainerIdx, PeerSpanMap>, +} + +struct TrackerCheckoutSpans { + by_peer: PeerSpanMap, +} +``` + +`ContainerOpCoverage` stores broad normalized coverage. `TrackerCheckoutSpans` stores directed per-transition spans. + +Do not encode checkout direction in a persistent coverage cache. Preserve direction only in `TrackerCheckoutSpans`. + +### Span Operations Required + +Add helpers instead of using `CounterSpan::get_intersection()` directly.
The existing intersection helper assumes forward spans. + +Needed helpers: + +```rust +fn normalized_overlap(a: CounterSpan, b: CounterSpan) -> Option<(Counter, Counter)>; + +fn intersect_preserve_direction( + directed: CounterSpan, + coverage: CounterSpan, +) -> Option<CounterSpan>; + +fn extend_coverage(coverage: &mut CounterSpan, op_span: CounterSpan); + +fn merge_directed_delta(existing: &mut CounterSpan, incoming: CounterSpan) -> Result<(), MixedDirection>; +``` + +Rules: + +- Coverage should store normalized min/max ranges. +- Directed delta should preserve `start/end` direction. +- Intersecting a reversed directed span with coverage must return a reversed span. +- Merging directed deltas must reject mixed directions for the same peer in one transition. + +### Tracker API + +Add a new API: + +```rust +impl Tracker { + pub(crate) fn checkout_peer_spans(&mut self, spans: &PeerSpanMap); +} +``` + +Execution rules: + +1. Iterate reversed spans first and run retreat logic. +2. Iterate forward spans second and run forward logic. +3. For `IdToCursor::iter`, use normalized spans internally. +4. Update `current_vv` using the directed span endpoint: + - forward `10..20` sets peer end to `20` + - retreat `20..10` sets peer end to `10` + +Keep adapters temporarily: + +```rust +checkout(&VersionVector) +checkout_causal(CausalVersion) +``` + +These adapters can compute directed spans and call `checkout_peer_spans()`. That keeps the first step behavior-preserving. + +Remove `current_frontier_hint` after `checkout_peer_spans()` is in place. It is not a strong enough invariant and becomes unnecessary. + +### Diff Calculator Routing + +`DiffCalculator` should become responsible for deciding which spans are relevant to a container before calling the tracker. + +For each container calculator, maintain or access `ContainerOpCoverage`.
+ +When a global directed span is produced: + +```rust +global: peer 7, 1000..2000 +container coverage: peer 7, 1500..1600 +directed for tracker: peer 7, 1500..1600 +``` + +For retreat: + +```rust +global: peer 7, 2000..1000 +coverage: peer 7, 1500..1600 +directed for tracker: peer 7, 1599..1499 or equivalent reversed slice +``` + +The exact reversed boundary helper must be tested carefully against `CounterSpan::contains`, `min`, `max`, and `norm_end`. + +### Where Coverage Comes From + +Preferred first implementation: per-diff-calculation, opportunistic coverage. + +- When a container tracker applies an op, record that op's counter span into the coverage for that container. +- This coverage describes ops already known by that tracker. +- It is sufficient to filter most empty retreat/forward checks because tracker cannot act on op ids it has not seen yet anyway. + +Special cases: + +- Trackers seeded from shallow-root state chunks must also seed coverage for those chunks. Otherwise a later retreat over seeded content could be falsely skipped. +- Delete/move op counters must be recorded. The delete/move may refer to insert spans from other peers, but the version visibility toggle is caused by the delete/move op id itself. +- Style start/end ops must be recorded under their actual op ids, not only text insert ids. + +Longer-term option: a persistent OpLog-level container op coverage index. This may be worth it if building coverage per diff calculation still costs too much, but it increases invalidation and shallow-history complexity. + +## Design Risks + +### Risk 1: False Negatives + +False negatives in coverage are correctness bugs. They can leave tracker rope visibility wrong. + +Mitigation: + +- In debug/test builds, compare the new filtered spans against the old unfiltered checkout for selected cases. +- Add assertions that any `IdToCursor` entry affected by the old global span is included by the filtered span. 
+ +### Risk 2: Direction Loss + +Existing code often normalizes spans: + +- `IdToCursor::iter()` normalizes its input. +- `VersionVectorDiff::merge()` normalizes target spans. +- `IdSpan::ctr_start()` returns normalized start. + +This is fine for lookup, but not for representing a transition. The new directed API must preserve direction until after it updates `current_vv`. + +### Risk 3: Mixed Direction for Same Peer + +For one transition from current to target, a peer cannot both advance and retreat. But if the implementation accumulates spans across multiple transitions before flushing, mixed direction can appear. + +Mitigation: + +- Scope `TrackerCheckoutSpans` to a single target checkout. +- Reject mixed direction in helper code. + +### Risk 4: Final Diff Still Uses Full VV Checkout + +`RichtextTracker::diff(from, to)` currently calls: + +```rust +checkout(from) +checkout(to, on_diff_status = true) +``` + +If only `apply_change()` checkout is optimized, final diff materialization may still scan full VVs for every text container. + +Plan must include `diff_by_spans(from_spans, to_spans)` or equivalent container-filtered final diff checkout. + +### Risk 5: Sliced Changes and Partial Ops + +`calc_diff_internal()` may slice ops when the replay range starts or ends in the middle of a change/op. + +The filtered spans must align with the actual op slice being applied. It is acceptable for coverage to be broader than exact op slices, but it must never omit a sliced op that the tracker can see. + +### Risk 6: Shallow Snapshot / Unknown Chunks + +The current branch can seed richtext trackers from shallow-root state chunks. Coverage seeding must understand those chunks, or shallow checkout may skip spans that correspond to already-seeded tracker entries. + +Unknown chunks and GC/shallow root fallback paths should remain conservative: if coverage cannot be proven, pass the original global span through. 
+ +## Phased Plan + +### Phase 0: Measurement + +Add test-utils profiling counters around tracker checkout: + +- number of global checkout spans +- number of container-filtered spans +- number of spans skipped by coverage +- number of `IdToCursor::iter` calls +- number of empty `IdToCursor::iter` calls +- max/avg peers per checkout +- max/avg affected containers per checkout + +Use current benchmarks that model many peers and many text roots. Keep before/after numbers in benchmark notes. + +### Phase 1: Extract Tracker Span API + +Implement: + +```rust +Tracker::checkout_peer_spans(&PeerSpanMap) +``` + +Keep existing APIs as adapters: + +- `checkout(&VersionVector)` +- `checkout_causal(CausalVersion)` +- `diff(from_vv, to_vv)` + +No routing yet. This phase should be behavior-preserving. + +Remove `current_frontier_hint` in this phase if no longer needed. + +Verification: + +- tracker unit tests +- existing richtext/list/movable-list diff tests +- `cargo test -p loro-internal checkout` +- focused fuzz artifacts that previously hit checkout/diff calc + +### Phase 2: Add Container Coverage and Span Filtering + +Introduce `ContainerOpCoverage` in diff calc or in each container calculator. 
+ +Start with per-diff-calculation coverage: + +- record op spans when a tracker applies an op +- seed coverage when a tracker is seeded from existing state chunks +- use coverage to filter global directed spans before calling `checkout_peer_spans` + +Keep fallback conservative: + +- if no coverage exists for a container, use current behavior +- if the helper cannot safely preserve direction, use current behavior + +Verification: + +- debug comparison mode: run old unfiltered checkout and new filtered checkout on cloned trackers for small tests +- tests with many containers where only one container has ops in a wide peer span +- tests for reversed/retreat spans +- tests for sliced ops +- tests for delete/move/style ops + +### Phase 3: Container-Filtered Final Diff + +Add a tracker diff API that accepts directed/container-filtered spans: + +```rust +Tracker::diff_by_spans(from_spans, to_spans) +``` + +or split it into: + +```rust +checkout_peer_spans(from_spans) +checkout_peer_spans_mark_diff(to_spans) +``` + +This avoids doing full `from_vv` and `to_vv` checkouts for every richtext tracker during final diff materialization. + +Verification: + +- compare final `InternalDiff::RichtextRaw` against old implementation +- include shallow-root seeded trackers +- include multi-frontier checkout + +### Phase 4: Optimize Representation + +Only after benchmarks show the map overhead matters, introduce inline variants: + +```rust +enum PeerSpanSet { + Empty, + One(PeerID, CounterSpan), + Small(SmallVec<[(PeerID, CounterSpan); 4]>), + Map(FxHashMap<PeerID, CounterSpan>), +} +``` + +Do not start with this. It adds complexity before proving the basic routing wins.
+ +### Phase 5: Optional Persistent Coverage Index + +If per-diff coverage construction still costs too much, consider an OpLog/history-cache-level index: + +```rust +ContainerIdx -> PeerID -> CounterSpan coverage +``` + +This must handle: + +- import rollback +- shallow snapshot boundaries +- unknown containers +- history cache invalidation/freeing +- change-store compaction + +Because of those lifecycle risks, keep it as a later optimization. + +## Recommended First PR Scope + +Do not implement the full cache in one PR. + +First PR should: + +1. Introduce directed span helpers with tests. +2. Add `Tracker::checkout_peer_spans`. +3. Make `checkout` and `checkout_causal` delegate to it. +4. Remove `current_frontier_hint`. +5. Add profiling counters for skipped/empty span checks, even if routing is not active yet. + +Second PR should: + +1. Add `ContainerOpCoverage`. +2. Filter checkout spans per container. +3. Keep a conservative fallback path. +4. Add correctness comparison tests. + +This reduces blast radius and gives a clean place to benchmark API refactor vs container routing separately. + +## Open Questions + +1. Should coverage live in `DiffCalculator`, `RichtextDiffCalculator`, or a shared tracker layer? +2. How should shallow-root seeded richtext tracker coverage be initialized for style chunks? +3. Should movable-list use the same tracker span API immediately, or be migrated after richtext/list? +4. Is coarse one-span-per-container-peer enough for the known benchmarks, or do sparse same-peer histories require `SmallVec` later? +5. Should `VersionVectorDiff` be adapted to expose directed spans, or should this remain a separate type to avoid changing existing semantics? + +## Current Recommendation + +Proceed with the span-routing design, but keep two invariants explicit: + +1. Persistent coverage is broad and directionless. +2. Per-checkout deltas are directed and scoped to a single transition. 
+ +This design is more general than `current_frontier_hint`, directly addresses the many-container empty-lookup cost, and can be introduced incrementally with conservative fallback paths.