diff --git a/.gitignore b/.gitignore index 20ff1822e..38723c384 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ trace-*.json loom_test.json .env sponsorkit/.cache.json +.claude/ diff --git a/Cargo.lock b/Cargo.lock index ec39f80b0..948a0a242 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1873,7 +1873,7 @@ checksum = "3f3d053a135388e6b1df14e8af1212af5064746e9b87a06a345a7a779ee9695a" [[package]] name = "loro-wasm" -version = "1.12.0" +version = "1.12.1" dependencies = [ "console_error_panic_hook", "js-sys", diff --git a/crates/fuzz/fuzz/Cargo.toml b/crates/fuzz/fuzz/Cargo.toml index 106ab5018..ee0055016 100644 --- a/crates/fuzz/fuzz/Cargo.toml +++ b/crates/fuzz/fuzz/Cargo.toml @@ -30,6 +30,12 @@ path = "fuzz_targets/all.rs" test = false doc = false +[[bin]] +name = "diff_calc" +path = "fuzz_targets/diff_calc.rs" +test = false +doc = false + [[bin]] name = "gc_fuzz" path = "fuzz_targets/gc_fuzz.rs" diff --git a/crates/fuzz/fuzz/fuzz_targets/diff_calc.rs b/crates/fuzz/fuzz/fuzz_targets/diff_calc.rs new file mode 100644 index 000000000..937c32ea2 --- /dev/null +++ b/crates/fuzz/fuzz/fuzz_targets/diff_calc.rs @@ -0,0 +1,152 @@ +#![no_main] + +use fuzz::{ + actions::ActionWrapper, test_multi_sites, + test_multi_sites_on_one_doc_with_peer_seed_and_targets, Action, FuzzTarget, +}; +use libfuzzer_sys::fuzz_target; +use loro::ContainerType; + +fn lca_biased_actions(actions: Vec) -> Vec { + let mut biased = Vec::with_capacity(actions.len().saturating_mul(2).min(128)); + for (i, action) in actions.into_iter().take(48).enumerate() { + biased.push(action); + + let site = ((i * 37) % 251) as u8; + let other = site.wrapping_add(1); + let version = (i as u32).wrapping_mul(97); + let injected = match i % 8 { + 0 => Action::Sync { + from: site, + to: other, + }, + 1 => Action::DiffApply { + from: site, + to: other, + }, + 2 => Action::Checkout { site, to: version }, + 3 => Action::ForkAt { site, to: version }, + 4 => Action::ImportShallow { site, from: other }, + 5 => Action::ExportShallow { site }, + 6 => Action::StateOnlyRoundTrip { site }, + _ => Action::Commit { site }, + }; + biased.push(injected); + } + + biased +} + +fn run_text_diff_calc(actions: Vec) { + let mut actions = lca_biased_actions(actions); + test_multi_sites(5, vec![FuzzTarget::Text], &mut actions); +} + +fn run_one_doc_diff_calc(actions: Vec) { + let peer_seed = peer_seed_from_actions(&actions); + let mut actions = lca_biased_actions(actions); + test_multi_sites_on_one_doc_with_peer_seed_and_targets( + 5, + peer_seed, + vec![ContainerType::Text], + &mut actions, + ); +} + +fn mix_seed(seed: u64, value: u64) -> u64 { + seed ^ value + .wrapping_add(0x9E37_79B9_7F4A_7C15) + .wrapping_add(seed << 6) + .wrapping_add(seed >> 2) +} + +fn peer_seed_from_actions(actions: &[Action]) -> u64 { + let mut seed = mix_seed(0xD1FF_CA1C_7E57_0001, actions.len() as u64); + for action in actions.iter().take(8) { + seed = match action { + Action::Handle { + site, + target, + container, + action, + } => { + let mut seed = mix_seed(seed, 0); + seed = mix_seed(seed, *site as u64); + seed = mix_seed(seed, *target as u64); + seed = mix_seed(seed, *container as u64); + if let ActionWrapper::Generic(g) = action { + seed = mix_seed(seed, g.bool as u64); + seed = mix_seed(seed, g.key as u64); + seed = mix_seed(seed, g.pos as u64); + seed = mix_seed(seed, g.length as u64); + seed = mix_seed(seed, g.prop); + } + seed + } + Action::Checkout { site, to } => mix_seed(mix_seed(seed, 1), ((*site as u64) << 32) | *to as u64), + Action::Undo { site, op_len } => { + mix_seed(mix_seed(seed, 2), ((*site as u64) << 32) | *op_len as u64) + } + Action::SyncAllUndo { site, op_len } => { + mix_seed(mix_seed(seed, 3), ((*site as u64) << 32) | *op_len as u64) + } + Action::Sync { from, to } => { + mix_seed(mix_seed(seed, 4), ((*from as u64) << 8) | *to as u64) + } + Action::SyncAll => mix_seed(seed, 5), + Action::ForkAt { site, to } => { + mix_seed(mix_seed(seed, 6), ((*site as u64) << 32) | *to as u64) + } + Action::DiffApply { from, to } => { + mix_seed(mix_seed(seed, 7), ((*from as u64) << 8) | *to as u64) + } + Action::Query { + site, + target, + query_type, + } => mix_seed( + mix_seed(seed, 8), + ((*site as u64) << 16) | ((*target as u64) << 8) | *query_type as u64, + ), + Action::ExportShallow { site } => mix_seed(mix_seed(seed, 9), *site as u64), + Action::ImportShallow { site, from } => { + mix_seed(mix_seed(seed, 10), ((*site as u64) << 8) | *from as u64) + } + Action::StateOnlyRoundTrip { site } => mix_seed(mix_seed(seed, 11), *site as u64), + Action::Commit { site } => mix_seed(mix_seed(seed, 12), *site as u64), + Action::SetCommitOptions { site, origin, msg } => mix_seed( + mix_seed(seed, 13), + ((*site as u64) << 16) | ((*origin as u64) << 8) | *msg as u64, + ), + }; + } + seed +} + +fuzz_target!(|actions: Vec| { + if actions.is_empty() { + return; + } + + let use_one_doc = match &actions[0] { + Action::Handle { site, .. } + | Action::Checkout { site, .. } + | Action::Undo { site, .. } + | Action::SyncAllUndo { site, .. } + | Action::ForkAt { site, .. } + | Action::Query { site, .. } + | Action::ExportShallow { site } + | Action::ImportShallow { site, .. } + | Action::StateOnlyRoundTrip { site } + | Action::Commit { site } + | Action::SetCommitOptions { site, .. } => site % 2 == 1, + Action::Sync { from, .. } | Action::DiffApply { from, .. } => from % 2 == 1, + Action::SyncAll => false, + }; + + if use_one_doc { + run_one_doc_diff_calc(actions); + } else { + run_text_diff_calc(actions); + } +}); diff --git a/crates/fuzz/src/container/text.rs b/crates/fuzz/src/container/text.rs index 68979bb55..14fc16f9b 100644 --- a/crates/fuzz/src/container/text.rs +++ b/crates/fuzz/src/container/text.rs @@ -83,7 +83,7 @@ impl ActorTrait for TextActor { .unwrap() .text .to_delta(); - assert_eq!(value, text_h); + assert_eq!(value, text_h, "peer={}", loro.peer_id()); } fn add_new_container(&mut self, container: Container) { diff --git a/crates/fuzz/src/container/tree.rs b/crates/fuzz/src/container/tree.rs index 97576b6cb..eba9b280a 100644 --- a/crates/fuzz/src/container/tree.rs +++ b/crates/fuzz/src/container/tree.rs @@ -333,7 +333,9 @@ impl Actionable for TreeAction { peer: before.0, counter: before.1, }; - super::unwrap(tree.mov_before(target, before)); + if let Err(LoroError::TreeError(e)) = tree.mov_before(target, before) { + tracing::warn!("move before error {}", e); + } None } TreeActionInner::MoveAfter { target, after } => { @@ -345,7 +347,9 @@ impl Actionable for TreeAction { peer: after.0, counter: after.1, }; - super::unwrap(tree.mov_after(target, after)); + if let Err(LoroError::TreeError(e)) = tree.mov_after(target, after) { + tracing::warn!("move after error {}", e); + } None } TreeActionInner::Meta { meta: (k, v) } => { @@ -362,7 +366,7 @@ impl Actionable for TreeAction { } TreeActionInner::MetaDelete { key } => { let meta = super::unwrap(tree.get_meta(target))?; - meta.delete(key); + let _ = meta.delete(key); None } TreeActionInner::MetaClear => { diff --git a/crates/fuzz/src/crdt_fuzzer.rs b/crates/fuzz/src/crdt_fuzzer.rs index 269043e18..9ae6d14fc 100644 --- a/crates/fuzz/src/crdt_fuzzer.rs +++ b/crates/fuzz/src/crdt_fuzzer.rs @@ -264,7 +264,11 @@ impl CRDTFuzzer { let a_frontiers = a.loro.oplog_frontiers(); let b_frontiers = b.loro.oplog_frontiers(); if let Ok(diff) = a.loro.diff(&a_frontiers, &b_frontiers) { - let _ = b.loro.apply_diff(diff); + let before_apply = b.loro.state_frontiers(); + let result = b.loro.apply_diff(diff); + if result.is_ok() || b.loro.state_frontiers() != before_apply { + b.loro.commit(); + } } } Action::Query { @@ -427,7 +431,18 @@ impl CRDTFuzzer { if let Ok(bytes) = actor.loro.export(loro::ExportMode::state_only(Some(&f))) { let new_doc = LoroDoc::new(); if new_doc.import(&bytes).is_ok() { - assert_eq!(new_doc.get_deep_value(), actor.loro.get_deep_value()); + assert_eq!( + new_doc.get_deep_value(), + actor.loro.get_deep_value(), + "site={site} state_frontiers={:?} oplog_frontiers={:?} oplog_vv={:?} imported_frontiers={:?} imported_vv={:?} shallow_frontiers={:?} shallow_vv={:?}", + actor.loro.state_frontiers(), + actor.loro.oplog_frontiers(), + actor.loro.oplog_vv(), + new_doc.oplog_frontiers(), + new_doc.oplog_vv(), + new_doc.shallow_since_frontiers(), + new_doc.shallow_since_vv(), + ); } } } @@ -463,8 +478,8 @@ impl CRDTFuzzer { if a_shallow || b_shallow { continue; } - let a_doc = &mut a.loro; - let b_doc = &mut b.loro; + let a_doc = &a.loro; + let b_doc = &b.loro; info_span!("Attach", peer = i).in_scope(|| { a_doc.attach(); }); diff --git a/crates/fuzz/src/lib.rs b/crates/fuzz/src/lib.rs index dacdf5456..e02e2d59a 100644 --- a/crates/fuzz/src/lib.rs +++ b/crates/fuzz/src/lib.rs @@ -15,4 +15,7 @@ pub use mem_kv_fuzzer::{ minify_simple as kv_minify_simple, test_mem_kv_fuzzer, test_random_bytes_import, Action as KVAction, }; -pub use one_doc_fuzzer::test_multi_sites_on_one_doc; +pub use one_doc_fuzzer::{ + test_multi_sites_on_one_doc, test_multi_sites_on_one_doc_with_peer_seed, + test_multi_sites_on_one_doc_with_peer_seed_and_targets, +}; diff --git a/crates/fuzz/src/one_doc_fuzzer.rs b/crates/fuzz/src/one_doc_fuzzer.rs index 4f537cc2f..357cd5011 100644 --- a/crates/fuzz/src/one_doc_fuzzer.rs +++ b/crates/fuzz/src/one_doc_fuzzer.rs @@ -13,19 +13,67 @@ struct OneDocFuzzer { doc: LoroDoc, branches: Vec, undo_managers: Vec, + peer_seed: u64, + valid_targets: Vec, +} + +fn default_targets() -> Vec { + vec![ + ContainerType::Text, + ContainerType::List, + ContainerType::Map, + ContainerType::MovableList, + ContainerType::Tree, + ContainerType::Counter, + ] } impl OneDocFuzzer { - pub fn new(site_num: usize) -> Self { + pub fn new_with_peer_seed_and_targets( + site_num: usize, + peer_seed: u64, + valid_targets: Vec, + ) -> Self { + assert!(!valid_targets.is_empty()); let doc = LoroDoc::new(); doc.set_detached_editing(true); - Self { + let mut fuzzer = Self { doc: doc.clone(), branches: (0..site_num).map(|_| Branch::default()).collect(), undo_managers: (0..site_num).map(|_| UndoManager::new(&doc)).collect(), + peer_seed, + valid_targets, + }; + fuzzer.use_next_peer_id(); + fuzzer + } + + fn next_peer_id(&mut self) -> u64 { + self.peer_seed = self.peer_seed.wrapping_add(0x9E37_79B9_7F4A_7C15); + let mut z = self.peer_seed; + z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9); + z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB); + let peer = z ^ (z >> 31); + if peer == u64::MAX { + 0 + } else { + peer } } + fn use_next_peer_id(&mut self) { + let peer = self.next_peer_id(); + self.doc.set_peer_id(peer).unwrap(); + } + + fn checkout(&mut self, frontiers: &Frontiers) -> loro::LoroResult<()> { + let result = self.doc.checkout(frontiers); + if result.is_ok() { + self.use_next_peer_id(); + } + result + } + fn site_num(&self) -> usize { self.branches.len() } @@ -53,17 +101,11 @@ impl OneDocFuzzer { } *site %= max_users; let branch = &mut self.branches[*site as usize]; - let valid_targets = [ - ContainerType::Text, - ContainerType::List, - ContainerType::Map, - ContainerType::MovableList, - ContainerType::Tree, - ContainerType::Counter, - ]; - *target %= valid_targets.len() as u8; - action.convert_to_inner(&valid_targets[*target as usize]); - self.doc.checkout(&branch.frontiers).unwrap(); + *target %= self.valid_targets.len() as u8; + let target_ty = self.valid_targets[*target as usize]; + action.convert_to_inner(&target_ty); + let frontiers = branch.frontiers.clone(); + self.checkout(&frontiers).unwrap(); if let Some(action) = action.as_action_mut() { match action { crate::actions::ActionInner::Map(..) => {} @@ -367,15 +409,7 @@ impl OneDocFuzzer { Action::Query { site, target, .. } => { *site %= max_users; // target maps to container type index - let valid_targets = [ - ContainerType::Text, - ContainerType::List, - ContainerType::Map, - ContainerType::MovableList, - ContainerType::Tree, - ContainerType::Counter, - ]; - *target %= valid_targets.len() as u8; + *target %= self.valid_targets.len() as u8; } Action::ExportShallow { site } => { *site %= max_users; @@ -402,9 +436,9 @@ impl OneDocFuzzer { fn apply_action(&mut self, action: &mut Action) { match action { Action::Handle { site, action, .. } => { + let branch_frontiers = self.branches[*site as usize].frontiers.clone(); + self.checkout(&branch_frontiers).unwrap(); let doc = &mut self.doc; - let branch = &mut self.branches[*site as usize]; - doc.checkout(&branch.frontiers).unwrap(); match action { ActionWrapper::Action(action_inner) => match action_inner { crate::actions::ActionInner::Map(map_action) => match map_action { @@ -551,7 +585,11 @@ impl OneDocFuzzer { peer: before.0, counter: before.1, }; - tree.mov_before(target, before).unwrap(); + if let Err(LoroError::TreeError(e)) = + tree.mov_before(target, before) + { + tracing::warn!("move before error {}", e); + } } crate::container::TreeActionInner::MoveAfter { target, after } => { let target = TreeID { @@ -562,7 +600,11 @@ impl OneDocFuzzer { peer: after.0, counter: after.1, }; - tree.mov_after(target, after).unwrap(); + if let Err(LoroError::TreeError(e)) = + tree.mov_after(target, after) + { + tracing::warn!("move after error {}", e); + } } crate::container::TreeActionInner::Meta { meta: (k, v) } => { let meta = tree.get_meta(target).unwrap(); @@ -615,12 +657,11 @@ impl OneDocFuzzer { } } Action::Undo { site, op_len } => { - let undo = &mut self.undo_managers[*site as usize]; let undo_len = *op_len % 16; - if undo_len != 0 && undo.can_undo() { - self.doc - .checkout(&self.branches[*site as usize].frontiers) - .unwrap(); + if undo_len != 0 && self.undo_managers[*site as usize].can_undo() { + let branch_frontiers = self.branches[*site as usize].frontiers.clone(); + self.checkout(&branch_frontiers).unwrap(); + let undo = &mut self.undo_managers[*site as usize]; for _ in 0..undo_len { undo.undo().unwrap(); } @@ -633,12 +674,11 @@ impl OneDocFuzzer { for b in self.branches.iter_mut() { b.frontiers = f.clone(); } - let undo = &mut self.undo_managers[*site as usize]; let undo_len = *op_len % 8; - if undo_len != 0 && undo.can_undo() { - self.doc - .checkout(&self.branches[*site as usize].frontiers) - .unwrap(); + if undo_len != 0 && self.undo_managers[*site as usize].can_undo() { + let branch_frontiers = self.branches[*site as usize].frontiers.clone(); + self.checkout(&branch_frontiers).unwrap(); + let undo = &mut self.undo_managers[*site as usize]; for _ in 0..undo_len { undo.undo().unwrap(); } @@ -646,7 +686,7 @@ impl OneDocFuzzer { undo.clear(); } } - Action::ForkAt { site, to } => { + Action::ForkAt { site, to: _ } => { let frontiers = self.branches[*site as usize].frontiers.clone(); let _forked = self.doc.fork_at(&frontiers); } @@ -654,7 +694,11 @@ impl OneDocFuzzer { let from_frontiers = self.branches[*from as usize].frontiers.clone(); let to_frontiers = self.branches[*to as usize].frontiers.clone(); if let Ok(diff) = self.doc.diff(&from_frontiers, &to_frontiers) { - let _ = self.doc.apply_diff(diff); + let before_apply = self.doc.state_frontiers(); + let result = self.doc.apply_diff(diff); + if result.is_ok() || self.doc.state_frontiers() != before_apply { + self.doc.commit(); + } } } Action::Query { @@ -662,17 +706,9 @@ impl OneDocFuzzer { target, query_type, } => { - let branch = &self.branches[*site as usize]; - self.doc.checkout(&branch.frontiers).unwrap(); - let valid_targets = [ - ContainerType::Text, - ContainerType::List, - ContainerType::Map, - ContainerType::MovableList, - ContainerType::Tree, - ContainerType::Counter, - ]; - let ty = valid_targets[*target as usize % valid_targets.len()]; + let branch_frontiers = self.branches[*site as usize].frontiers.clone(); + self.checkout(&branch_frontiers).unwrap(); + let ty = self.valid_targets[*target as usize % self.valid_targets.len()]; match ty { ContainerType::Text => { let text = self.doc.get_text("text"); @@ -800,8 +836,8 @@ impl OneDocFuzzer { } } Action::ExportShallow { site } => { - let branch = &self.branches[*site as usize]; - self.doc.checkout(&branch.frontiers).unwrap(); + let branch_frontiers = self.branches[*site as usize].frontiers.clone(); + self.checkout(&branch_frontiers).unwrap(); let f = self.doc.oplog_frontiers(); if !f.is_empty() { let _ = self.doc.export(loro::ExportMode::shallow_snapshot(&f)); @@ -809,25 +845,38 @@ impl OneDocFuzzer { } Action::ImportShallow { site, from } => { let from_frontiers = self.branches[*from as usize].frontiers.clone(); - self.doc.checkout(&from_frontiers).unwrap(); + self.checkout(&from_frontiers).unwrap(); let f = self.doc.oplog_frontiers(); if !f.is_empty() { if let Ok(bytes) = self.doc.export(loro::ExportMode::shallow_snapshot(&f)) { let site_frontiers = self.branches[*site as usize].frontiers.clone(); - self.doc.checkout(&site_frontiers).unwrap(); + self.checkout(&site_frontiers).unwrap(); let _ = self.doc.import(&bytes); } } } Action::StateOnlyRoundTrip { site } => { - let branch = &self.branches[*site as usize]; - self.doc.checkout(&branch.frontiers).unwrap(); + let branch_frontiers = self.branches[*site as usize].frontiers.clone(); + self.checkout(&branch_frontiers).unwrap(); let f = self.doc.state_frontiers(); if !f.is_empty() { if let Ok(bytes) = self.doc.export(loro::ExportMode::state_only(Some(&f))) { let new_doc = LoroDoc::new(); if new_doc.import(&bytes).is_ok() { - assert_eq!(new_doc.get_deep_value(), self.doc.get_deep_value()); + let imported = new_doc.get_deep_value(); + let current = self.doc.get_deep_value(); + assert_eq!( + imported, + current, + "site={site} state_frontiers={:?} oplog_frontiers={:?} oplog_vv={:?} imported_frontiers={:?} imported_vv={:?} shallow_frontiers={:?} shallow_vv={:?}", + self.doc.state_frontiers(), + self.doc.oplog_frontiers(), + self.doc.oplog_vv(), + new_doc.oplog_frontiers(), + new_doc.oplog_vv(), + new_doc.shallow_since_frontiers(), + new_doc.shallow_since_vv() + ); } } } @@ -836,8 +885,8 @@ impl OneDocFuzzer { self.doc.commit(); } Action::SetCommitOptions { site, origin, msg } => { - let branch = &self.branches[*site as usize]; - self.doc.checkout(&branch.frontiers).unwrap(); + let branch_frontiers = self.branches[*site as usize].frontiers.clone(); + self.checkout(&branch_frontiers).unwrap(); let origins = ["fuzz", "test", "a", "b", "c", "d", "e", "f"]; self.doc .set_next_commit_origin(origins[*origin as usize % origins.len()]); @@ -848,18 +897,43 @@ impl OneDocFuzzer { } } - fn check_sync(&self) { + fn check_sync(&mut self) { self.doc.checkout_to_latest(); + self.use_next_peer_id(); self.doc.check_state_correctness_slow(); - for b in self.branches.iter() { - self.doc.checkout(&b.frontiers).unwrap(); + for i in 0..self.branches.len() { + let frontiers = self.branches[i].frontiers.clone(); + self.checkout(&frontiers).unwrap(); self.doc.check_state_correctness_slow(); } } } pub fn test_multi_sites_on_one_doc(site_num: u8, actions: &mut [Action]) { - let mut fuzzer = OneDocFuzzer::new(site_num as usize); + test_multi_sites_on_one_doc_with_peer_seed(site_num, 0, actions); +} + +pub fn test_multi_sites_on_one_doc_with_peer_seed( + site_num: u8, + peer_seed: u64, + actions: &mut [Action], +) { + test_multi_sites_on_one_doc_with_peer_seed_and_targets( + site_num, + peer_seed, + default_targets(), + actions, + ); +} + +pub fn test_multi_sites_on_one_doc_with_peer_seed_and_targets( + site_num: u8, + peer_seed: u64, + valid_targets: Vec, + actions: &mut [Action], +) { + let mut fuzzer = + OneDocFuzzer::new_with_peer_seed_and_targets(site_num as usize, peer_seed, valid_targets); let mut applied = Vec::new(); for action in actions.iter_mut() { fuzzer.pre_process(action); diff --git a/crates/fuzz/tests/crash_repro.rs b/crates/fuzz/tests/crash_repro.rs index e31f80258..b172778fd 100644 --- a/crates/fuzz/tests/crash_repro.rs +++ b/crates/fuzz/tests/crash_repro.rs @@ -21,6 +21,45 @@ fn repro_crash_6044ee() { run_from_bytes(&bytes); } +/// Regression for branch-specific state divergence on MovableList. +/// +/// The diff calculator must rebuild MovableList's tracker with movable-list +/// semantics when the source version is not fully represented by the replayed op +/// context. Reusing List's insert/delete-only rebuild path is not enough. +#[test] +fn repro_crash_lca_movable_list_divergence() { + let bytes = [ + 0x21, 0x25, 0x80, 0x00, 0x01, 0x00, 0x07, 0x11, 0x11, 0x2f, 0xdf, 0x2f, 0x00, 0x2f, 0x00, + 0x01, 0x00, 0x00, 0x2f, 0x00, 0x01, 0x00, 0x2f, 0x2f, 0x2f, 0x2b, 0x2f, 0x2f, 0x2f, 0x03, + 0x73, 0x76, 0x00, 0x00, 0x76, 0xa4, 0x82, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x4b, 0x70, 0xdf, + 0xdf, 0xdf, 0xdf, 0xdf, 0x00, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0xa4, 0x82, 0xbf, 0xbf, 0xbf, + 0x4b, 0x70, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0x00, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, + 0xdf, 0x30, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, 0x17, 0xdf, 0xdf, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x3d, 0x3d, 0x3d, 0x80, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0xdf, 0xdf, 0xdf, 0xdf, + ]; + run_from_bytes(&bytes); +} + +/// Manual guard for the old naive fix that widened List's rebuild gate to +/// include `MovableList`. +/// +/// That path panicked because List's rebuild helper only understands +/// insert/delete ops, while MovableList also has move/set ops. +#[test] +#[ignore = "manual guard for the old naive MovableList rebuild path"] +fn repro_crash_lca_movable_list_unreachable_after_naive_fix() { + let bytes = [ + 0x97, 0x76, 0x00, 0x01, 0x01, 0x2e, 0x01, 0x26, 0x28, 0x20, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, + 0x00, 0xbf, 0x01, 0x07, 0x00, 0x00, 0x01, 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, + 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, 0x89, 0x00, 0x07, + 0x3f, 0xbf, 0x00, 0x00, 0x00, 0x01, 0x01, 0x2e, 0x01, 0x26, 0x28, 0x20, 0xbf, 0xbf, 0xbf, + 0xbf, 0xbf, 0x00, 0xbf, 0x00, 0x07, 0x3f, 0xbf, 0x00, 0x00, 0xbf, 0x00, 0x00, 0x00, 0xbf, + 0xbf, 0xbf, 0x2f, 0x2f, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x00, + ]; + run_from_bytes(&bytes); +} + #[test] #[ignore = "run manually with cargo test -- --ignored"] fn repro_crash_b2b3d9() { diff --git a/crates/fuzz/tests/test.rs b/crates/fuzz/tests/test.rs index 41964a5d3..73bbb488c 100644 --- a/crates/fuzz/tests/test.rs +++ b/crates/fuzz/tests/test.rs @@ -15,11 +15,12 @@ use fuzz::{ FuzzTarget, FuzzValue::*, }, - fuzz_local_events, test_multi_sites_on_one_doc, test_multi_sites_with_gc, + fuzz_local_events, test_multi_sites_on_one_doc, test_multi_sites_on_one_doc_with_peer_seed, + test_multi_sites_with_gc, }; -use loro::{ContainerType::*, ExportMode, LoroCounter, LoroDoc}; +use loro::{ContainerID, ContainerType::*, ExportMode, LoroCounter, LoroDoc, LoroError}; use loro_without_counter::ContainerTrait as _; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; #[ctor::ctor] fn init() { @@ -136,6 +137,67 @@ fn all_fuzz_state_only_before_shallow_root() { ) } +#[test] +fn one_doc_tree_move_before_cycle_error_is_ignored() { + test_multi_sites_on_one_doc( + 5, + &mut [ + Handle { + site: 0, + target: 4, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + Handle { + site: 0, + target: 4, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + Handle { + site: 0, + target: 4, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 1, + length: 0, + prop: 2, + }), + }, + Handle { + site: 0, + target: 4, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 1, + prop: 4, + }), + }, + ], + ) +} + #[test] fn test_local_events() { fuzz_local_events(vec![ @@ -8692,6 +8754,1446 @@ fn diff_calc_fuzz_err_3() { ) } +#[test] +fn diff_calc_text_lca_biased_regression() { + fn text_handle(site: u8, prop: u64, pos: usize, length: usize) -> Action { + Handle { + site, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(length as i32), + bool: false, + key: prop as u32, + pos, + length, + prop, + }), + } + } + + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + text_handle(0, 0, 0, 10), + text_handle(0, 0, 1, 20), + Sync { from: 0, to: 1 }, + Sync { from: 0, to: 2 }, + text_handle(0, 1, 1, 1), + text_handle(0, 2, 0, 2), + text_handle(1, 0, 0, 100), + text_handle(1, 7, 1, 3), + text_handle(2, 0, 0, 200), + text_handle(2, 4, 0, 300), + DiffApply { from: 0, to: 1 }, + Checkout { site: 1, to: 1 }, + ForkAt { site: 2, to: 2 }, + ImportShallow { site: 3, from: 0 }, + DiffApply { from: 3, to: 1 }, + Sync { from: 0, to: 3 }, + text_handle(3, 6, 0, 1), + ExportShallow { site: 1 }, + StateOnlyRoundTrip { site: 0 }, + SyncAll, + DiffApply { from: 4, to: 0 }, + SyncAll, + ], + ) +} + +fn diff_calc_lca_biased_actions(actions: Vec) -> Vec { + let mut biased = Vec::with_capacity(actions.len().saturating_mul(2).min(128)); + for (i, action) in actions.into_iter().take(48).enumerate() { + biased.push(action); + + let site = ((i * 37) % 251) as u8; + let other = site.wrapping_add(1); + let version = (i as u32).wrapping_mul(97); + let injected = match i % 8 { + 0 => Action::Sync { + from: site, + to: other, + }, + 1 => Action::DiffApply { + from: site, + to: other, + }, + 2 => Action::Checkout { site, to: version }, + 3 => Action::ForkAt { site, to: version }, + 4 => Action::ImportShallow { site, from: other }, + 5 => Action::ExportShallow { site }, + 6 => Action::StateOnlyRoundTrip { site }, + _ => Action::Commit { site }, + }; + biased.push(injected); + } + + biased +} + +#[test] +fn diff_calc_text_shallow_partial_delete_state_only_regression() { + let mut actions = diff_calc_lca_biased_actions(vec![ + SyncAll, + Handle { + site: 0, + target: 0, + container: 107, + action: Generic(GenericAction { + value: Container(Map), + bool: true, + key: 2981738929, + pos: 234368343062969, + length: 10489240066702020864, + prop: 10510998633284997521, + }), + }, + DiffApply { from: 145, to: 145 }, + SyncAllUndo { + site: 145, + op_len: 2442236305, + }, + DiffApply { from: 145, to: 145 }, + DiffApply { from: 145, to: 107 }, + SyncAll, + Handle { + site: 0, + target: 255, + container: 255, + action: Generic(GenericAction { + value: Container(Unknown(0)), + bool: false, + key: 4294967253, + pos: 13961544589419610111, + length: 10489324899457679809, + prop: 10489238200097457195, + }), + }, + Commit { site: 145 }, + DiffApply { from: 145, to: 222 }, + DiffApply { from: 145, to: 145 }, + DiffApply { from: 145, to: 145 }, + DiffApply { from: 222, to: 145 }, + DiffApply { from: 145, to: 145 }, + SyncAllUndo { + site: 145, + op_len: 2442236305, + }, + DiffApply { from: 145, to: 145 }, + DiffApply { from: 145, to: 107 }, + SyncAll, + Handle { + site: 0, + target: 255, + container: 255, + action: Generic(GenericAction { + value: Container(Counter), + bool: true, + key: 4294967295, + pos: 18446675634996717601, + length: 940423290353090559, + prop: 940422246894996749, + }), + }, + DiffApply { from: 145, to: 145 }, + Commit { site: 145 }, + DiffApply { from: 145, to: 145 }, + DiffApply { from: 37, to: 107 }, + Handle { + site: 0, + target: 0, + container: 51, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + ]); + + test_multi_sites(5, vec![FuzzTarget::Text], &mut actions) +} + +#[test] +fn diff_calc_text_state_only_target_state_delta_regression() { + let mut actions = diff_calc_lca_biased_actions(vec![ + Handle { + site: 50, + target: 11, + container: 11, + action: Generic(GenericAction { + value: Container(Tree), + bool: true, + key: 1074771381, + pos: 11619076483642895195, + length: 10923366098549577594, + prop: 14250796595886039674, + }), + }, + Handle { + site: 0, + target: 48, + container: 102, + action: Generic(GenericAction { + value: I32(0), + bool: true, + key: 2644362693, + pos: 11357407136651739293, + length: 1683503609540921722, + prop: 7382525687876616192, + }), + }, + DiffApply { from: 39, to: 190 }, + SetCommitOptions { + site: 255, + origin: 254, + msg: 249, + }, + DiffApply { from: 11, to: 11 }, + Handle { + site: 121, + target: 121, + container: 121, + action: Generic(GenericAction { + value: Container(List), + bool: true, + key: 1802201899, + pos: 13910329915888855935, + length: 7740398493640649563, + prop: 4340356560781419, + }), + }, + SyncAll, + SyncAll, + SyncAll, + SyncAll, + Handle { + site: 3, + target: 136, + container: 107, + action: Generic(GenericAction { + value: Container(Text), + bool: false, + key: 1536884736, + pos: 1080864967130854165, + length: 313532612608, + prop: 18446659501508528384, + }), + }, + Handle { + site: 52, + target: 52, + container: 52, + action: Generic(GenericAction { + value: I32(589824), + bool: false, + key: 589824, + pos: 269380348805120, + length: 3761688987579973647, + prop: 150994944, + }), + }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(65535), + bool: true, + key: 3318072772, + pos: 18430354503300924869, + length: 4467548590027359531, + prop: 9476562491464173885, + }), + }, + DiffApply { from: 131, to: 131 }, + SyncAll, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + ]); + + test_multi_sites(5, vec![FuzzTarget::Text], &mut actions) +} + +#[test] +fn diff_calc_text_lca_crash_246468929277f72f() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + Handle { + site: 46, + target: 32, + container: 130, + action: Generic(GenericAction { + value: I32(939524287), + bool: false, + key: 4294967287, + pos: 10016005571271983106, + length: 18446742974198019702, + prop: 3387553543993819034, + }), + }, + Sync { from: 0, to: 1 }, + Handle { + site: 2, + target: 243, + container: 255, + action: Generic(GenericAction { + value: Container(Map), + bool: false, + key: 3570717697, + pos: 144680349937369088, + length: 18446464806379717122, + prop: 513691898498514943, + }), + }, + DiffApply { from: 37, to: 38 }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + Checkout { site: 74, to: 194 }, + ], + ) +} + +#[test] +fn diff_calc_text_rollback_with_sliced_concurrent_insert_regression() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + Handle { + site: 0, + target: 17, + container: 0, + action: Generic(GenericAction { + value: I32(-184602613), + bool: false, + key: 1414812741, + pos: 6076574518398440532, + length: 464807907718091860, + prop: 12080808962634274303, + }), + }, + Sync { from: 0, to: 1 }, + Handle { + site: 3, + target: 190, + container: 238, + action: Generic(GenericAction { + value: Container(Tree), + bool: true, + key: 3187870631, + pos: 797321472558737319, + length: 12080808863958810115, + prop: 12080833445734360999, + }), + }, + DiffApply { from: 37, to: 38 }, + Handle { + site: 11, + target: 3, + container: 190, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + Checkout { site: 74, to: 194 }, + ], + ) +} + +#[test] +fn diff_calc_text_checkout_then_splice_position_regression() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + Handle { + site: 94, + target: 193, + container: 44, + action: Generic(GenericAction { + value: I32(-12845057), + bool: true, + key: 16777215, + pos: 7366546743523016704, + length: 14788478684628962674, + prop: 4268070197446523707, + }), + }, + Sync { from: 0, to: 1 }, + StateOnlyRoundTrip { site: 205 }, + DiffApply { from: 37, to: 38 }, + Sync { from: 88, to: 94 }, + Checkout { site: 74, to: 194 }, + Handle { + site: 59, + target: 205, + container: 205, + action: Generic(GenericAction { + value: I32(-1144177861), + bool: true, + key: 990039486, + pos: 15564440312192434177, + length: 13748851727568419771, + prop: 14788479554049213133, + }), + }, + ForkAt { site: 111, to: 291 }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + ImportShallow { + site: 148, + from: 149, + }, + ], + ) +} + +#[test] +fn diff_calc_text_update_tracker_regression() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + Handle { + site: 102, + target: 13, + container: 114, + action: Generic(GenericAction { + value: I32(-12573), + bool: true, + key: 3825205247, + pos: 17870283321406115343, + length: 11646817304741675007, + prop: 11659273617933336335, + }), + }, + Sync { from: 0, to: 1 }, + Handle { + site: 4, + target: 97, + container: 137, + action: Generic(GenericAction { + value: I32(3618560), + bool: false, + key: 4278190080, + pos: 11646768017753543804, + length: 12686045370614353259, + prop: 4991472726644998155, + }), + }, + DiffApply { from: 37, to: 38 }, + Sync { from: 85, to: 85 }, + Checkout { site: 74, to: 194 }, + Sync { from: 85, to: 85 }, + ForkAt { site: 111, to: 291 }, + DiffApply { from: 129, to: 129 }, + ImportShallow { + site: 148, + from: 149, + }, + DiffApply { from: 129, to: 129 }, + ExportShallow { site: 185 }, + SyncAllUndo { + site: 69, + op_len: 4294788595, + }, + StateOnlyRoundTrip { site: 222 }, + SetCommitOptions { + site: 213, + origin: 185, + msg: 185, + }, + Commit { site: 8 }, + ImportShallow { + site: 161, + from: 115, + }, + Sync { from: 45, to: 46 }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + DiffApply { from: 82, to: 83 }, + ], + ) +} + +#[test] +fn diff_calc_text_shallow_split_style_end_regression() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + SyncAll, + Sync { from: 0, to: 1 }, + Handle { + site: 1, + target: 1, + container: 1, + action: Generic(GenericAction { + value: I32(16843009), + bool: true, + key: 3520188881, + pos: 15119096123158024657, + length: 14106333700171092433, + prop: 14106333089244627907, + }), + }, + DiffApply { from: 37, to: 38 }, + DiffApply { from: 195, to: 1 }, + Checkout { site: 74, to: 194 }, + Handle { + site: 1, + target: 1, + container: 53, + action: Generic(GenericAction { + value: I32(16843009), + bool: true, + key: 19792129, + pos: 72340172838076673, + length: 3314931904544833793, + prop: 1953167796008487780, + }), + }, + ForkAt { site: 111, to: 291 }, + ImportShallow { + site: 195, + from: 195, + }, + ImportShallow { + site: 148, + from: 149, + }, + ForkAt { + site: 115, + to: 1936946035, + }, + ExportShallow { site: 185 }, + Checkout { + site: 65, + to: 16843009, + }, + StateOnlyRoundTrip { site: 222 }, + Handle { + site: 1, + target: 1, + container: 1, + action: Generic(GenericAction { + value: Container(Counter), + bool: true, + key: 16841217, + pos: 72340172838076673, + length: 72340172841484545, + prop: 72340172838076673, + }), + }, + Commit { site: 8 }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + Sync { from: 45, to: 46 }, + ], + ) +} + +#[test] +fn diff_calc_text_shallow_export_clamps_start_regression() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + SyncAll, + Sync { from: 0, to: 1 }, + Handle { + site: 0, + target: 129, + container: 95, + action: Generic(GenericAction { + value: Container(MovableList), + bool: true, + key: 2172748161, + pos: 9331882296112087425, + length: 9331882296112152961, + prop: 9331882296111890817, + }), + }, + DiffApply { from: 37, to: 38 }, + ImportShallow { + site: 199, + from: 199, + }, + Checkout { site: 74, to: 194 }, + Handle { + site: 129, + target: 129, + container: 3, + action: Generic(GenericAction { + value: Container(Unknown(255)), + bool: true, + key: 2172748287, + pos: 9548055078225674625, + length: 9331891092204913025, + prop: 9331882296111890821, + }), + }, + ForkAt { site: 111, to: 291 }, + DiffApply { from: 129, to: 255 }, + ImportShallow { + site: 148, + from: 149, + }, + SetCommitOptions { + site: 255, + origin: 251, + msg: 255, + }, + ExportShallow { site: 185 }, + ], + ) +} + +#[test] +fn diff_calc_text_tracker_after_diff_apply_regression() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + SyncAll, + Sync { from: 0, to: 1 }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(1800732672), + bool: true, + key: 1802174471, + pos: 9325641373152144235, + length: 7242212069011980673, + prop: 6174271723265851391, + }), + }, + DiffApply { from: 37, to: 38 }, + Handle { + site: 141, + target: 3, + container: 97, + action: Generic(GenericAction { + value: Container(MovableList), + bool: true, + key: 4287718529, + pos: 4611685380279500799, + length: 288230373237456895, + prop: 128837181699, + }), + }, + Checkout { site: 74, to: 194 }, + Query { + site: 149, + target: 149, + query_type: 149, + }, + ForkAt { site: 111, to: 291 }, + Query { + site: 149, + target: 149, + query_type: 149, + }, + ImportShallow { + site: 148, + from: 149, + }, + Query { + site: 149, + target: 149, + query_type: 149, + }, + ExportShallow { site: 185 }, + DiffApply { from: 129, to: 129 }, + StateOnlyRoundTrip { site: 222 }, + DiffApply { from: 129, to: 129 }, + Commit { site: 8 }, + DiffApply { from: 129, to: 129 }, + Sync { from: 45, to: 46 }, + Handle { + site: 129, + target: 255, + container: 107, + action: Generic(GenericAction { + value: Container(Unknown(255)), + bool: true, + key: 1811939327, + pos: 9346575856848732149, + length: 10127809810591351169, + prop: 9331870154437528973, + }), + }, + DiffApply { from: 82, to: 83 }, + Sync { from: 209, to: 209 }, + Checkout { site: 119, to: 970 }, + SetCommitOptions { + site: 141, + origin: 141, + msg: 141, + }, + ForkAt { + site: 156, + to: 1067, + }, + DiffApply { from: 141, to: 177 }, + ImportShallow { + site: 193, + from: 194, + }, + ], + ) +} + +#[test] +fn shallow_snapshot_import_notifies_text_subscription() { + let source = LoroDoc::new(); + let text = source.get_text("text"); + text.insert(0, "[7242212069011980673]").unwrap(); + text.insert(text.len_unicode(), "[288230373237456895]") + .unwrap(); + text.insert(text.len_unicode(), "[10127809810591351169]") + .unwrap(); + source.commit(); + + let target = LoroDoc::new(); + let seen = Arc::new(Mutex::new(0usize)); + let seen_in_sub = seen.clone(); + target + .subscribe( + &ContainerID::new_root("text", Text), + Arc::new(move |event| { + *seen_in_sub.lock().unwrap() += event.events.len(); + }), + ) + .detach(); + let _ = target.get_text("text"); + + let bytes = source + .export(ExportMode::shallow_snapshot(&source.oplog_frontiers())) + .unwrap(); + target.import(&bytes).unwrap(); + + assert_eq!( + target.get_text("text").to_string(), + source.get_text("text").to_string() + ); + assert_ne!(*seen.lock().unwrap(), 0); +} + +#[test] +fn failed_shallow_snapshot_import_keeps_text_subscription_recording() { + let a = LoroDoc::new(); + a.set_peer_id(0).unwrap(); + a.get_text("text") + .insert(0, "[7242212069011980673]") + .unwrap(); + a.commit(); + + let b = LoroDoc::new(); + b.set_peer_id(1).unwrap(); + b.get_text("text") + .insert(0, "[288230373237456895]") + .unwrap(); + b.commit(); + + let c = LoroDoc::new(); + c.set_peer_id(4).unwrap(); + c.get_text("text") + .insert(0, "[10127809810591351169]") + .unwrap(); + c.commit(); + c.import(&a.export(ExportMode::all_updates()).unwrap()) + .unwrap(); + c.import(&b.export(ExportMode::all_updates()).unwrap()) + .unwrap(); + + let target = LoroDoc::new(); + let seen = Arc::new(Mutex::new(0usize)); + let seen_in_sub = seen.clone(); + target + .subscribe( + &ContainerID::new_root("text", Text), + Arc::new(move |event| { + *seen_in_sub.lock().unwrap() += event.events.len(); + }), + ) + .detach(); + let _ = target.get_text("text"); + + let bytes = c + .export(ExportMode::shallow_snapshot(&c.oplog_frontiers())) + .unwrap(); + assert!(matches!( + target.import(&bytes), + Err(LoroError::SwitchToVersionBeforeShallowRoot) + )); + + let json = c.export_json_updates(&Default::default(), &c.oplog_vv()); + target.import_json_updates(json).unwrap(); + assert_eq!( + target.get_text("text").to_string(), + c.get_text("text").to_string() + ); + assert_ne!(*seen.lock().unwrap(), 0); +} + +#[test] +fn diff_calc_text_state_only_after_undo_regression() { + test_multi_sites_on_one_doc_with_peer_seed( + 5, + 0x689d_de15_de3a_9ddc, + &mut [ + Handle { + site: 17, + target: 255, + container: 255, + action: Generic(GenericAction { + value: Container(List), + bool: true, + key: 1802201963, + pos: 7740329224441654123, + length: 7740398493674204011, + prop: 7740398493674204011, + }), + }, + Sync { from: 0, to: 1 }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: Container(Tree), + bool: true, + key: 4294914303, + pos: 7740561856774864895, + length: 2951253807997335659, + prop: 4297100540780611861, + }), + }, + DiffApply { from: 37, to: 38 }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: Container(Unknown(88)), + bool: false, + key: 1799444692, + pos: 7740398493674204011, + length: 7740398223091264363, + prop: 7740398493674138475, + }), + }, + Checkout { site: 74, to: 194 }, + SyncAll, + ForkAt { site: 111, to: 291 }, + SyncAll, + ImportShallow { + site: 148, + from: 149, + }, + Handle { + site: 107, + target: 3, + container: 174, + action: Generic(GenericAction { + value: Container(Unknown(255)), + bool: true, + key: 16777215, + pos: 280480043325875, + length: 7740561856774799360, + prop: 2951253807997335659, + }), + }, + ExportShallow { site: 185 }, + Handle { + site: 115, + target: 102, + container: 94, + action: Generic(GenericAction { + value: I32(31), + bool: false, + key: 3573547008, + pos: 4268213989382922846, + length: 5690226776453947, + prop: 4268070707552569852, + }), + }, + StateOnlyRoundTrip { site: 222 }, + SyncAllUndo { + site: 59, + op_len: 3443211067, + }, + Commit { site: 8 }, + StateOnlyRoundTrip { site: 195 }, + Sync { from: 45, to: 46 }, + Commit { site: 195 }, + DiffApply { from: 82, to: 83 }, + ], + ) +} + +#[test] +fn diff_calc_text_shallow_trimmed_deps_lca_regression() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + Handle { + site: 82, + target: 82, + container: 0, + action: Generic(GenericAction { + value: I32(131072), + bool: false, + key: 5373952, + pos: 6004234347672895488, + length: 11357130451341398481, + prop: 11644104115566411873, + }), + }, + Sync { from: 0, to: 1 }, + SyncAllUndo { + site: 37, + op_len: 185310621, + }, + DiffApply { from: 37, to: 38 }, + Sync { from: 36, to: 146 }, + Checkout { site: 74, to: 194 }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: Container(Unknown(255)), + bool: true, + key: 2113929471, + pos: 7033950412504562129, + length: 11646767317412478976, + prop: 7885201674638920353, + }), + }, + ForkAt { site: 111, to: 291 }, + SyncAll, + ImportShallow { + site: 148, + from: 149, + }, + SyncAll, + ExportShallow { site: 185 }, + Handle { + site: 0, + target: 109, + container: 147, + action: Generic(GenericAction { + value: I32(755031808), + bool: true, + key: 1162167621, + pos: 7740398495797169455, + length: 1157443155982815248, + prop: 17666513099442622480, + }), + }, + StateOnlyRoundTrip { site: 222 }, + Handle { + site: 16, + target: 16, + container: 16, + action: Generic(GenericAction { + value: I32(269488144), + bool: false, + key: 4113305616, + pos: 7740398493674204131, + length: 2913105066323845023, + prop: 18446744073709551519, + }), + }, + Commit { site: 8 }, + SetCommitOptions { + site: 255, + origin: 255, + msg: 255, + }, + Sync { from: 45, to: 46 }, + SetCommitOptions { + site: 255, + origin: 255, + msg: 255, + }, + DiffApply { from: 82, to: 83 }, + SetCommitOptions { + site: 255, + origin: 255, + msg: 255, + }, + Checkout { site: 119, to: 970 }, + SetCommitOptions { + site: 255, + origin: 255, + msg: 255, + }, + ForkAt { + site: 156, + to: 1067, + }, + SetCommitOptions { + site: 255, + origin: 255, + msg: 255, + }, + ImportShallow { + site: 193, + from: 194, + }, + SetCommitOptions { + site: 109, + origin: 109, + msg: 109, + }, + ExportShallow { site: 230 }, + SetCommitOptions { + site: 147, + origin: 0, + msg: 0, + }, + StateOnlyRoundTrip { site: 16 }, + ], + ) +} + +#[test] +fn diff_calc_text_shallow_seed_split_unknown_regression() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + SyncAll, + Sync { from: 0, to: 1 }, + Handle { + site: 0, + target: 129, + container: 95, + action: Generic(GenericAction { + value: Container(MovableList), + bool: true, + key: 2172748161, + pos: 9331882296112087425, + length: 9331882296112152961, + prop: 9331882296111890817, + }), + }, + DiffApply { from: 37, to: 38 }, + ImportShallow { + site: 199, + from: 199, + }, + Checkout { site: 74, to: 194 }, + Handle { + site: 129, + target: 129, + container: 3, + action: Generic(GenericAction { + value: Container(Unknown(255)), + bool: true, + key: 2172748287, + pos: 9548055078225674625, + length: 9620112672263602561, + prop: 9331882296111890817, + }), + }, + ForkAt { site: 111, to: 291 }, + DiffApply { from: 129, to: 255 }, + ImportShallow { + site: 148, + from: 149, + }, + SetCommitOptions { + site: 255, + origin: 251, + msg: 255, + }, + ExportShallow { site: 185 }, + DiffApply { from: 3, to: 255 }, + StateOnlyRoundTrip { site: 222 }, + SetCommitOptions { + site: 255, + origin: 250, + msg: 255, + }, + Commit { site: 8 }, + SyncAllUndo { + site: 56, + op_len: 2172748231, + }, + Sync { from: 45, to: 46 }, + DiffApply { from: 129, to: 129 }, + DiffApply { from: 82, to: 83 }, + Handle { + site: 0, + target: 129, + container: 129, + action: Generic(GenericAction { + value: Container(Counter), + bool: true, + key: 455, + pos: 18446743390868570368, + length: 4607, + prop: 9367487224930631424, + }), + }, + Checkout { site: 119, to: 970 }, + SyncAll, + ForkAt { + site: 156, + to: 1067, + }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + ImportShallow { + site: 193, + from: 194, + }, + ], + ) +} + +#[test] +fn diff_calc_text_shallow_insert_origin_left_regression() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + Handle { + site: 118, + target: 118, + container: 11, + action: Generic(GenericAction { + value: Container(List), + bool: false, + key: 4294967141, + pos: 6148914691236517338, + length: 21845, + prop: 15367429818727531264, + }), + }, + Sync { from: 0, to: 1 }, + ImportShallow { site: 0, from: 193 }, + DiffApply { from: 37, to: 38 }, + ForkAt { + site: 11, + to: 4294967236, + }, + Checkout { site: 74, to: 194 }, + Handle { + site: 0, + target: 42, + container: 255, + action: Generic(GenericAction { + value: Container(Unknown(60)), + bool: false, + key: 0, + pos: 14178739001845810944, + length: 3978712807997649348, + prop: 3978709506094217015, + }), + }, + ForkAt { site: 111, to: 291 }, + SyncAllUndo { + site: 55, + op_len: 926365495, + }, + ImportShallow { + site: 148, + from: 149, + }, + SyncAllUndo { + site: 55, + op_len: 4294967095, + }, + ExportShallow { site: 185 }, + ForkAt { + site: 0, + to: 4294901760, + }, + StateOnlyRoundTrip { site: 222 }, + Handle { + site: 170, + target: 170, + container: 153, + action: Generic(GenericAction { + value: Container(Map), + bool: true, + key: 704643073, + pos: 18446744073709551615, + length: 15615, + prop: 14178674130659835655, + }), + }, + Commit { site: 8 }, + SyncAllUndo { + site: 55, + op_len: 926365495, + }, + Sync { from: 45, to: 46 }, + SetCommitOptions { + site: 255, + origin: 255, + msg: 255, + }, + DiffApply { from: 82, to: 83 }, + Undo { + site: 48, + op_len: 3293926656, + }, + Checkout { site: 119, to: 970 }, + Handle { + site: 0, + target: 107, + container: 107, + action: Generic(GenericAction { + value: I32(286331153), + bool: false, + key: 1154864597, + pos: 7297027664991289668, + length: 255, + prop: 0, + }), + }, + ForkAt { + site: 156, + to: 1067, + }, + ], + ) +} + +#[test] +fn diff_calc_text_shallow_diff_apply_duplicate_regression() { + test_multi_sites( + 5, + vec![FuzzTarget::Text], + &mut [ + Handle { + site: 36, + target: 0, + container: 176, + action: Generic(GenericAction { + value: Container(Tree), + bool: false, + key: 2728567458, + pos: 756277224098, + length: 19232863551488, + prop: 1513249057215086592, + }), + }, + Sync { from: 0, to: 1 }, + Handle { + site: 0, + target: 0, + container: 119, + action: Generic(GenericAction { + value: I32(65280), + bool: false, + key: 1431661312, + pos: 13933808665473959108, + length: 10489283573281046977, + prop: 10489325060224199569, + }), + }, + DiffApply { from: 37, to: 38 }, + Commit { site: 145 }, + Checkout { site: 74, to: 194 }, + DiffApply { from: 145, to: 145 }, + ForkAt { site: 111, to: 291 }, + DiffApply { from: 145, to: 145 }, + ImportShallow { + site: 148, + from: 149, + }, + Commit { site: 145 }, + ExportShallow { site: 185 }, + DiffApply { from: 145, to: 145 }, + StateOnlyRoundTrip { site: 222 }, + SyncAllUndo { + site: 145, + op_len: 2442236305, + }, + Commit { site: 8 }, + DiffApply { from: 145, to: 145 }, + Sync { from: 45, to: 46 }, + DiffApply { from: 145, to: 107 }, + DiffApply { from: 82, to: 83 }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 7740439813752356864, + }), + }, + Checkout { site: 119, to: 970 }, + Handle { + site: 255, + target: 33, + container: 41, + action: Generic(GenericAction { + value: Container(Counter), + bool: true, + key: 730960273, + pos: 7462906111928124484, + length: 18446743418314385255, + prop: 7451037802321897471, + }), + }, + ForkAt { + site: 156, + to: 1067, + }, + SyncAll, + ImportShallow { + site: 193, + from: 194, + }, + SyncAll, + ExportShallow { site: 230 }, + SyncAll, + StateOnlyRoundTrip { site: 16 }, + SyncAll, + Commit { site: 53 }, + SyncAll, + Sync { from: 90, to: 91 }, + SyncAll, + DiffApply { from: 127, to: 128 }, + SyncAll, + Checkout { + site: 164, + to: 1746, + }, + SyncAll, + ForkAt { + site: 201, + to: 1843, + }, + Handle { + site: 103, + target: 103, + container: 103, + action: Generic(GenericAction { + value: I32(1734829927), + bool: true, + key: 1734829927, + pos: 7451037800654241791, + length: 4294967143, + prop: 18403682804614299647, + }), + }, + ImportShallow { + site: 238, + from: 239, + }, + Handle { + site: 0, + target: 0, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + ExportShallow { site: 24 }, + ], + ) +} + #[test] fn fast_snapshot_0() { test_multi_sites( diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index 70cab4f18..53e3fe89a 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -63,6 +63,10 @@ impl Tracker { #[allow(unused)] fn new() -> Self { + Self::new_empty() + } + + pub(crate) fn new_empty() -> Self { Self { rope: CrdtRope::new(), id_to_cursor: IdToCursor::default(), @@ -110,6 +114,20 @@ impl Tracker { } fn _insert(&mut self, pos: usize, content: RichtextChunk, op_id: IdFull) { + self._insert_inner(pos, content, op_id, true); + } + + pub(crate) fn insert_seeded(&mut self, pos: usize, content: RichtextChunk, op_id: IdFull) { + self._insert_inner(pos, content, op_id, false); + } + + fn _insert_inner( + &mut self, + pos: usize, + content: RichtextChunk, + op_id: IdFull, + update_vv: bool, + ) { let result = self.rope.insert( pos, FugueSpan { @@ -134,9 +152,11 @@ impl Tracker { self.update_insert_by_split(&result.splitted.arr); - let end_id = op_id.inc(content.len() as Counter); - self.current_vv.extend_to_include_end_id(end_id.id()); - self.applied_vv.extend_to_include_end_id(end_id.id()); + if update_vv { + let end_id = op_id.inc(content.len() as Counter); + self.current_vv.extend_to_include_end_id(end_id.id()); + self.applied_vv.extend_to_include_end_id(end_id.id()); + } } fn update_insert_by_split(&mut self, split: &[LeafIndex]) { @@ -632,6 +652,61 @@ impl Tracker { self.rope.get_diff() } + + pub(crate) fn len(&self) -> usize { + self.rope.len() + } + + pub(crate) fn active_real_span_at(&self, pos: usize) -> Option<(ID, usize)> { + self.rope.active_real_span_at(pos) + } + + pub(crate) fn active_segments_of_real_id_span( + &self, + target_start: ID, + len: usize, + ) -> Vec<(ID, usize, usize)> { + let target_end = target_start.counter + len as Counter; + let mut index = 0; + let mut ans = Vec::new(); + + for span in self.rope.tree().iter() { + if !span.is_activated() { + continue; + } + + let span_len = span.rle_len(); + let real_id = span.real_id(); + if real_id.peer == UNKNOWN_PEER_ID { + index += span_len; + continue; + } + + if real_id.peer == target_start.peer { + let span_end = real_id.counter + span_len as Counter; + let overlap_start = real_id.counter.max(target_start.counter); + let overlap_end = span_end.min(target_end); + if overlap_start < overlap_end { + let offset = (overlap_start - real_id.counter) as usize; + let len = (overlap_end - overlap_start) as usize; + ans.push(( + ID::new(target_start.peer, overlap_start), + index + offset, + len, + )); + } + } + + index += span_len; + } + + ans + } + + pub(crate) fn mark_shallow_root_applied(&mut self, vv: &VersionVector) { + self.applied_vv = vv.clone(); + self.current_vv = vv.clone(); + } } #[cfg(test)] diff --git a/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs b/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs index 6c3c901c5..3e3576c5f 100644 --- a/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs +++ b/crates/loro-internal/src/container/richtext/tracker/crdt_rope.rs @@ -40,6 +40,26 @@ impl CrdtRope { &self.tree } + pub(super) fn active_real_span_at(&self, pos: usize) -> Option<(ID, usize)> { + if pos >= self.len() { + return None; + } + + let start = self + .tree + .query::(&(pos as i32))?; + let cursor = start.cursor; + let elem = self.tree.get_elem(cursor.leaf)?; + if !elem.is_activated() || cursor.offset >= elem.rle_len() { + return None; + } + + Some(( + elem.real_id().inc(cursor.offset as Counter), + elem.rle_len() - cursor.offset, + )) + } + pub(super) fn insert( &mut self, pos: usize, diff --git a/crates/loro-internal/src/dag.rs b/crates/loro-internal/src/dag.rs index fc7ee9c97..516108acf 100644 --- a/crates/loro-internal/src/dag.rs +++ b/crates/loro-internal/src/dag.rs @@ -504,7 +504,10 @@ where let mut node = get(node_id).unwrap(); while node.deps().len() == 1 { node_id = node.deps().as_single().unwrap(); - node = get(node_id).unwrap(); + let Some(next) = get(node_id) else { + return (Default::default(), DiffMode::ImportGreaterUpdates); + }; + node = next; } if node.deps().is_empty() { @@ -545,6 +548,7 @@ where let mut is_linear = left.len() <= 1 && right.len() == 1; let mut is_right_greater = true; + let mut has_unmatched_branch = false; let mut ans: Frontiers = Default::default(); let mut queue: BinaryHeap<(SmallVec<[OrdIdSpan; 1]>, NodeType)> = BinaryHeap::new(); @@ -568,8 +572,121 @@ where Some(ans) } - queue.push((ids_to_ord_id_spans(left, get).unwrap(), NodeType::A)); - queue.push((ids_to_ord_id_spans(right, get).unwrap(), NodeType::B)); + fn push_ord_id_spans<'a>( + queue: &mut BinaryHeap<(SmallVec<[OrdIdSpan<'a>; 1]>, NodeType)>, + spans: SmallVec<[OrdIdSpan<'a>; 1]>, + node_type: NodeType, + ) { + for span in spans { + queue.push((smallvec::smallvec![span], node_type)); + } + } + + fn deps_to_ord_id_spans<'a, D: DagNode + 'a, F: Fn(ID) -> Option>( + node: &OrdIdSpan<'a>, + get: &'a F, + ) -> Option; 1]>> { + let mut deps = ids_to_ord_id_spans(node.deps.as_ref(), get)?; + if node.id.counter > 0 { + let prev = node.id.inc(-1); + if let Some(prev) = OrdIdSpan::from_dag_node(prev, get) { + if !deps.iter().any(|dep| dep.contains_id(prev.id_last())) { + deps.push(prev); + } + } + } + + if deps.len() > 1 { + deps.sort_unstable_by(|a, b| b.cmp(a)); + } + + Some(deps) + } + + fn shrink_ancestor_frontiers<'a, D: DagNode + 'a, F: Fn(ID) -> Option>( + ids: &Frontiers, + get: &'a F, + ) -> Frontiers { + if ids.len() <= 1 { + return ids.clone(); + } + + let mut ids = ids_to_ord_id_spans(ids, get).expect("common ancestors should be in dag"); + ids.sort_unstable(); + let mut frontiers = Vec::with_capacity(ids.len()); + for id in ids.iter().rev() { + let mut should_insert = true; + for frontier in frontiers.iter().rev() { + if contains_in_ancestors(get, *frontier, id) { + should_insert = false; + break; + } + } + + if should_insert { + frontiers.push(id.id_last()); + } + } + + frontiers.into_iter().collect() + } + + fn has_trimmed_history_deps<'a, D: DagNode + 'a, F: Fn(ID) -> Option>( + ids: &Frontiers, + get: &'a F, + ) -> bool { + ids.iter().any(|id| { + let Some(node) = OrdIdSpan::from_dag_node(id, get) else { + return true; + }; + ids_to_ord_id_spans(node.deps.as_ref(), get).is_none() + }) + } + + fn contains_in_ancestors<'a, D: DagNode + 'a, F: Fn(ID) -> Option>( + get: &'a F, + frontier: ID, + target: &OrdIdSpan<'_>, + ) -> bool { + let mut visited = FxHashSet::default(); + let mut pending = BinaryHeap::new(); + let Some(node) = OrdIdSpan::from_dag_node(frontier, get) else { + return false; + }; + pending.push(node); + while let Some(node) = pending.pop() { + if node.contains_id(target.id_last()) { + return true; + } + + if node.lamport_last() < target.lamport_last() { + break; + } + + if !visited.insert(node.id_start()) { + continue; + } + + if let Some(deps) = deps_to_ord_id_spans(&node, get) { + for dep in deps { + pending.push(dep); + } + } + } + + false + } + + push_ord_id_spans( + &mut queue, + ids_to_ord_id_spans(left, get).unwrap(), + NodeType::A, + ); + push_ord_id_spans( + &mut queue, + ids_to_ord_id_spans(right, get).unwrap(), + NodeType::B, + ); while let Some((mut node, mut node_type)) = queue.pop() { while let Some((other_node, other_type)) = queue.peek() { if node == *other_node @@ -587,13 +704,15 @@ where } } - if queue.is_empty() { - if node_type == NodeType::Shared { - ans = node.into_iter().map(|x| x.id_last()).collect(); + if node_type == NodeType::Shared { + for id in node.iter().map(|x| x.id_last()) { + ans.push(id); } + continue; + } - // Iteration is done and no common ancestor is found - // So the ans is empty + if queue.is_empty() { + has_unmatched_branch = true; is_right_greater = false; break; } @@ -620,34 +739,48 @@ where } if node[0].len > 1 { - if other.0[0].lamport_last() > node[0].lamport { - node[0].len = (other.0[0].lamport_last() - node[0].lamport) - .min(node[0].len as u32 - 1) as usize; - queue.push((node, node_type)); - continue; + node[0].len = if other.0[0].lamport_last() >= node[0].lamport { + (other.0[0].lamport_last() - node[0].lamport + 1).min(node[0].len as u32 - 1) + as usize } else { - node[0].len = 1; - queue.push((node, node_type)); - continue; - } + 1 + }; + queue.push((node, node_type)); + continue; } } - if !node[0].deps.is_empty() { - if let Some(deps) = ids_to_ord_id_spans(node[0].deps.as_ref(), get) { - queue.push((deps, node_type)); - } else { - // dep on trimmed history - panic!("deps on trimmed history"); + if let Some(deps) = deps_to_ord_id_spans(&node[0], get) { + if !deps.is_empty() { + push_ord_id_spans(&mut queue, deps, node_type); + is_linear = false; + continue; } - - is_linear = false; } else { + // The dependency is on trimmed shallow history. The exact ancestor is + // not representable in the current DAG, so fall back to a conservative + // checkout base. + has_unmatched_branch = true; is_right_greater = false; - break; + continue; + } + + { + // Some checkout calculators still require replaying from a base that + // includes every branch whose operation positions may affect the diff. + // In non-linear checkout mode, an earlier common ancestor is a valid + // conservative base even when it is not the mathematical LCA. + has_unmatched_branch = true; + is_right_greater = false; + continue; } } + ans = shrink_ancestor_frontiers(&ans, get); + if has_unmatched_branch && !has_trimmed_history_deps(&ans, get) { + ans = Default::default(); + } + let mode = if is_right_greater { if ans.len() <= 1 { debug_assert_eq!(&ans, left); @@ -681,6 +814,7 @@ mod tests { use std::collections::BTreeMap; use loro_common::{HasId, HasIdSpan}; + use rand::{rngs::StdRng, Rng, SeedableRng}; use super::*; @@ -762,8 +896,8 @@ mod tests { fn get(&self, id: ID) -> Option { self.nodes .range(..=id) - .next_back() - .filter(|(_, node)| node.contains_id(id)) + .rev() + .find(|(_, node)| node.contains_id(id)) .map(|(_, node)| node.clone()) } @@ -795,6 +929,216 @@ mod tests { } } + fn ancestors_of_frontiers(dag: &TestDag, frontiers: &Frontiers) -> FxHashSet { + let mut ans = FxHashSet::default(); + for id in frontiers.iter() { + collect_ancestors(dag, id, &mut ans); + } + ans + } + + fn collect_ancestors(dag: &TestDag, id: ID, ans: &mut FxHashSet) { + let mut stack = vec![id]; + let mut visited_targets = FxHashSet::default(); + while let Some(id) = stack.pop() { + if !visited_targets.insert(id) { + continue; + } + + for node in dag.nodes.values() { + if node.id_start().peer != id.peer || node.id_start().counter > id.counter { + continue; + } + + let end = node.id_last().counter.min(id.counter); + for counter in node.id_start().counter..=end { + ans.insert(ID::new(node.id_start().peer, counter)); + } + + for dep in node.deps().iter() { + stack.push(dep); + } + } + } + } + + fn is_ancestor(dag: &TestDag, ancestor: ID, descendant: ID) -> bool { + let mut ancestors = FxHashSet::default(); + collect_ancestors(dag, descendant, &mut ancestors); + ancestors.contains(&ancestor) + } + + fn maximal_frontiers(dag: &TestDag, ids: impl IntoIterator) -> Frontiers { + let mut ids = Vec::::from_iter(ids); + ids.sort_by_key(|id| dag.get(*id).unwrap().get_lamport_from_counter(id.counter)); + let mut frontiers = Vec::new(); + for id in ids.into_iter().rev() { + if frontiers + .iter() + .any(|frontier| is_ancestor(dag, id, *frontier)) + { + continue; + } + + frontiers.retain(|frontier| !is_ancestor(dag, *frontier, id)); + frontiers.push(id); + } + + frontiers.into_iter().collect() + } + + fn oracle_common_ancestor(dag: &TestDag, left: &Frontiers, right: &Frontiers) -> Frontiers { + let left_ancestors = ancestors_of_frontiers(dag, left); + let right_ancestors = ancestors_of_frontiers(dag, right); + maximal_frontiers( + dag, + left_ancestors + .into_iter() + .filter(|id| right_ancestors.contains(id)), + ) + } + + fn assert_common_ancestor_valid_against_oracle( + dag: &TestDag, + left: &Frontiers, + right: &Frontiers, + ) { + let (actual, mode) = dag.find_common_ancestor(left, right); + let expected = oracle_common_ancestor(dag, left, right); + let left_ancestors = ancestors_of_frontiers(dag, left); + let right_ancestors = ancestors_of_frontiers(dag, right); + for id in actual.iter() { + assert!( + left_ancestors.contains(&id) && right_ancestors.contains(&id), + "actual LCA id {id} must be common: left={left:?} right={right:?} actual={actual:?} expected={expected:?} mode={mode:?}\ndag={dag:?}", + ); + } + + for a in actual.iter() { + for b in actual.iter() { + if a != b { + assert!( + !is_ancestor(dag, a, b), + "actual LCA must be a minimal frontier set: left={left:?} right={right:?} actual={actual:?} expected={expected:?} mode={mode:?}\ndag={dag:?}", + ); + } + } + } + + if !matches!(mode, DiffMode::Checkout) { + assert_eq!( + actual, expected, + "non-checkout mode should use the maximal common ancestor: left={left:?} right={right:?} mode={mode:?}\ndag={dag:?}", + ); + assert_eq!( + &actual, left, + "non-checkout mode must use left as LCA: left={left:?} right={right:?} mode={mode:?}" + ); + for id in left.iter() { + assert!( + right_ancestors.contains(&id), + "non-checkout mode requires right to include left id {id}; left={left:?} right={right:?} mode={mode:?}", + ); + } + } + } + + fn all_ids(dag: &TestDag) -> Vec { + dag.nodes + .values() + .flat_map(|node| { + (0..node.content_len()).map(|offset| node.id_start().inc(offset as Counter)) + }) + .collect() + } + + fn random_frontiers(dag: &TestDag, ids: &[ID], rng: &mut impl Rng) -> Frontiers { + if ids.is_empty() || rng.gen_bool(0.1) { + return Frontiers::default(); + } + + let len = rng.gen_range(1..=ids.len().min(4)); + maximal_frontiers(dag, (0..len).map(|_| ids[rng.gen_range(0..ids.len())])) + } + + fn random_dag(seed: u64, node_count: usize) -> TestDag { + let mut rng = StdRng::seed_from_u64(seed); + let mut next_counter = [0 as Counter; 8]; + let mut nodes = Vec::new(); + let mut existing_ids = Vec::new(); + for i in 0..node_count { + let peer = rng.gen_range(1..next_counter.len() as PeerID); + let counter = next_counter[peer as usize]; + let len = rng.gen_range(1..=3); + next_counter[peer as usize] += len as Counter; + let dep_count = if existing_ids.is_empty() { + 0 + } else { + rng.gen_range(0..=existing_ids.len().min(3)) + }; + let deps = maximal_frontiers( + &TestDag::new(nodes.clone(), Frontiers::default()), + (0..dep_count).map(|_| existing_ids[rng.gen_range(0..existing_ids.len())]), + ); + let node = node(peer, counter, len, i as Lamport * 4, deps); + existing_ids.extend((0..len).map(|offset| node.id_start().inc(offset as Counter))); + nodes.push(node); + } + + let dag = TestDag::new(nodes, Frontiers::default()); + let frontier = maximal_frontiers(&dag, existing_ids); + TestDag::new(dag.nodes.into_values(), frontier) + } + + fn layered_merge_dag() -> TestDag { + let root = node(1, 0, 3, 0, Frontiers::default()); + let left = node(1, 3, 2, 4, ID::new(1, 2).into()); + let right = node(2, 0, 3, 5, ID::new(1, 1).into()); + let late_right = node(2, 3, 2, 9, ID::new(2, 2).into()); + let third = node(3, 0, 2, 6, ID::new(1, 2).into()); + let merge_left_right = node( + 4, + 0, + 1, + 12, + Frontiers::from([left.id_last(), right.id_last()]), + ); + let merge_all = node( + 5, + 0, + 2, + 16, + Frontiers::from([ + merge_left_right.id_last(), + late_right.id_last(), + third.id_last(), + ]), + ); + let independent = node(6, 0, 2, 20, Frontiers::default()); + let final_merge = node( + 7, + 0, + 1, + 25, + Frontiers::from([merge_all.id_last(), independent.id_last()]), + ); + + TestDag::new( + vec![ + root, + left, + right, + late_right, + third, + merge_left_right, + merge_all, + independent, + final_merge.clone(), + ], + final_merge.id_last().into(), + ) + } + #[test] fn common_ancestor_handles_empty_linear_same_span_and_parent_child_cases() { let first = node(1, 0, 2, 0, Frontiers::default()); @@ -823,6 +1167,17 @@ mod tests { ); } + #[test] + fn common_ancestor_left_empty_stops_linear_scan_at_missing_shallow_dependency() { + let visible = node(1, 1, 1, 1, ID::new(1, 0).into()); + let dag = TestDag::new(vec![visible], ID::new(1, 1).into()); + + assert_eq!( + dag.find_common_ancestor(&Frontiers::default(), &ID::new(1, 1).into()), + (Frontiers::default(), DiffMode::ImportGreaterUpdates) + ); + } + #[test] fn common_ancestor_of_parallel_branches_is_shared_dependency() { let root = node(1, 0, 1, 0, Frontiers::default()); @@ -842,6 +1197,181 @@ mod tests { assert_eq!(ancestor, root.id.into()); } + #[test] + fn common_ancestor_falls_back_before_independent_branch() { + let left = node(1, 0, 1, 0, Frontiers::default()); + let independent = node(2, 0, 1, 1, Frontiers::default()); + let merge = node(3, 0, 1, 2, Frontiers::from([left.id, independent.id])); + let dag = TestDag::new( + vec![left.clone(), independent, merge.clone()], + merge.id.into(), + ); + + let (ancestor, mode) = dag.find_common_ancestor(&left.id.into(), &merge.id.into()); + assert_eq!(ancestor, Frontiers::default()); + assert_eq!(mode, DiffMode::Checkout); + } + + #[test] + fn common_ancestor_falls_back_before_unmatched_branch_with_multiple_left_frontiers() { + let left_a = node(1, 0, 1, 0, Frontiers::default()); + let left_b = node(2, 0, 1, 1, Frontiers::default()); + let independent = node(3, 0, 1, 2, Frontiers::default()); + let left_frontiers = Frontiers::from([left_a.id, left_b.id]); + let merge = node( + 4, + 0, + 1, + 3, + Frontiers::from([left_a.id, left_b.id, independent.id]), + ); + let dag = TestDag::new( + vec![left_a.clone(), left_b.clone(), independent, merge.clone()], + merge.id.into(), + ); + + let (ancestor, mode) = dag.find_common_ancestor(&left_frontiers, &merge.id.into()); + assert_eq!(ancestor, Frontiers::default()); + assert_eq!(mode, DiffMode::Checkout); + } + + #[test] + fn common_ancestor_marks_cross_peer_direct_dependency_as_greater_update() { + let left = node(1, 0, 1, 0, Frontiers::default()); + let right = node(2, 0, 1, 1, left.id.into()); + let dag = TestDag::new(vec![left.clone(), right.clone()], right.id.into()); + + let (ancestor, mode) = dag.find_common_ancestor(&left.id.into(), &right.id.into()); + assert_eq!(ancestor, left.id.into()); + assert_eq!(mode, DiffMode::ImportGreaterUpdates); + } + + #[test] + fn common_ancestor_falls_back_when_right_adds_concurrent_branch_from_shared_root() { + let root = node(1, 0, 1, 0, Frontiers::default()); + let left = node(2, 0, 1, 1, root.id.into()); + let concurrent = node(3, 0, 1, 2, root.id.into()); + let merge = node(4, 0, 1, 3, Frontiers::from([left.id, concurrent.id])); + let dag = TestDag::new( + vec![root, left.clone(), concurrent, merge.clone()], + merge.id.into(), + ); + + let (ancestor, mode) = dag.find_common_ancestor(&left.id.into(), &merge.id.into()); + assert_eq!(ancestor, Frontiers::default()); + assert_eq!(mode, DiffMode::Checkout); + } + + #[test] + fn common_ancestor_keeps_target_when_checking_out_to_ancestor_with_extra_branch() { + let root = node(1, 0, 1, 0, Frontiers::default()); + let left = node(2, 0, 2, 1, root.id.into()); + let right = node(3, 0, 2, 3, root.id.into()); + let extra = node( + 4, + 0, + 1, + 5, + Frontiers::from([left.id_last(), right.id_last()]), + ); + let dag = TestDag::new( + vec![root, left.clone(), right.clone(), extra.clone()], + extra.id.into(), + ); + let target = Frontiers::from([ID::new(2, 0), ID::new(3, 0)]); + let current = Frontiers::from([extra.id, left.id_last(), right.id_last()]); + + let (ancestor, mode) = dag.find_common_ancestor(¤t, &target); + assert_eq!(ancestor, target); + assert_eq!(mode, DiffMode::Checkout); + } + + #[test] + fn common_ancestor_does_not_keep_ancestor_of_shared_descendant() { + let root = node(1, 0, 1, 0, Frontiers::default()); + let shared = node(2, 0, 1, 1, root.id.into()); + let left_only = node(3, 0, 1, 2, root.id.into()); + let right_only = node(4, 0, 1, 3, root.id.into()); + let dag = TestDag::new( + vec![root, shared.clone(), left_only.clone(), right_only.clone()], + Frontiers::from([shared.id, left_only.id, right_only.id]), + ); + + let (ancestor, mode) = dag.find_common_ancestor( + &Frontiers::from([shared.id, left_only.id]), + &Frontiers::from([shared.id, right_only.id]), + ); + assert_eq!(ancestor, shared.id.into()); + assert_eq!(mode, DiffMode::Checkout); + } + + #[test] + fn common_ancestor_valid_against_slow_oracle_on_random_dags() { + for seed in 0..128 { + let mut rng = StdRng::seed_from_u64(seed); + let dag = random_dag(seed, rng.gen_range(1..=18)); + let ids = all_ids(&dag); + for _ in 0..64 { + let left = random_frontiers(&dag, &ids, &mut rng); + let right = random_frontiers(&dag, &ids, &mut rng); + assert_common_ancestor_valid_against_oracle(&dag, &left, &right); + } + } + } + + #[test] + fn common_ancestor_valid_against_slow_oracle_on_all_pairs_in_small_random_dags() { + for seed in 1000..1020 { + let dag = random_dag(seed, 8); + let ids = all_ids(&dag); + let mut frontiers = vec![Frontiers::default()]; + frontiers.extend(ids.iter().copied().map(Frontiers::from)); + for chunk in ids.chunks(3) { + frontiers.push(maximal_frontiers(&dag, chunk.iter().copied())); + } + + for left in frontiers.iter() { + for right in frontiers.iter() { + assert_common_ancestor_valid_against_oracle(&dag, left, right); + } + } + } + } + + #[test] + fn common_ancestor_valid_against_slow_oracle_on_layered_merge_dag() { + let dag = layered_merge_dag(); + let ids = all_ids(&dag); + let mut frontiers = vec![Frontiers::default(), dag.frontier().clone()]; + frontiers.extend(ids.iter().copied().map(Frontiers::from)); + frontiers.extend([ + Frontiers::from([ID::new(1, 4), ID::new(2, 2)]), + Frontiers::from([ID::new(1, 4), ID::new(3, 1)]), + Frontiers::from([ID::new(4, 0), ID::new(2, 4), ID::new(3, 1)]), + Frontiers::from([ID::new(5, 1), ID::new(6, 1)]), + ]); + + for left in frontiers.iter() { + for right in frontiers.iter() { + assert_common_ancestor_valid_against_oracle(&dag, left, right); + } + } + } + + #[test] + fn common_ancestor_valid_against_slow_oracle_on_branchy_random_dags() { + for seed in 2000..2064 { + let mut rng = StdRng::seed_from_u64(seed); + let dag = random_dag(seed, rng.gen_range(20..=36)); + let ids = all_ids(&dag); + for _ in 0..96 { + let left = random_frontiers(&dag, &ids, &mut rng); + let right = random_frontiers(&dag, &ids, &mut rng); + assert_common_ancestor_valid_against_oracle(&dag, &left, &right); + } + } + } + #[test] fn find_path_reports_forward_and_retreat_spans_for_linear_and_branch_paths() { let first = node(1, 0, 2, 0, Frontiers::default()); diff --git a/crates/loro-internal/src/diff_calc.rs b/crates/loro-internal/src/diff_calc.rs index 71bb4c2c5..1d8e7efff 100644 --- a/crates/loro-internal/src/diff_calc.rs +++ b/crates/loro-internal/src/diff_calc.rs @@ -7,7 +7,7 @@ pub(crate) use counter::CounterDiffCalculator; pub(super) mod tree; mod unknown; use either::Either; -use generic_btree::rle::HasLength as _; +use generic_btree::rle::{HasLength as _, Sliceable as _}; use itertools::Itertools; use enum_dispatch::enum_dispatch; @@ -108,6 +108,7 @@ pub(crate) struct DiffCalcVersionInfo<'a> { to_vv: &'a VersionVector, from_frontiers: &'a Frontiers, to_frontiers: &'a Frontiers, + lca_vv: &'a VersionVector, } impl DiffCalculator { @@ -221,6 +222,10 @@ impl DiffCalculator { calculator.start_tracking(oplog, &lca, diff_mode); } + if !vv.includes_vv(before) { + calculator.mark_source_not_in_op_context(); + } + if visited.contains(&op.container) { // don't checkout if we have already checked out this container in this round calculator.apply_change(oplog, RichOp::new_by_change(&change, op), None); @@ -262,6 +267,7 @@ impl DiffCalculator { to_vv: after, from_frontiers: before_frontiers, to_frontiers: after_frontiers, + lca_vv: &lca, }; while !all.is_empty() { // sort by depth and lamport, ensure we iterate from top to bottom @@ -419,6 +425,17 @@ pub(crate) enum ContainerDiffCalculator { Unknown(UnknownDiffCalculator), } +impl ContainerDiffCalculator { + fn mark_source_not_in_op_context(&mut self) { + match self { + Self::Richtext(calc) => calc.mark_source_not_in_op_context(), + Self::List(calc) => calc.mark_source_not_in_op_context(), + Self::MovableList(calc) => calc.mark_source_not_in_op_context(), + _ => {} + } + } +} + #[derive(Debug)] pub(crate) struct MapDiffCalculator { container_idx: ContainerIdx, @@ -556,12 +573,82 @@ use rle::{HasLength as _, Sliceable}; pub(crate) struct ListDiffCalculator { start_vv: VersionVector, tracker: Box, + source_not_in_op_context: bool, } impl ListDiffCalculator { pub(crate) fn get_id_latest_pos(&self, id: ID) -> Option { self.tracker.get_target_id_latest_index_at_new_version(id) } + + fn mark_source_not_in_op_context(&mut self) { + self.source_not_in_op_context = true; + } + + fn apply_op_to_tracker(tracker: &mut RichtextTracker, op: crate::op::RichOp) { + match &op.op().content { + crate::op::InnerContent::List(l) => match l { + InnerListOp::Insert { slice, pos } => { + tracker.insert(op.id_full(), *pos, RichtextChunk::new_text(slice.0.clone())); + } + InnerListOp::Delete(del) => { + tracker.delete( + op.id_start(), + del.id_start, + del.start() as usize, + del.atom_len(), + del.is_reversed(), + ); + } + _ => unreachable!(), + }, + _ => unreachable!(), + } + } + + fn build_full_tracker(idx: ContainerIdx, oplog: &OpLog, vv: &VersionVector) -> RichtextTracker { + let mut tracker = RichtextTracker::new_with_unknown(); + let empty_vv = VersionVector::default(); + let empty_frontiers = Frontiers::default(); + let target_frontiers = oplog.dag.vv_to_frontiers(vv); + let (_, _, iter) = + oplog.iter_from_lca_causally(&empty_vv, &empty_frontiers, vv, &target_frontiers); + + for (change, (start_counter, end_counter), vv) in iter { + let iter_start = change + .ops + .binary_search_by(|op| op.ctr_last().cmp(&start_counter)) + .unwrap_or_else(|e| e); + for mut op in &change.ops.vec()[iter_start..] { + if op.counter >= end_counter { + break; + } + + if op.container != idx || op.ctr_last() < start_counter { + continue; + } + + let stack_sliced_op; + if op.counter < start_counter || op.ctr_end() > end_counter { + stack_sliced_op = Some(op.slice( + (start_counter as usize).saturating_sub(op.counter as usize), + op.atom_len().min((end_counter - op.counter) as usize), + )); + op = stack_sliced_op.as_ref().unwrap(); + } + + let vv = &mut vv.borrow_mut(); + vv.extend_to_include_end_id(ID::new(change.peer(), op.counter)); + tracker.checkout(vv); + Self::apply_op_to_tracker( + &mut tracker, + crate::op::RichOp::new_by_change(&change, op), + ); + } + } + + tracker + } } impl MovableListDiffCalculator { @@ -570,6 +657,10 @@ impl MovableListDiffCalculator { .tracker .get_target_id_latest_index_at_new_version(id) } + + fn mark_source_not_in_op_context(&mut self) { + self.list.mark_source_not_in_op_context(); + } } impl std::fmt::Debug for ListDiffCalculator { @@ -582,6 +673,7 @@ impl std::fmt::Debug for ListDiffCalculator { impl DiffCalculatorTrait for ListDiffCalculator { fn start_tracking(&mut self, _oplog: &OpLog, vv: &crate::VersionVector, _mode: DiffMode) { + self.source_not_in_op_context = false; if !vv.includes_vv(&self.start_vv) || !self.tracker.all_vv().includes_vv(vv) { *self.tracker = RichtextTracker::new_with_unknown(); self.start_vv = vv.clone(); @@ -600,28 +692,7 @@ impl DiffCalculatorTrait for ListDiffCalculator { self.tracker.checkout(vv); } - match &op.op().content { - crate::op::InnerContent::List(l) => match l { - InnerListOp::Insert { slice, pos } => { - self.tracker.insert( - op.id_full(), - *pos, - RichtextChunk::new_text(slice.0.clone()), - ); - } - InnerListOp::Delete(del) => { - self.tracker.delete( - op.id_start(), - del.id_start, - del.start() as usize, - del.atom_len(), - del.is_reversed(), - ); - } - _ => unreachable!(), - }, - _ => unreachable!(), - } + Self::apply_op_to_tracker(&mut self.tracker, op); } fn finish_this_round(&mut self) {} @@ -634,7 +705,22 @@ impl DiffCalculatorTrait for ListDiffCalculator { mut on_new_container: impl FnMut(&ContainerID), ) -> (InternalDiff, DiffMode) { let mut delta = Delta::new(); - for item in self.tracker.diff(info.from_vv, info.to_vv) { + let (mut retreat, _) = info.from_vv.diff_iter(info.to_vv); + let has_retreat = retreat.next().is_some(); + let should_rebuild = matches!(idx.get_type(), crate::ContainerType::List) + && (has_retreat || info.lca_vv != info.from_vv || self.source_not_in_op_context); + let diff_items = if should_rebuild { + let mut merged = info.from_vv.clone(); + merged.merge(info.to_vv); + let mut full_tracker = Self::build_full_tracker(idx, oplog, &merged); + let diff_items = full_tracker.diff(info.from_vv, info.to_vv).collect_vec(); + *self.tracker = full_tracker; + diff_items + } else { + self.tracker.diff(info.from_vv, info.to_vv).collect_vec() + }; + + for item in diff_items { match item { CrdtRopeDelta::Retain(len) => { delta = delta.retain(len); @@ -764,6 +850,8 @@ enum RichtextCalcMode { /// (op, end_pos) styles: Vec<(StyleOp, usize)>, start_vv: VersionVector, + source_not_in_op_context: bool, + shallow_root_seeded: bool, }, Linear { diff: DeltaRope, @@ -778,10 +866,90 @@ impl RichtextDiffCalculator { tracker: Box::new(RichtextTracker::new_with_unknown()), styles: Vec::new(), start_vv: VersionVector::new(), + source_not_in_op_context: false, + shallow_root_seeded: false, }), } } + fn mark_source_not_in_op_context(&mut self) { + if let RichtextCalcMode::Crdt { + source_not_in_op_context, + .. + } = &mut *self.mode + { + *source_not_in_op_context = true; + } + } + + fn style_for_end_anchor(oplog: &OpLog, op: &RichOp) -> Option<(StyleOp, usize)> { + let style_start_id = op.id().inc(-1); + if let Some(start_op) = oplog.get_op_that_includes(style_start_id) { + let InnerListOp::StyleStart { + start: _, + end, + key, + value, + info, + } = start_op.content.as_list().unwrap() + else { + unreachable!() + }; + + Some(( + StyleOp { + lamport: start_op.lamport(), + peer: style_start_id.peer, + cnt: style_start_id.counter, + key: key.clone(), + value: value.clone(), + info: *info, + }, + *end as usize, + )) + } else { + oplog.with_history_cache(|history_cache| { + history_cache + .find_text_style_end_in_shallow_root(op.raw_op().container, style_start_id) + }) + } + } + + fn shallow_delete_range_matches_tracker( + tracker: &RichtextTracker, + target_start: ID, + pos: usize, + len: usize, + ) -> bool { + let mut remaining = len; + let mut pos = pos; + let mut expected = target_start; + while remaining > 0 { + let Some((real_id, available)) = tracker.active_real_span_at(pos) else { + return false; + }; + + if real_id.peer != expected.peer || real_id.counter != expected.counter { + return false; + } + + let take = remaining.min(available); + expected = expected.inc(take as Counter); + pos += take; + remaining -= take; + } + + true + } + + fn shallow_clamped_tracker_pos(oplog: &OpLog, tracker: &RichtextTracker, pos: usize) -> usize { + if oplog.shallow_since_vv().is_empty() { + pos + } else { + pos.min(tracker.len()) + } + } + /// This should be called after calc_diff /// /// TODO: Refactor, this can be simplified @@ -793,6 +961,365 @@ impl RichtextDiffCalculator { RichtextCalcMode::Linear { .. } => unreachable!(), } } + + fn apply_crdt_op_to_tracker( + oplog: &OpLog, + tracker: &mut RichtextTracker, + styles: &mut Vec<(StyleOp, usize)>, + op: RichOp, + ) { + match &op.raw_op().content { + crate::op::InnerContent::List(l) => match l { + InnerListOp::Insert { .. } | InnerListOp::Move { .. } | InnerListOp::Set { .. } => { + unreachable!() + } + InnerListOp::InsertText { + slice: _, + unicode_start, + unicode_len: len, + pos, + } => { + let pos = Self::shallow_clamped_tracker_pos(oplog, tracker, *pos as usize); + tracker.insert( + op.id_full(), + pos, + RichtextChunk::new_text(*unicode_start..*unicode_start + *len), + ); + } + InnerListOp::Delete(del) => { + let is_shallow = !oplog.shallow_since_vv().is_empty(); + let pos = del.start() as usize; + if is_shallow + && !Self::shallow_delete_range_matches_tracker( + tracker, + del.id_start, + pos, + del.atom_len(), + ) + { + let atom_len = del.atom_len(); + let mut segments = + tracker.active_segments_of_real_id_span(del.id_start, atom_len); + if segments.is_empty() { + return; + } + + segments.sort_unstable_by_key(|(_, pos, _)| *pos); + if del.is_reversed() { + for (target_id, pos, len) in segments.into_iter().rev() { + let target_offset = + (target_id.counter - del.id_start.counter) as usize; + debug_assert!(target_offset + len <= atom_len); + let op_offset = atom_len - target_offset - len; + tracker.delete( + op.id_start().inc(op_offset as Counter), + target_id, + pos, + len, + true, + ); + } + } else { + let mut deleted_before = 0; + for (target_id, pos, len) in segments { + let target_offset = + (target_id.counter - del.id_start.counter) as usize; + debug_assert!(pos >= deleted_before); + tracker.delete( + op.id_start().inc(target_offset as Counter), + target_id, + pos.saturating_sub(deleted_before), + len, + false, + ); + deleted_before += len; + } + } + + return; + } + + tracker.delete( + op.id_start(), + del.id_start, + pos, + del.atom_len(), + del.is_reversed(), + ); + } + InnerListOp::StyleStart { + start, + end, + key, + info, + value, + } => { + debug_assert!(start < end, "start: {}, end: {}", start, end); + let style_id = styles.len(); + styles.push(( + StyleOp { + lamport: op.lamport(), + peer: op.peer, + cnt: op.id_start().counter, + key: key.clone(), + value: value.clone(), + info: *info, + }, + *end as usize, + )); + let start = Self::shallow_clamped_tracker_pos(oplog, tracker, *start as usize); + tracker.insert( + op.id_full(), + start, + RichtextChunk::new_style_anchor(style_id as u32, AnchorType::Start), + ); + } + InnerListOp::StyleEnd => { + let id = op.id(); + if let Some(pos) = styles + .iter() + .rev() + .position(|(op, _pos)| op.peer == id.peer && op.cnt == id.counter - 1) + { + let style_id = styles.len() - pos - 1; + let (_start_op, end_pos) = &styles[style_id]; + tracker.insert( + op.id_full(), + // need to shift 1 because we insert the start style anchor before this pos + (*end_pos + 1).min(tracker.len()), + RichtextChunk::new_style_anchor(style_id as u32, AnchorType::End), + ); + } else { + let Some((style, end)) = Self::style_for_end_anchor(oplog, &op) else { + panic!("Unhandled checkout case") + }; + styles.push((style, end)); + let style_id = styles.len() - 1; + tracker.insert( + op.id_full(), + // need to shift 1 because we insert the start style anchor before this pos + (end + 1).min(tracker.len()), + RichtextChunk::new_style_anchor(style_id as u32, AnchorType::End), + ); + } + } + }, + _ => unreachable!(), + } + } + + fn seed_tracker_from_shallow_root( + idx: ContainerIdx, + oplog: &OpLog, + tracker: &mut RichtextTracker, + styles: &mut Vec<(StyleOp, usize)>, + vv: &VersionVector, + ) { + if oplog.shallow_since_vv().is_empty() { + return; + } + + *tracker = RichtextTracker::new_empty(); + styles.clear(); + let shallow_root_vv = oplog + .dag + .frontiers_to_vv(oplog.shallow_since_frontiers()) + .unwrap_or_else(|| oplog.shallow_since_vv().to_vv()); + let seed_vv = if vv.includes_vv(&shallow_root_vv) { + &shallow_root_vv + } else { + vv + }; + + #[derive(Debug, Clone, Copy)] + struct SeedItem { + order: usize, + id: IdFull, + content: RichtextChunk, + } + + struct Fenwick { + tree: Vec, + } + + impl Fenwick { + fn new(len: usize) -> Self { + Self { + tree: vec![0; len + 1], + } + } + + fn add(&mut self, mut index: usize, value: usize) { + index += 1; + while index < self.tree.len() { + self.tree[index] += value; + index += index & index.wrapping_neg(); + } + } + + fn prefix_sum(&self, mut end: usize) -> usize { + let mut sum = 0; + while end > 0 { + sum += self.tree[end]; + end -= end & end.wrapping_neg(); + } + + sum + } + } + + let chunks = oplog.with_history_cache(|h| h.find_text_chunks_in_shallow_root_order(idx)); + let mut pos = 0; + let mut seed_items = Vec::new(); + let mut style_id_to_index = FxHashMap::default(); + for chunk in chunks { + match chunk { + RichtextStateChunk::Text(text) => { + let id = text.id_full(); + let vv_end = seed_vv.get(&id.peer).copied().unwrap_or(0); + if vv_end <= id.counter { + continue; + } + + let end = vv_end.min(id.counter + text.unicode_len() as Counter); + let len = (end - id.counter) as usize; + if len == 0 { + continue; + } + + seed_items.push(SeedItem { + order: seed_items.len(), + id, + content: RichtextChunk::new_unknown(len as u32), + }); + pos += len; + } + RichtextStateChunk::Style { style, anchor_type } => { + let id = match anchor_type { + AnchorType::Start => style.id(), + AnchorType::End => style.id().inc(1), + }; + if !seed_vv.includes_id(id) { + continue; + } + + let style_id = if let Some(id) = style_id_to_index.get(&style.id()) { + *id + } else { + let id = styles.len(); + styles.push((style.as_ref().clone(), pos)); + style_id_to_index.insert(style.id(), id); + id + }; + + if anchor_type == AnchorType::End { + styles[style_id].1 = pos.saturating_sub(1); + } + + seed_items.push(SeedItem { + order: seed_items.len(), + id: IdFull::new( + id.peer, + id.counter, + style.lamport + (id.counter - style.cnt) as u32, + ), + content: RichtextChunk::new_style_anchor(style_id as u32, anchor_type), + }); + pos += 1; + } + } + } + + seed_items.sort_unstable_by_key(|item| (item.id.peer, item.id.counter)); + let mut seen_end_by_peer: FxHashMap = FxHashMap::default(); + let mut normalized_items = Vec::with_capacity(seed_items.len()); + for mut item in seed_items { + let end = item.id.counter + item.content.len() as Counter; + let seen_end = seen_end_by_peer.entry(item.id.peer).or_default(); + if end <= *seen_end { + continue; + } + + if item.id.counter < *seen_end { + let skip = (*seen_end - item.id.counter) as usize; + item.id = item.id.inc(skip as Counter); + item.content = item.content.slice(skip..item.content.len()); + } + + *seen_end = end; + normalized_items.push(item); + } + + let seed_items = normalized_items; + let fenwick_len = seed_items + .iter() + .map(|item| item.order) + .max() + .map_or(0, |order| order + 1); + let mut inserted = Fenwick::new(fenwick_len); + for item in seed_items { + let pos = inserted.prefix_sum(item.order); + tracker.insert_seeded(pos, item.content, item.id); + inserted.add(item.order, item.content.len()); + } + + tracker.mark_shallow_root_applied(seed_vv); + } + + fn build_full_crdt_tracker( + idx: ContainerIdx, + oplog: &OpLog, + vv: &VersionVector, + ) -> (RichtextTracker, Vec<(StyleOp, usize)>) { + let mut tracker = RichtextTracker::new_with_unknown(); + let mut styles = Vec::new(); + if !oplog.shallow_since_vv().is_empty() { + Self::seed_tracker_from_shallow_root(idx, oplog, &mut tracker, &mut styles, vv); + } + + let empty_vv = VersionVector::default(); + let empty_frontiers = Frontiers::default(); + let target_frontiers = oplog.dag.vv_to_frontiers(vv); + let (_, _, iter) = + oplog.iter_from_lca_causally(&empty_vv, &empty_frontiers, vv, &target_frontiers); + + for (change, (start_counter, end_counter), vv) in iter { + let iter_start = change + .ops + .binary_search_by(|op| op.ctr_last().cmp(&start_counter)) + .unwrap_or_else(|e| e); + for mut op in &change.ops.vec()[iter_start..] { + if op.counter >= end_counter { + break; + } + + if op.container != idx || op.ctr_last() < start_counter { + continue; + } + + let stack_sliced_op; + if op.counter < start_counter || op.ctr_end() > end_counter { + stack_sliced_op = Some(op.slice( + (start_counter as usize).saturating_sub(op.counter as usize), + op.atom_len().min((end_counter - op.counter) as usize), + )); + op = stack_sliced_op.as_ref().unwrap(); + } + + let vv = &mut vv.borrow_mut(); + vv.extend_to_include_end_id(ID::new(change.peer(), op.counter)); + tracker.checkout(vv); + Self::apply_crdt_op_to_tracker( + oplog, + &mut tracker, + &mut styles, + RichOp::new_by_change(&change, op), + ); + } + } + + (tracker, styles) + } } impl DiffCalculatorTrait for RichtextDiffCalculator { @@ -821,11 +1348,15 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { tracker, styles, start_vv, + source_not_in_op_context, + shallow_root_seeded, } => { + *source_not_in_op_context = false; if !vv.includes_vv(start_vv) || !tracker.all_vv().includes_vv(vv) { **tracker = RichtextTracker::new_with_unknown(); styles.clear(); *start_vv = vv.clone(); + *shallow_root_seeded = false; } tracker.checkout(vv); @@ -897,31 +1428,12 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { let (style_op, pos) = match last_style_start.take() { Some((style_op, pos)) => (style_op, pos), None => { - let Some(start_op) = oplog.get_op_that_includes(op.id().inc(-1)) + let Some((style_op, pos)) = Self::style_for_end_anchor(oplog, &op) else { panic!("Unhandled checkout case") }; - let InnerListOp::StyleStart { - key, - value, - info, - end, - .. - } = start_op.content.as_list().unwrap() - else { - unreachable!() - }; - let style_op = Arc::new(StyleOp { - lamport: op.lamport() - 1, - peer: op.peer, - cnt: op.id_start().counter - 1, - key: key.clone(), - value: value.clone(), - info: *info, - }); - - (style_op, *end) + (Arc::new(style_op), pos as u32) } }; assert_eq!(style_op.peer, op.peer); @@ -938,126 +1450,25 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { RichtextCalcMode::Crdt { tracker, styles, - start_vv: _, + start_vv, + source_not_in_op_context: _, + shallow_root_seeded, } => { - if let Some(vv) = vv { - tracker.checkout(vv); + if !*shallow_root_seeded && !oplog.shallow_since_vv().is_empty() { + Self::seed_tracker_from_shallow_root( + op.raw_op().container, + oplog, + tracker, + styles, + start_vv, + ); + *shallow_root_seeded = true; } - match &op.raw_op().content { - crate::op::InnerContent::List(l) => match l { - InnerListOp::Insert { .. } - | InnerListOp::Move { .. } - | InnerListOp::Set { .. } => { - unreachable!() - } - InnerListOp::InsertText { - slice: _, - unicode_start, - unicode_len: len, - pos, - } => { - tracker.insert( - op.id_full(), - *pos as usize, - RichtextChunk::new_text(*unicode_start..*unicode_start + *len), - ); - } - InnerListOp::Delete(del) => { - tracker.delete( - op.id_start(), - del.id_start, - del.start() as usize, - del.atom_len(), - del.is_reversed(), - ); - } - InnerListOp::StyleStart { - start, - end, - key, - info, - value, - } => { - debug_assert!(start < end, "start: {}, end: {}", start, end); - let style_id = styles.len(); - styles.push(( - StyleOp { - lamport: op.lamport(), - peer: op.peer, - cnt: op.id_start().counter, - key: key.clone(), - value: value.clone(), - info: *info, - }, - *end as usize, - )); - tracker.insert( - op.id_full(), - *start as usize, - RichtextChunk::new_style_anchor(style_id as u32, AnchorType::Start), - ); - } - InnerListOp::StyleEnd => { - let id = op.id(); - if let Some(pos) = styles.iter().rev().position(|(op, _pos)| { - op.peer == id.peer && op.cnt == id.counter - 1 - }) { - let style_id = styles.len() - pos - 1; - let (_start_op, end_pos) = &styles[style_id]; - tracker.insert( - op.id_full(), - // need to shift 1 because we insert the start style anchor before this pos - *end_pos + 1, - RichtextChunk::new_style_anchor( - style_id as u32, - AnchorType::End, - ), - ); - } else { - let Some(start_op) = oplog.get_op_that_includes(op.id().inc(-1)) - else { - // Checkout on richtext that export at a gc version that split - // start style op and end style op apart. Won't fix for now. - // It's such a rare case... - unimplemented!("Unhandled checkout case") - }; - let InnerListOp::StyleStart { - start: _, - end, - key, - value, - info, - } = start_op.content.as_list().unwrap() - else { - unreachable!() - }; - styles.push(( - StyleOp { - lamport: op.lamport() - 1, - peer: id.peer, - cnt: id.counter - 1, - key: key.clone(), - value: value.clone(), - info: *info, - }, - *end as usize, - )); - let style_id = styles.len() - 1; - tracker.insert( - op.id_full(), - // need to shift 1 because we insert the start style anchor before this pos - *end as usize + 1, - RichtextChunk::new_style_anchor( - style_id as u32, - AnchorType::End, - ), - ); - } - } - }, - _ => unreachable!(), + if let Some(vv) = vv { + tracker.checkout(vv); } + Self::apply_crdt_op_to_tracker(oplog, tracker, styles, op); } } } @@ -1069,103 +1480,155 @@ impl DiffCalculatorTrait for RichtextDiffCalculator { info: DiffCalcVersionInfo, _: impl FnMut(&ContainerID), ) -> (InternalDiff, DiffMode) { + fn push_tracker_chunk( + delta: &mut DeltaRope, + idx: ContainerIdx, + oplog: &OpLog, + styles: &[(StyleOp, usize)], + value: RichtextChunk, + id: ID, + lamport: Option, + ) { + match value.value() { + RichtextChunkValue::Text(text) => { + delta.push_insert( + RichtextStateChunk::Text( + // PERF: can be speedup by acquiring lock on arena + TextChunk::new( + oplog + .arena + .slice_by_unicode(text.start as usize..text.end as usize), + IdFull::new(id.peer, id.counter, lamport.unwrap()), + ), + ), + (), + ); + } + RichtextChunkValue::StyleAnchor { id, anchor_type } => { + delta.push_insert( + RichtextStateChunk::Style { + style: Arc::new(styles[id as usize].0.clone()), + anchor_type, + }, + (), + ); + } + RichtextChunkValue::Unknown(len) => { + // assert not unknown id + assert_ne!(id.peer, PeerID::MAX); + let mut id = id; + let mut acc_len = 0; + let end = id.counter + len as Counter; + let shallow_root = oplog.shallow_since_vv().get(&id.peer).copied().unwrap_or(0); + if id.counter < shallow_root { + // need to find the content between id.counter ~ target_end in gc state + let target_end = shallow_root.min(end); + oplog.with_history_cache(|h| { + let chunks = h.find_text_chunks_in( + idx, + IdSpan::new(id.peer, id.counter, target_end), + ); + for c in chunks { + acc_len += c.rle_len(); + delta.push_insert(c, ()); + } + }); + id.counter = shallow_root; + } + + if id.counter < end { + for rich_op in oplog.iter_ops(IdSpan::new(id.peer, id.counter, end)) { + acc_len += rich_op.content_len(); + let op = rich_op.op(); + let lamport = rich_op.lamport(); + let content = op.content.as_list().unwrap(); + match content { + InnerListOp::InsertText { slice, .. } => { + delta.push_insert( + RichtextStateChunk::Text(TextChunk::new( + slice.clone(), + IdFull::new(id.peer, op.counter, lamport), + )), + (), + ); + } + _ => unreachable!("{:?}", content), + } + } + } + + debug_assert_eq!(acc_len, len as usize); + } + RichtextChunkValue::MoveAnchor => unreachable!(), + } + } + + fn push_tracker_delta_item( + delta: &mut DeltaRope, + idx: ContainerIdx, + oplog: &OpLog, + styles: &[(StyleOp, usize)], + item: CrdtRopeDelta, + ) { + match item { + CrdtRopeDelta::Retain(len) => { + delta.push_retain(len, ()); + } + CrdtRopeDelta::Insert { + chunk: value, + id, + lamport, + } => push_tracker_chunk(delta, idx, oplog, styles, value, id, lamport), + CrdtRopeDelta::Delete(len) => { + delta.push_delete(len); + } + } + } + match &mut *self.mode { RichtextCalcMode::Linear { diff, .. } => ( InternalDiff::RichtextRaw(std::mem::take(diff)), DiffMode::Linear, ), RichtextCalcMode::Crdt { - tracker, styles, .. + tracker, + styles, + source_not_in_op_context, + .. } => { - let mut delta = DeltaRope::new(); - for item in tracker.diff(info.from_vv, info.to_vv) { - match item { - CrdtRopeDelta::Retain(len) => { - delta.push_retain(len, ()); - } - CrdtRopeDelta::Insert { - chunk: value, - id, - lamport, - } => match value.value() { - RichtextChunkValue::Text(text) => { - delta.push_insert( - RichtextStateChunk::Text( - // PERF: can be speedup by acquiring lock on arena - TextChunk::new( - oplog.arena.slice_by_unicode( - text.start as usize..text.end as usize, - ), - IdFull::new(id.peer, id.counter, lamport.unwrap()), - ), - ), - (), - ); - } - RichtextChunkValue::StyleAnchor { id, anchor_type } => { - delta.push_insert( - RichtextStateChunk::Style { - style: Arc::new(styles[id as usize].0.clone()), - anchor_type, - }, - (), - ); - } - RichtextChunkValue::Unknown(len) => { - // assert not unknown id - assert_ne!(id.peer, PeerID::MAX); - let mut id = id; - let mut acc_len = 0; - let end = id.counter + len as Counter; - let shallow_root = - oplog.shallow_since_vv().get(&id.peer).copied().unwrap_or(0); - if id.counter < shallow_root { - // need to find the content between id.counter ~ target_end in gc state - let target_end = shallow_root.min(end); - oplog.with_history_cache(|h| { - let chunks = h.find_text_chunks_in( - idx, - IdSpan::new(id.peer, id.counter, target_end), - ); - for c in chunks { - acc_len += c.rle_len(); - delta.push_insert(c, ()); - } - }); - id.counter = shallow_root; - } + let (mut retreat, _) = info.from_vv.diff_iter(info.to_vv); + let has_retreat = retreat.next().is_some(); + let should_rebuild = has_retreat + || info.lca_vv != info.from_vv + || *source_not_in_op_context + || !oplog.shallow_since_vv().is_empty(); + if should_rebuild { + // Richtext diffs can start from a tracker that only knows the LCA state as + // unknown spans. Expressing a rollback or an import from `lca != from` as local + // edits can target the wrong visible text when the source state contains + // concurrent inserts or sliced ops. The same risk exists when an op is replayed + // from a dependency version that does not include the visible source state. + // Preserve correctness by replacing the visible source state with the target + // state reconstructed from CRDT ids. Shallow docs seed this tracker from the + // shallow-root state and replay only the retained suffix of history. + let mut merged = info.from_vv.clone(); + merged.merge(info.to_vv); + let (mut full_tracker, full_styles) = + Self::build_full_crdt_tracker(idx, oplog, &merged); + + let mut delta = DeltaRope::new(); + for item in full_tracker.diff(info.from_vv, info.to_vv) { + push_tracker_delta_item(&mut delta, idx, oplog, &full_styles, item); + } + **tracker = full_tracker; + *styles = full_styles; - if id.counter < end { - for rich_op in - oplog.iter_ops(IdSpan::new(id.peer, id.counter, end)) - { - acc_len += rich_op.content_len(); - let op = rich_op.op(); - let lamport = rich_op.lamport(); - let content = op.content.as_list().unwrap(); - match content { - InnerListOp::InsertText { slice, .. } => { - delta.push_insert( - RichtextStateChunk::Text(TextChunk::new( - slice.clone(), - IdFull::new(id.peer, op.counter, lamport), - )), - (), - ); - } - _ => unreachable!("{:?}", content), - } - } - } + return (InternalDiff::RichtextRaw(delta), DiffMode::Checkout); + } - debug_assert_eq!(acc_len, len as usize); - } - RichtextChunkValue::MoveAnchor => unreachable!(), - }, - CrdtRopeDelta::Delete(len) => { - delta.push_delete(len); - } - } + let mut delta = DeltaRope::new(); + for item in tracker.diff(info.from_vv, info.to_vv) { + push_tracker_delta_item(&mut delta, idx, oplog, styles, item); } (InternalDiff::RichtextRaw(delta), DiffMode::Checkout) @@ -1202,6 +1665,7 @@ struct MovableListInner { impl DiffCalculatorTrait for MovableListDiffCalculator { fn start_tracking(&mut self, _oplog: &OpLog, vv: &crate::VersionVector, mode: DiffMode) { + self.list.source_not_in_op_context = false; if !vv.includes_vv(&self.list.start_vv) || !self.list.tracker.all_vv().includes_vv(vv) { *self.list.tracker = RichtextTracker::new_with_unknown(); self.list.start_vv = vv.clone(); @@ -1299,78 +1763,16 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { { // Apply change on the list items - let this = &mut self.list; if let Some(vv) = vv { - this.tracker.checkout(vv); - } - - let real_op = op.op(); - match &real_op.content { - crate::op::InnerContent::List(l) => match l { - InnerListOp::Insert { slice, pos } => { - this.tracker.insert( - op.id_full(), - *pos, - RichtextChunk::new_text(slice.0.clone()), - ); - } - InnerListOp::Delete(del) => { - this.tracker.delete( - op.id_start(), - del.id_start, - del.start() as usize, - del.atom_len(), - del.is_reversed(), - ); - } - InnerListOp::Move { from, elem_id, to } => { - self.inner.move_id_to_elem_id.insert(op.id(), *elem_id); - if !this.tracker.current_vv().includes_id(op.id()) { - let last_pos = if is_checkout { - // TODO: PERF: this lookup can be optimized - oplog.with_history_cache(|h| { - let list = &h.get_checkout_index().movable_list; - list.last_pos( - *elem_id, - this.tracker.current_vv(), - // TODO: PERF: Provide the lamport of to version - Lamport::MAX, - oplog, - ) - .unwrap() - .id() - }) - } else { - // When it's import or linear mode, we need to use a fake id - // because we want to avoid using the history cache - // - // This ID will not be used. Because it will only be used when - // we switch to an older version. And we know it's for importing and - // to version is always after from version (!is_checkout), so that - // we don't need to checkout to the version before from. - const FAKE_ID: ID = ID { - peer: PeerID::MAX - 2, - counter: 0, - }; - FAKE_ID - }; - this.tracker.move_item( - op.id_full(), - last_pos, - *from as usize, - *to as usize, - ); - } - } - InnerListOp::Set { .. } => { - // don't need to update tracker here - } - InnerListOp::InsertText { .. } - | InnerListOp::StyleStart { .. } - | InnerListOp::StyleEnd => unreachable!(), - }, - _ => unreachable!(), + self.list.tracker.checkout(vv); } + Self::apply_op_to_tracker( + &mut self.list.tracker, + &mut self.inner.move_id_to_elem_id, + oplog, + &op, + is_checkout, + ); }; } @@ -1386,6 +1788,14 @@ impl DiffCalculatorTrait for MovableListDiffCalculator { info: DiffCalcVersionInfo, mut on_new_container: impl FnMut(&ContainerID), ) -> (InternalDiff, DiffMode) { + let (mut retreat, _) = info.from_vv.diff_iter(info.to_vv); + let has_retreat = retreat.next().is_some(); + if has_retreat || info.lca_vv != info.from_vv || self.list.source_not_in_op_context { + let mut merged = info.from_vv.clone(); + merged.merge(info.to_vv); + self.rebuild_full_tracker(idx, oplog, &merged); + } + let (InternalDiff::ListRaw(list_diff), diff_mode) = self.list.calculate_diff(idx, oplog, info, |_| {}) else { @@ -1516,6 +1926,104 @@ impl MovableListDiffCalculator { }), } } + + fn apply_op_to_tracker( + tracker: &mut RichtextTracker, + move_id_to_elem_id: &mut FxHashMap, + oplog: &OpLog, + op: &RichOp<'_>, + is_checkout: bool, + ) { + let real_op = op.op(); + match &real_op.content { + InnerContent::List(l) => match l { + InnerListOp::Insert { slice, pos } => { + tracker.insert(op.id_full(), *pos, RichtextChunk::new_text(slice.0.clone())); + } + InnerListOp::Delete(del) => { + tracker.delete( + op.id_start(), + del.id_start, + del.start() as usize, + del.atom_len(), + del.is_reversed(), + ); + } + InnerListOp::Move { from, elem_id, to } => { + move_id_to_elem_id.insert(op.id(), *elem_id); + if !tracker.current_vv().includes_id(op.id()) { + let last_pos = if is_checkout { + oplog.with_history_cache(|h| { + let list = &h.get_checkout_index().movable_list; + list.last_pos(*elem_id, tracker.current_vv(), Lamport::MAX, oplog) + .expect("moved element should have a visible source position") + .id() + }) + } else { + // In import/linear mode this id is only needed if the tracker is later + // checked out before the source version, which those modes do not do. + ID::new(PeerID::MAX - 2, 0) + }; + tracker.move_item(op.id_full(), last_pos, *from as usize, *to as usize); + } + } + InnerListOp::Set { .. } => {} + InnerListOp::InsertText { .. } + | InnerListOp::StyleStart { .. } + | InnerListOp::StyleEnd => unreachable!(), + }, + _ => unreachable!(), + } + } + + fn rebuild_full_tracker(&mut self, idx: ContainerIdx, oplog: &OpLog, vv: &VersionVector) { + let mut tracker = RichtextTracker::new_with_unknown(); + let mut move_id_to_elem_id = FxHashMap::default(); + let empty_vv = VersionVector::default(); + let empty_frontiers = Frontiers::default(); + let target_frontiers = oplog.dag.vv_to_frontiers(vv); + let (_, _, iter) = + oplog.iter_from_lca_causally(&empty_vv, &empty_frontiers, vv, &target_frontiers); + + for (change, (start_counter, end_counter), vv) in iter { + let iter_start = change + .ops + .binary_search_by(|op| op.ctr_last().cmp(&start_counter)) + .unwrap_or_else(|e| e); + for mut op in &change.ops.vec()[iter_start..] { + if op.counter >= end_counter { + break; + } + + if op.container != idx || op.ctr_last() < start_counter { + continue; + } + + let stack_sliced_op; + if op.counter < start_counter || op.ctr_end() > end_counter { + stack_sliced_op = Some(op.slice( + (start_counter as usize).saturating_sub(op.counter as usize), + op.atom_len().min((end_counter - op.counter) as usize), + )); + op = stack_sliced_op.as_ref().unwrap(); + } + + let vv = &mut vv.borrow_mut(); + vv.extend_to_include_end_id(ID::new(change.peer(), op.counter)); + tracker.checkout(vv); + Self::apply_op_to_tracker( + &mut tracker, + &mut move_id_to_elem_id, + oplog, + &RichOp::new_by_change(&change, op), + true, + ); + } + } + + *self.list.tracker = tracker; + self.inner.move_id_to_elem_id = move_id_to_elem_id; + } } #[test] diff --git a/crates/loro-internal/src/diff_calc/tree.rs b/crates/loro-internal/src/diff_calc/tree.rs index 5dc93eb52..58c410316 100644 --- a/crates/loro-internal/src/diff_calc/tree.rs +++ b/crates/loro-internal/src/diff_calc/tree.rs @@ -632,7 +632,22 @@ impl TreeParentToChildrenCache { new_parent: TreeParentId, ) { if !old_parent.is_unexist() { - self.cache.get_mut(&old_parent).unwrap().remove(&target); + let removed = if let Some(children) = self.cache.get_mut(&old_parent) { + children.remove(&target) + } else { + false + }; + + if !removed { + let removed = self + .cache + .values_mut() + .any(|children| children.remove(&target)); + assert!( + removed, + "target {target:?} should be present in TreeParentToChildrenCache before moving from {old_parent:?} to {new_parent:?}", + ); + } } self.cache.entry(new_parent).or_default().insert(target); } diff --git a/crates/loro-internal/src/encoding/outdated_encode_reordered.rs b/crates/loro-internal/src/encoding/outdated_encode_reordered.rs index e2f2c504c..703393976 100644 --- a/crates/loro-internal/src/encoding/outdated_encode_reordered.rs +++ b/crates/loro-internal/src/encoding/outdated_encode_reordered.rs @@ -51,7 +51,7 @@ pub(crate) fn import_changes_to_oplog( continue; } - if oplog.dag.is_before_shallow_root(&change.deps) { + if oplog.dag.import_deps_before_shallow_root(&change.deps) { changes_before_shallow_root.push(change); continue; } diff --git a/crates/loro-internal/src/encoding/shallow_snapshot.rs b/crates/loro-internal/src/encoding/shallow_snapshot.rs index c4991bcbd..294731c7f 100644 --- a/crates/loro-internal/src/encoding/shallow_snapshot.rs +++ b/crates/loro-internal/src/encoding/shallow_snapshot.rs @@ -170,7 +170,6 @@ pub(crate) fn export_state_only_snapshot( ); let to_vv = frontiers_to_vv_for_export(&oplog, target_frontiers, "export_state_only_snapshot")?; - let oplog_bytes = oplog.export_change_store_in_range(&start_vv, &start_from, &to_vv, target_frontiers); let state_frontiers = doc.state_frontiers(); @@ -181,16 +180,41 @@ pub(crate) fn export_state_only_snapshot( .map_err(LoroEncodeError::from)?; let mut state = doc.app_state().lock(); let alive_containers = state.ensure_all_alive_containers(); - let alive_c_bytes = cids_to_bytes(alive_containers); + if has_unknown_container(alive_containers.iter()) { + return Err(LoroEncodeError::UnknownContainer); + } + let mut alive_c_bytes = cids_to_bytes(alive_containers); state.store.flush(); let shallow_state_kv = state.store.get_kv_clone(); drop(state); + + doc._checkout_without_emitting(target_frontiers, false, false) + .map_err(LoroEncodeError::from)?; + let mut state = doc.app_state().lock(); + state.ensure_all_alive_containers(); + state.store.encode(); + for cid in state.store.iter_all_container_ids() { + if let ContainerID::Normal { peer, counter, .. } = cid { + let temp_id = ID::new(peer, counter); + if !start_from.contains(&temp_id) { + alive_c_bytes.insert(cid.to_bytes()); + } + } else { + alive_c_bytes.insert(cid.to_bytes()); + } + } + + let target_state_kv = state.store.get_kv_clone(); + drop(state); + target_state_kv.remove_same(&shallow_state_kv); + target_state_kv.retain_keys(&alive_c_bytes); + shallow_state_kv.retain_keys(&alive_c_bytes); shallow_state_kv.insert(FRONTIERS_KEY, start_from.encode().into()); let shallow_state_bytes = shallow_state_kv.export(); let snapshot = Snapshot { oplog_bytes, - state_bytes: None, + state_bytes: Some(target_state_kv.export()), shallow_root_state_bytes: shallow_state_bytes, }; _encode_snapshot(snapshot, w); @@ -267,7 +291,7 @@ fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Fronti if next == current { // Cannot converge further (pairwise GCAs are the nodes themselves). // Fall back to empty frontiers, meaning export full history. - return Frontiers::default(); + return clamp_to_shallow_root(oplog, Frontiers::default()); } current = next; } @@ -293,7 +317,23 @@ fn calc_shallow_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Fronti } } - ans + clamp_to_shallow_root(oplog, ans) +} + +fn clamp_to_shallow_root(oplog: &crate::OpLog, frontiers: Frontiers) -> Frontiers { + if oplog.shallow_since_vv().is_empty() { + return frontiers; + } + + let Some(vv) = oplog.dag().frontiers_to_vv(&frontiers) else { + return oplog.shallow_since_frontiers().clone(); + }; + + if vv.includes_vv(&oplog.shallow_since_vv().to_vv()) { + frontiers + } else { + oplog.shallow_since_frontiers().clone() + } } pub(crate) fn encode_snapshot_at( diff --git a/crates/loro-internal/src/history_cache.rs b/crates/loro-internal/src/history_cache.rs index 2d1aa9873..57b96b0e5 100644 --- a/crates/loro-internal/src/history_cache.rs +++ b/crates/loro-internal/src/history_cache.rs @@ -9,7 +9,7 @@ use crate::sync::Mutex; use either::Either; use enum_as_inner::EnumAsInner; use enum_dispatch::enum_dispatch; -use generic_btree::rle::Sliceable; +use generic_btree::rle::{HasLength as _, Sliceable}; use loro_common::{ ContainerType, Counter, HasLamport, IdFull, IdLp, InternalString, LoroValue, PeerID, ID, }; @@ -19,8 +19,10 @@ use rustc_hash::FxHashMap; use crate::{ change::{Change, Lamport}, container::{ - idx::ContainerIdx, list::list_op::InnerListOp, - richtext::richtext_state::RichtextStateChunk, tree::tree_op::TreeOp, + idx::ContainerIdx, + list::list_op::InnerListOp, + richtext::{richtext_state::RichtextStateChunk, AnchorType, StyleOp}, + tree::tree_op::TreeOp, }, delta::MapValue, diff_calc::tree::{MoveLamportAndID, TreeCacheForDiff}, @@ -343,6 +345,104 @@ impl ContainerHistoryCache { }); ans.sort_unstable_by_key(|x| x.counter()); + let mut normalized = Vec::with_capacity(ans.len()); + let mut covered_end = target_span.counter.start; + for chunk in ans { + let span = chunk.get_id_span(); + let start = span.counter.start.max(covered_end); + let end = span.counter.end.min(target_span.counter.end); + if start >= end { + continue; + } + + normalized.push( + chunk.slice( + (start - span.counter.start) as usize..(end - span.counter.start) as usize, + ), + ); + covered_end = end; + } + + normalized + } + + pub(crate) fn find_text_chunks_in_shallow_root_order( + &self, + idx: ContainerIdx, + ) -> Vec { + ensure_cov::notify_cov( + "loro_internal::history_cache::find_text_chunks_in_shallow_root_order", + ); + let Some(state) = self.shallow_root_state.as_ref() else { + return Vec::new(); + }; + + let mut binding = state.store.lock(); + let Some(text) = binding.get_mut(idx) else { + return Vec::new(); + }; + + let text_state = text + .get_state( + idx, + ContainerCreationContext { + configure: &Default::default(), + peer: 0, + }, + ) + .as_richtext_state() + .unwrap(); + + let mut ans = Vec::new(); + text_state.iter_raw(&mut |chunk| { + ans.push(chunk.slice(0..chunk.rle_len())); + }); + + ans + } + + pub(crate) fn find_text_style_end_in_shallow_root( + &self, + idx: ContainerIdx, + style_start_id: ID, + ) -> Option<(StyleOp, usize)> { + ensure_cov::notify_cov("loro_internal::history_cache::find_text_style_end_in_shallow_root"); + let state = self.shallow_root_state.as_ref()?; + + let mut binding = state.store.lock(); + let text = binding.get_mut(idx)?; + + let text_state = text + .get_state( + idx, + ContainerCreationContext { + configure: &Default::default(), + peer: 0, + }, + ) + .as_richtext_state() + .unwrap(); + + let mut entity_index = 0; + let mut ans = None; + text_state.iter_raw(&mut |chunk| { + if ans.is_some() { + return; + } + + match chunk { + RichtextStateChunk::Text(text) => { + entity_index += text.unicode_len() as usize; + } + RichtextStateChunk::Style { style, anchor_type } => { + if style.id() == style_start_id && *anchor_type == AnchorType::End { + ans = Some((style.as_ref().clone(), entity_index.saturating_sub(1))); + } + entity_index += 1; + } + } + }); + ans } diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index 8094c37ac..4581c1422 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -1581,6 +1581,7 @@ impl LoroDoc { ); if &from_frontiers == frontiers { + self.set_detached(frontiers != &self.oplog_frontiers()); return Ok(()); } diff --git a/crates/loro-internal/src/oplog.rs b/crates/loro-internal/src/oplog.rs index 3282e8de5..3cc19c5ce 100644 --- a/crates/loro-internal/src/oplog.rs +++ b/crates/loro-internal/src/oplog.rs @@ -233,7 +233,7 @@ impl OpLog { continue; } - if self.dag.is_before_shallow_root(&change.deps) { + if self.dag.import_deps_before_shallow_root(&change.deps) { ans.has_deps_before_shallow_root = true; continue; } @@ -591,13 +591,23 @@ impl OpLog { let mut merged_vv = from.clone(); merged_vv.merge(to); loro_common::debug!("to_frontiers={:?} vv={:?}", &to_frontiers, to); - let (common_ancestors, mut diff_mode) = + let (mut common_ancestors, mut diff_mode) = self.dag.find_common_ancestor(from_frontiers, to_frontiers); if diff_mode == DiffMode::Checkout && to > from { diff_mode = DiffMode::Import; } - let common_ancestors_vv = self.dag.frontiers_to_vv(&common_ancestors).unwrap(); + let mut common_ancestors_vv = self.dag.frontiers_to_vv(&common_ancestors).unwrap(); + let shallow_since_vv = self.dag.shallow_since_vv().to_vv(); + if !common_ancestors_vv.includes_vv(&shallow_since_vv) { + // The replay base cannot point before shallow history because those + // ops are no longer available to the causal iterator. + common_ancestors = self.dag.shallow_since_frontiers().clone(); + common_ancestors_vv = self + .dag + .frontiers_to_vv(&common_ancestors) + .unwrap_or(shallow_since_vv); + } // go from lca to merged_vv let diff = common_ancestors_vv.diff(&merged_vv).forward; let mut iter = self.dag.iter_causal(common_ancestors, diff); diff --git a/crates/loro-internal/src/oplog/loro_dag.rs b/crates/loro-internal/src/oplog/loro_dag.rs index aeeead3b8..12b73cb1a 100644 --- a/crates/loro-internal/src/oplog/loro_dag.rs +++ b/crates/loro-internal/src/oplog/loro_dag.rs @@ -793,6 +793,34 @@ impl AppDag { false } + pub(crate) fn import_deps_before_shallow_root(&self, deps: &Frontiers) -> bool { + if self.shallow_since_vv.is_empty() { + return false; + } + + if deps.is_empty() { + return true; + } + + let shallow_vv = VersionVector::from_im_vv(&self.shallow_since_vv); + if let Some(vv) = self.frontiers_to_vv(deps) { + return !vv.includes_vv(&shallow_vv); + } + + // Import only needs to reject updates whose causal source is older than + // the shallow root. A dependency set that touches the retained boundary + // can still be a valid post-root update, even when the rest of the deps + // are imported later in the same batch. + if deps + .iter() + .any(|id| self.shallow_since_frontiers.contains(&id)) + { + return false; + } + + deps.iter().any(|id| self.shallow_since_vv.includes_id(id)) + } + /// Travel the ancestors of the given id, and call the callback for each node /// /// It will travel the ancestors in the reverse order (from the greatest lamport to the smallest) @@ -1074,7 +1102,13 @@ impl AppDag { } else { let mut all_deps_processed = true; for id in top_node.deps.iter() { - let node = self.get(id).expect("deps should be in the dag"); + let Some(node) = self.get(id) else { + if self.shallow_since_vv.includes_id(id) { + continue; + } + + panic!("deps should be in the dag"); + }; if node.vv.get().is_none() { if all_deps_processed { stack.push(top_node.clone()); @@ -1090,7 +1124,14 @@ impl AppDag { } for id in top_node.deps.iter() { - let node = self.get(id).expect("deps should be in the dag"); + let Some(node) = self.get(id) else { + if self.shallow_since_vv.includes_id(id) { + ans_vv.extend_to_include_vv(self.shallow_since_vv.iter()); + continue; + } + + panic!("deps should be in the dag"); + }; let dep_vv = node.vv.get().unwrap(); if ans_vv.is_empty() { ans_vv = dep_vv.clone(); @@ -1341,6 +1382,23 @@ mod ensure_vv_for_tests { .into() } + fn make_shallow_dag_for_import_deps() -> AppDag { + let change_store = ChangeStore::new_mem(&SharedArena::new(), Arc::new(AtomicI64::new(0))); + let mut dag = AppDag::new(change_store); + let root_deps = Frontiers::from_id(ID::new(1, 1)); + let boundary = make_dag_node(1, 2, 1, root_deps.clone()); + + { + let mut map = dag.map.lock(); + map.insert(boundary.id_start(), boundary); + } + + dag.shallow_since_vv.insert(1, 2); + dag.shallow_since_frontiers = Frontiers::from_id(ID::new(1, 2)); + dag.shallow_root_frontiers_deps = root_deps; + dag + } + /// Regression for loro-dev/loro#929: when computing the vv for a node /// whose DAG fan-in contains a shared ancestor reached through multiple /// paths, the iterative DFS used to push the shared ancestor onto the @@ -1385,4 +1443,33 @@ mod ensure_vv_for_tests { assert_eq!(vv.get(&2).copied(), Some(1)); assert!(vv.get(&3).is_none()); } + + #[test] + fn import_deps_before_shallow_root_rejects_trimmed_history_dep() { + let dag = make_shallow_dag_for_import_deps(); + let deps = Frontiers::from_id(ID::new(1, 0)); + + assert!(dag.frontiers_to_vv(&deps).is_none()); + assert!(dag.import_deps_before_shallow_root(&deps)); + } + + #[test] + fn import_deps_before_shallow_root_allows_boundary_with_missing_peer() { + let dag = make_shallow_dag_for_import_deps(); + let mut deps = Frontiers::default(); + deps.push(ID::new(1, 2)); + deps.push(ID::new(2, 0)); + + assert!(dag.frontiers_to_vv(&deps).is_none()); + assert!(!dag.import_deps_before_shallow_root(&deps)); + } + + #[test] + fn import_deps_before_shallow_root_allows_missing_non_trimmed_dep() { + let dag = make_shallow_dag_for_import_deps(); + let deps = Frontiers::from_id(ID::new(2, 0)); + + assert!(dag.frontiers_to_vv(&deps).is_none()); + assert!(!dag.import_deps_before_shallow_root(&deps)); + } } diff --git a/crates/loro-internal/src/state.rs b/crates/loro-internal/src/state.rs index 545d414ac..3b9c69098 100644 --- a/crates/loro-internal/src/state.rs +++ b/crates/loro-internal/src/state.rs @@ -951,12 +951,16 @@ impl DocState { } pub(crate) fn reset_to_empty_for_failed_snapshot_import(&mut self) { + let was_recording = self.is_recording(); self.frontiers = Frontiers::default(); self.store = ContainerStore::new(self.arena.clone(), self.config.clone(), self.peer.clone()); self.in_txn = false; self.changed_idx_in_txn.clear(); self.event_recorder = Default::default(); + if was_recording { + self.start_recording(); + } self.dead_containers_cache = Default::default(); } diff --git a/crates/loro-internal/src/state/richtext_state.rs b/crates/loro-internal/src/state/richtext_state.rs index 32f969376..a15109d57 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -937,11 +937,6 @@ impl RichtextState { } } - #[inline] - pub(crate) fn has_styles(&mut self) -> bool { - self.state.get_mut().has_styles() - } - pub(crate) fn has_style_key_in_entity_range( &mut self, range: Range, diff --git a/crates/loro-internal/tests/undo.rs b/crates/loro-internal/tests/undo.rs index 23e2103b9..c1ad818f2 100644 --- a/crates/loro-internal/tests/undo.rs +++ b/crates/loro-internal/tests/undo.rs @@ -175,7 +175,8 @@ fn test_clear_redo() { // Make some edits text.update("hello", UpdateOptions::default()).unwrap(); doc.commit_then_renew(); - text.update("hello world", UpdateOptions::default()).unwrap(); + text.update("hello world", UpdateOptions::default()) + .unwrap(); doc.commit_then_renew(); // Undo to create redo stack @@ -187,7 +188,10 @@ fn test_clear_redo() { // Clear only redo stack undo_manager.clear_redo(); assert!(!undo_manager.can_redo(), "redo stack should be empty"); - assert!(undo_manager.can_undo(), "undo stack should still have items"); + assert!( + undo_manager.can_undo(), + "undo stack should still have items" + ); // Verify undo still works undo_manager.undo().unwrap(); @@ -203,7 +207,8 @@ fn test_clear_undo() { // Make some edits text.update("hello", UpdateOptions::default()).unwrap(); doc.commit_then_renew(); - text.update("hello world", UpdateOptions::default()).unwrap(); + text.update("hello world", UpdateOptions::default()) + .unwrap(); doc.commit_then_renew(); // Undo to create redo stack @@ -214,7 +219,10 @@ fn test_clear_undo() { // Clear only undo stack undo_manager.clear_undo(); - assert!(undo_manager.can_redo(), "redo stack should still have items"); + assert!( + undo_manager.can_redo(), + "redo stack should still have items" + ); assert!(!undo_manager.can_undo(), "undo stack should be empty"); // Verify redo still works diff --git a/crates/loro/tests/integration_test/shallow_snapshot_test.rs b/crates/loro/tests/integration_test/shallow_snapshot_test.rs index 14cc55079..c2188cf6d 100644 --- a/crates/loro/tests/integration_test/shallow_snapshot_test.rs +++ b/crates/loro/tests/integration_test/shallow_snapshot_test.rs @@ -416,3 +416,65 @@ fn test_export_shallow_snapshot_from_shallow_doc() -> anyhow::Result<()> { Ok(()) } + +/// Regression for a branch-specific import bug on `feat/diff-text-lca-review`. +/// +/// Setup: 3 peers each commit a few ops, then sync. Peer 1 then commits enough +/// post-mid_f ops that `shallow_snapshot(&mid_f)` actually trims peer 1's +/// pre-mid_f history (`shallow_since_vv = {1: 2}`, +/// `shallow_since_frontiers = [2@1]`). A second peer commits one cross-peer op +/// whose deps equal mid_f (`[2@1, 2@2, 2@3]`). +/// +/// The import preflight should not reuse checkout's conservative +/// `is_before_shallow_root` semantics here: deps that touch the boundary +/// together with valid same-or-other-peer post-shallow ids should be +/// importable. +#[test] +fn shallow_doc_accepts_cross_peer_op_whose_deps_include_boundary() -> anyhow::Result<()> { + let p1 = LoroDoc::new(); + p1.set_peer_id(1)?; + let p2 = LoroDoc::new(); + p2.set_peer_id(2)?; + let p3 = LoroDoc::new(); + p3.set_peer_id(3)?; + + for _ in 0..3 { + p1.get_text("t").insert(0, "1")?; + p1.commit(); + p2.get_text("t").insert(0, "2")?; + p2.commit(); + p3.get_text("t").insert(0, "3")?; + p3.commit(); + } + let docs = [&p1, &p2, &p3]; + for i in 0..3 { + for j in 0..3 { + if i != j { + docs[j].import(&docs[i].export(ExportMode::all_updates())?)?; + } + } + } + + let mid_f = p1.oplog_frontiers(); + for k in 0..5 { + p1.get_text("t").insert(0, &format!("{k}"))?; + p1.commit(); + } + + let snap = p1.export(ExportMode::shallow_snapshot(&mid_f))?; + let s = LoroDoc::new(); + s.import(&snap)?; + assert!( + !s.shallow_since_vv().is_empty(), + "test setup must produce a real shallow trim" + ); + + p2.get_text("t").insert(0, "Y")?; + p2.commit(); + + let p2_updates = p2.export(ExportMode::all_updates())?; + s.import(&p2_updates) + .expect("shallow doc should accept cross-peer op whose deps include the shallow boundary"); + + Ok(()) +}