diff --git a/Cargo.lock b/Cargo.lock index ffdb7c81598..3cc2660068c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -607,6 +607,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bimap" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" + [[package]] name = "bincode" version = "1.3.3" @@ -781,6 +787,7 @@ dependencies = [ "ansi-to-tui", "anyhow", "arboard", + "bimap", "boolean-enums", "bstr", "but-action", diff --git a/Cargo.toml b/Cargo.toml index fb72c976c32..0d29597218b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -300,6 +300,7 @@ self_cell = "1.2.2" unicode-segmentation = "1.12.0" urlencoding = "2.1" inventory = "0.3" +bimap = "=0.6.3" [workspace.lints.clippy] all = { level = "deny", priority = -1 } diff --git a/crates/but/Cargo.toml b/crates/but/Cargo.toml index cfcd2bee911..f3542a5bca4 100644 --- a/crates/but/Cargo.toml +++ b/crates/but/Cargo.toml @@ -112,7 +112,7 @@ regex.workspace = true anyhow.workspace = true rmcp.workspace = true command-group = { version = "5.0.1", features = ["with-tokio"] } -gix = { workspace = true, features = ["tracing", "tracing-detail"] } +gix = { workspace = true, features = ["tracing", "tracing-detail", "revision"] } colored = "3.0.0" serde_json.workspace = true boolean-enums.workspace = true @@ -153,6 +153,7 @@ syntect = { version = "5.3.0", default-features = false, features = [ "html", ] } tracing-appender = "0.2.4" +bimap.workspace = true [dev-dependencies] but-graph.workspace = true diff --git a/crates/but/src/args/mod.rs b/crates/but/src/args/mod.rs index b876b8f6f26..13e1f5dc4cf 100644 --- a/crates/but/src/args/mod.rs +++ b/crates/but/src/args/mod.rs @@ -901,6 +901,12 @@ pub enum Subcommands { Merge { /// Branch ID or name to merge branch: String, + #[clap(long = "remote")] + remote: Option, + #[clap(long = "local")] + local: Option, + #[clap(long = "graph")] + graph: Option, }, /// Move a commit or branch to a different location. diff --git a/crates/but/src/command/legacy/merge.rs b/crates/but/src/command/legacy/merge.rs index ed964121760..f3a837ce540 100644 --- a/crates/but/src/command/legacy/merge.rs +++ b/crates/but/src/command/legacy/merge.rs @@ -1,22 +1,311 @@ +use std::cell::RefCell; +use std::collections::{BTreeSet, HashMap, HashSet}; use std::fmt::Write; +use std::rc::Rc; -use anyhow::bail; +use anyhow::{Context as _, bail}; +use bimap::BiMap; +use bstr::BString; use but_ctx::Context; use colored::Colorize; +use itertools::Itertools as _; use crate::{ CliId, IdMap, utils::{OutputChannel, shorten_object_id}, }; +fn do_evo( + ctx: &mut Context, + _guard: &but_core::sync::RepoShared, + remote: String, + local: String, + graph: String, +) -> anyhow::Result { + let repo = &*ctx.repo.get()?; + + type RevCommit = + gix::revision::plumbing::graph::Commit; + let mut gix_graph: gix::revision::plumbing::Graph<'_, '_, RevCommit> = + gix::revision::plumbing::Graph::new(&repo.objects, None); + let remote_commit_id = repo.rev_parse_single(&*remote)?.detach(); + let local_commit_id = repo.rev_parse_single(&*local)?.detach(); + let merge_base = + gix::revision::plumbing::merge_base(remote_commit_id, &[local_commit_id], &mut gix_graph)? + .context("missing merge base")? + .first() + .to_owned(); + + /// Push the ancestors of `commit_id` up to, but not including, `merge_base` + /// in reverse topological order. + fn push_parents_then_self( + gix_graph: &gix::revision::plumbing::Graph<'_, '_, RevCommit>, + commit_id: &gix::ObjectId, + merge_base: &gix::ObjectId, + reverse_topology: &mut Vec, + ) -> anyhow::Result<()> { + if commit_id == merge_base { + return Ok(()); + } + if reverse_topology.contains(commit_id) { + return Ok(()); + } + let commit = gix_graph.get(commit_id).context("missing")?; + for parent_id in &commit.parents { + push_parents_then_self(gix_graph, parent_id, merge_base, reverse_topology)?; + } + reverse_topology.push(commit_id.to_owned()); + Ok(()) + } + let mut remote_reverse_topology: Vec = Vec::new(); + push_parents_then_self( + &gix_graph, + &remote_commit_id, + &merge_base, + &mut remote_reverse_topology, + )?; + let mut local_reverse_topology: Vec = Vec::new(); + push_parents_then_self( + &gix_graph, + &local_commit_id, + &merge_base, + &mut local_reverse_topology, + )?; + + // Assumes that family is family, no matter how distantly related (thus, this union-find structure is sufficient). + // We'll need to switch to something that can distinguish close family from distant family. + // TODO link to a doc describing this + #[derive(Debug, Default)] + struct Family<'repo> { + chars: BTreeSet, + /// In reverse topological order. + remote_commits: Vec>, + /// In reverse topological order. + local_commits: Vec>, + } + type FamilyCell<'repo> = Rc>>; + let mut char_to_family = HashMap::::new(); + for chars in graph.as_bytes().chunks(2) { + let [char1, char2] = chars else { + anyhow::bail!("graph must have even chars"); + }; + match (char_to_family.get(char1), char_to_family.get(char2)) { + (None, None) => { + let mut ref_cell = RefCell::::default(); + ref_cell.get_mut().chars.insert(*char1); + ref_cell.get_mut().chars.insert(*char2); + let family_cell = Rc::new(ref_cell); + char_to_family.insert(*char1, family_cell.clone()); + char_to_family.insert(*char2, family_cell); + } + (None, Some(family_cell)) => { + family_cell.borrow_mut().chars.insert(*char1); + char_to_family.insert(*char1, family_cell.clone()); + } + (Some(family_cell), None) => { + family_cell.borrow_mut().chars.insert(*char2); + char_to_family.insert(*char2, family_cell.clone()); + } + (Some(family_cell1), Some(family_cell2)) => { + family_cell1 + .borrow_mut() + .chars + .extend(family_cell2.borrow_mut().chars.iter()); + char_to_family.insert(*char2, family_cell1.clone()); + } + } + } + let mut remote_commit_id_to_family = HashMap::::new(); + for commit_id in remote_reverse_topology.iter() { + let commit = repo.find_commit(*commit_id)?; + let message = commit.message_raw()?; + if message.get(0) == message.get(1) + && let Some(char) = message.get(0) + && let Some(family) = char_to_family.get(char) + { + family.borrow_mut().remote_commits.push(commit); + remote_commit_id_to_family.insert(*commit_id, family.clone()); + } + } + for commit_id in local_reverse_topology.iter() { + let commit = repo.find_commit(*commit_id)?; + let message = commit.message_raw()?; + if message.get(0) == message.get(1) + && let Some(char) = message.get(0) + && let Some(family) = char_to_family.get(char) + { + family.borrow_mut().local_commits.push(commit); + } + } + + fn write_parents_then_self( + repo: &gix::Repository, + gix_graph: &gix::revision::plumbing::Graph<'_, '_, RevCommit>, + remote_commit_id_to_family: &HashMap, + remote_commit_id: &gix::ObjectId, + merge_base: &gix::ObjectId, + remote_to_final_commit_id: &mut BiMap, + ) -> anyhow::Result { + if remote_commit_id == merge_base { + return Ok(*remote_commit_id); + } + if let Some(final_commit_id) = remote_to_final_commit_id.get_by_left(remote_commit_id) { + return Ok(*final_commit_id); + } + let remote_commit = repo.find_commit(*remote_commit_id)?; + let mut new_parent_ids = Vec::::new(); + for parent_id in remote_commit.parent_ids() { + new_parent_ids.push(write_parents_then_self( + repo, + gix_graph, + remote_commit_id_to_family, + &parent_id.detach(), + merge_base, + remote_to_final_commit_id, + )?); + } + let message = if let Some(family) = remote_commit_id_to_family.get(remote_commit_id) { + let local_summary = family + .borrow() + .local_commits + .iter() + .map(|commit| commit.message().expect("message should be present").title) + .join(","); + BString::from(format!( + "merge remote {} + local {}", + remote_commit.message()?.title, + local_summary + )) + } else { + BString::from(remote_commit.message_raw()?) + }; + let new_commit = gix::objs::Commit { + tree: repo.empty_tree().id, + parents: new_parent_ids.into(), + author: remote_commit.author()?.to_owned()?, + committer: remote_commit.committer()?.to_owned()?, + encoding: None, + message, + extra_headers: Vec::new(), + }; + let final_commit_id = repo.write_object(new_commit)?.detach(); + remote_to_final_commit_id.insert(*remote_commit_id, final_commit_id); + Ok(final_commit_id) + } + + let mut remote_to_final_commit_id = BiMap::::new(); + remote_to_final_commit_id.insert(merge_base, merge_base); + let mut local_to_final_commit_id = HashMap::::new(); + local_to_final_commit_id.insert(merge_base, merge_base); + + for commit_id in local_reverse_topology.iter() { + let local_commit = repo.find_commit(*commit_id)?; + let message = local_commit.message_raw()?; + // Compare the first two bytes. Clippy doesn't like get(0), hence the first(). + if message.first() == message.get(1) + && let Some(char) = message.first() + && let Some(family) = char_to_family.get(char) + { + let mut borrowed_family = family.borrow_mut(); + let remote_commits = std::mem::take(&mut borrowed_family.remote_commits); + std::mem::drop(borrowed_family); + + let mut final_commit_id: Option = None; + + for remote_commit in remote_commits { + final_commit_id = Some(write_parents_then_self( + repo, + &gix_graph, + &remote_commit_id_to_family, + &remote_commit.id, + &merge_base, + &mut remote_to_final_commit_id, + )?); + } + + if let Some(final_commit_id) = final_commit_id { + local_to_final_commit_id.insert(*commit_id, final_commit_id); + } else { + // This commit's family already had its remote commits written + // when another local commit (from the same family) was + // encountered. Reuse the information from this commit's first + // parent. + let parent_id = local_commit + .parent_ids() + .next() + .context("BUG: this descends from merge base; it should not be an orphan")? + .detach(); + local_to_final_commit_id.insert( + *commit_id, + *local_to_final_commit_id + .get(&parent_id) + .context("BUG: parent is either merge base or should have been iterated")?, + ); + } + } else { + let mut new_parent_ids = Vec::::new(); + let mut seen_new_parent_ids = HashSet::new(); + for parent_id in local_commit.parent_ids() { + let new_parent_id = *local_to_final_commit_id + .get(&parent_id.detach()) + .context("BUG: parent is either merge base or should have been iterated")?; + if seen_new_parent_ids.insert(new_parent_id) { + new_parent_ids.push(new_parent_id); + } + } + let new_commit = gix::objs::Commit { + tree: repo.empty_tree().id, + parents: new_parent_ids.into(), + author: local_commit.author()?.to_owned()?, + committer: local_commit.committer()?.to_owned()?, + encoding: None, + message: local_commit.message_raw()?.to_owned(), + extra_headers: Vec::new(), + }; + let final_commit_id = repo.write_object(new_commit)?.detach(); + local_to_final_commit_id.insert(*commit_id, final_commit_id); + + // Any remote commits that would be children of any commit in `new_parent_ids` + // should now be children of `final_commit_id` instead. + for new_parent_id in seen_new_parent_ids { + if let Some(remote_commit_id) = + remote_to_final_commit_id.get_by_right(&new_parent_id) + { + remote_to_final_commit_id.insert(*remote_commit_id, final_commit_id); + } + } + } + } + + Ok(write_parents_then_self( + repo, + &gix_graph, + &remote_commit_id_to_family, + &remote_commit_id, + &merge_base, + &mut remote_to_final_commit_id, + )?) +} + pub async fn handle( ctx: &mut Context, out: &mut OutputChannel, branch_id: &str, + remote: Option, + local: Option, + graph: Option, ) -> anyhow::Result<()> { - let mut progress = out.progress_channel(); let guard = ctx.exclusive_worktree_access(); + if let (Some(remote), Some(local), Some(graph)) = (remote, local, graph) { + println!( + "{}", + do_evo(ctx, guard.read_permission(), remote, local, graph)?.to_hex() + ); + return Ok(()); + } + + let mut progress = out.progress_channel(); let id_map = IdMap::new_from_context(ctx, None, guard.read_permission())?; // Resolve the branch ID diff --git a/crates/but/src/lib.rs b/crates/but/src/lib.rs index bd83505004f..09139e38d69 100644 --- a/crates/but/src/lib.rs +++ b/crates/but/src/lib.rs @@ -1376,9 +1376,14 @@ async fn match_subcommand( result.show_root_cause_error_then_exit_without_destructors(output) } #[cfg(feature = "legacy")] - Subcommands::Merge { branch } => { + Subcommands::Merge { + branch, + remote, + local, + graph, + } => { let mut ctx = setup::init_ctx(&args, InitCtxOptions::default(), out)?; - command::legacy::merge::handle(&mut ctx, out, &branch) + command::legacy::merge::handle(&mut ctx, out, &branch, remote, local, graph) .await .context("Failed to merge branch.") .emit_metrics(metrics_ctx) diff --git a/crates/but/tests/but/command/merge.rs b/crates/but/tests/but/command/merge.rs index 3a39ad6e567..650f55d123d 100644 --- a/crates/but/tests/but/command/merge.rs +++ b/crates/but/tests/but/command/merge.rs @@ -1,7 +1,281 @@ +use bstr::{BString, ByteSlice as _}; +use gix::{actor::Signature, date::Time}; use snapbox::str; use crate::utils::{CommandExt, Sandbox}; +#[test] +fn evo_merge_simple() -> anyhow::Result<()> { + // Simple case: swap order of 2 commits + + let env = Sandbox::open_with_default_settings("merge-gb-local-two-branches")?; + env.but("setup").assert().success(); + + env.but("branch new remote-branch").assert().success(); + env.file("I.txt", ""); + env.but("commit remote-branch -m II").assert().success(); + env.file("J.txt", ""); + env.but("commit remote-branch -m JJ").assert().success(); + + env.but("branch new local-branch").assert().success(); + env.file("Q.txt", ""); + env.but("commit local-branch -m QQ").assert().success(); + env.file("P.txt", ""); + env.but("commit local-branch -m PP").assert().success(); + + insta::assert_snapshot!(env.git_log()?, @r" + * f4d6458 (HEAD -> gitbutler/workspace) GitButler Workspace Commit + |\ + | * 096070c (local-branch) PP + | * 64848c3 QQ + * | 35c5451 (remote-branch) JJ + * | 324d9c3 II + |/ + * 85efbe4 (gb-local/main, gb-local/HEAD, main, gitbutler/target) M + "); + + // The graph is interpreted as chunks of 2 characters, parent->child + // (e.g. AI means AA evolved into II; AA is a commit from some time before + // and might have already been garbage collected). + let output = env + .but("merge --graph=AIAPBJBQ --local=local-branch --remote=remote-branch ''") + .assert() + .success() + .stdout_eq(str![[r#" +910726e00b12d9aba380fa3c347232a28c354196 + +"#]]) + .get_output() + .stdout + .clone(); + let output = env.invoke_git(&format!("log --oneline --graph {}", output.as_bstr())); + insta::assert_snapshot!(output, @" + * 910726e merge remote JJ + local QQ + * 073a8f4 merge remote II + local PP + * 85efbe4 M + "); + + Ok(()) +} + +fn commit( + repo: &gix::Repository, + message: &str, + parent_ids: [gix::ObjectId; N], +) -> anyhow::Result { + let signature = Signature { + name: BString::from("Someone"), + email: BString::from("someone@example.com"), + time: Time { + seconds: 1675176957, + offset: 0, + }, + }; + let commit = gix::objs::Commit { + tree: repo.empty_tree().id, + parents: parent_ids.to_vec().into(), + author: signature.clone(), + committer: signature, + encoding: None, + message: BString::from(message), + extra_headers: Vec::new(), + }; + Ok(repo.write_object(commit)?.detach()) +} + +#[test] +fn evo_merge_complex() -> anyhow::Result<()> { + // Complex case: non-linear on remote. One commit (A, not shown) was independently split + // by both remote (into II, JJ) and local (into PP, QQ). Remote has one novel + // commit (KK). + + let env = Sandbox::open_with_default_settings("merge-gb-local-two-branches")?; + env.but("setup").assert().success(); + + let repo = env.open_repo()?; + let base = repo.rev_parse_single("main")?.detach(); + + let ii = commit(&repo, "II", [base])?; + let jj = commit(&repo, "JJ", [ii])?; + let kk = commit(&repo, "KK", [ii])?; + let ll = commit(&repo, "LL", [jj, kk])?; + let output = env.invoke_git(&format!("log --oneline --graph {}", ll.to_hex())); + insta::assert_snapshot!(output, @r" + * 5895ebf LL + |\ + | * 9375d62 KK + * | dda220e JJ + |/ + * 1ad0fa8 II + * 85efbe4 M + "); + + let ss = commit(&repo, "SS", [base])?; + let pp = commit(&repo, "PP", [ss])?; + let qq = commit(&repo, "QQ", [pp])?; + let output = env.invoke_git(&format!("log --oneline --graph {}", qq.to_hex())); + insta::assert_snapshot!(output, @" + * 56bd561 QQ + * de4c4e7 PP + * a4ae5b7 SS + * 85efbe4 M + "); + + // II, JJ, PP, QQ have common progenitor (AA, not shown in commit graph). + // LL, SS have common progenitor (DD, not shown in commit graph). + let output = env + .but(&format!( + "merge --graph=AIAJAPAQDLDS --local={} --remote={} ''", + qq.to_hex(), + ll.to_hex() + )) + .assert() + .success() + .stdout_eq(str![[r#" +ef65b54a5409b216930cc1b3932f2ba3f16cb414 + +"#]]) + .get_output() + .stdout + .clone(); + let output = env.invoke_git(&format!("log --oneline --graph {}", output.as_bstr())); + insta::assert_snapshot!(output, @r" + * ef65b54 merge remote LL + local SS + |\ + | * 2ccec8f KK + * | d138883 merge remote JJ + local PP,QQ + |/ + * ad95fdc merge remote II + local PP,QQ + * 85efbe4 M + "); + + Ok(()) +} + +#[test] +fn evo_merge_disappearing_local_merge_rebases_children_onto_first_parent() -> anyhow::Result<()> { + let env = Sandbox::open_with_default_settings("merge-gb-local-two-branches")?; + env.but("setup").assert().success(); + + let repo = env.open_repo()?; + let base = repo.rev_parse_single("main")?.detach(); + + let ii = commit(&repo, "II", [base])?; + let jj = commit(&repo, "JJ", [ii])?; + let kk = commit(&repo, "KK", [ii])?; + let ll = commit(&repo, "LL", [jj, kk])?; + let output = env.invoke_git(&format!("log --oneline --graph {}", ll.to_hex())); + insta::assert_snapshot!(output, @r" + * 5895ebf LL + |\ + | * 9375d62 KK + * | dda220e JJ + |/ + * 1ad0fa8 II + * 85efbe4 M + "); + + let pp = commit(&repo, "PP", [base])?; + let qq = commit(&repo, "QQ", [pp])?; + let rr = commit(&repo, "RR", [pp])?; + let tt = commit(&repo, "TT", [qq, rr])?; + let uu = commit(&repo, "UU", [tt])?; + let ss = commit(&repo, "SS", [uu])?; + let output = env.invoke_git(&format!("log --oneline --graph {}", ss.to_hex())); + insta::assert_snapshot!(output, @r" + * 05969ac SS + * 9a1f997 UU + * 57193fc TT + |\ + | * feb2b3c RR + * | a0a3f7f QQ + |/ + * 115a360 PP + * 85efbe4 M + "); + + let output = env + .but(&format!( + "merge --graph=AIAPBJBQBTCKCRDLDS --local={} --remote={} ''", + ss.to_hex(), + ll.to_hex() + )) + .assert() + .success() + .stdout_eq(str![[r#" +850c8312a80985e390d77f31bb7d582e7cd46bd5 + +"#]]) + .get_output() + .stdout + .clone(); + let output = env.invoke_git(&format!("log --oneline --graph {}", output.as_bstr())); + insta::assert_snapshot!(output, @r" + * 850c831 merge remote LL + local SS + |\ + | * b6b5916 merge remote KK + local RR + * | b63528d UU + * | 92a2454 merge remote JJ + local QQ,TT + |/ + * 9e12b54 merge remote II + local PP + * 85efbe4 M + "); + + Ok(()) +} + +#[test] +fn evo_merge_local_tips_before_remote_tips() -> anyhow::Result<()> { + let env = Sandbox::open_with_default_settings("merge-gb-local-two-branches")?; + env.but("setup").assert().success(); + + let repo = env.open_repo()?; + let base = repo.rev_parse_single("main")?.detach(); + + let ii = commit(&repo, "II", [base])?; + let jj = commit(&repo, "JJ", [ii])?; + let output = env.invoke_git(&format!("log --oneline --graph {}", jj.to_hex())); + insta::assert_snapshot!(output, @" + * dda220e JJ + * 1ad0fa8 II + * 85efbe4 M + "); + + let pp = commit(&repo, "PP", [base])?; + let rr = commit(&repo, "RR", [pp])?; + let output = env.invoke_git(&format!("log --oneline --graph {}", rr.to_hex())); + insta::assert_snapshot!(output, @" + * feb2b3c RR + * 115a360 PP + * 85efbe4 M + "); + + let output = env + .but(&format!( + "merge --graph=AIAP --local={} --remote={} ''", + rr.to_hex(), + jj.to_hex() + )) + .assert() + .success() + .stdout_eq(str![[r#" +363345fc7def8ac0bc83bfd01a63fd5169ed7189 + +"#]]) + .get_output() + .stdout + .clone(); + let output = env.invoke_git(&format!("log --oneline --graph {}", output.as_bstr())); + insta::assert_snapshot!(output, @" + * 363345f JJ + * 7e2498e RR + * 9e12b54 merge remote II + local PP + * 85efbe4 M + "); + + Ok(()) +} + #[test] fn merge_first_branch_into_gb_local_and_verify_rebase() -> anyhow::Result<()> { let env = Sandbox::open_with_default_settings("merge-gb-local-two-branches")?;