Describe algorithm for graph layout (#112)

peso · web-flow · commit 8e43436a8751 · 2025-09-17T10:34:17.000+02:00
This is a start on documentation of the main algorithm (issue #6).

I have been poking around in the code for some time and I seem to
discover more nuances all the time. I think the full algorithm
documentation will take a significant amount of time, and therefore it
would be more sensible to write it gradually.

The goal of this PR is to describe all the main steps of the algorithm
in broad terms.
diff --git a/docs/branch_assignment.md b/docs/branch_assignment.md
@@ -0,0 +1,56 @@
+
+# Overview
+
+To generate a graph, [GitGraph::new()] will read the repository
+and assign every commit to a single branch.
+
+It takes the following steps to generate the graph:
+
+- Identify branches
+- Sort branches by persistence
+- Trace branches to commits
+- Filtering and indexing
+
+## Identify branches
+Local and remote git-branches and tags are used as candidates for branches.
+A branch can be identified by a merge commit, even though no current git-branch
+refers to it.
+
+## Sort branches by persistence
+Each branch is assigned a persistence which can be configured by settings.
+Think of persistence as z-order where lower values take precedence.
+**TODO** Merge branch
+
+## Trace branches to commits
+The branches now get to pick their commits, in order of persistence. Each
+branch starts with a head, and follows the primary parent while it is
+available. It stops when the parent is a commit already assigned to a branch.
+**TODO** Duplicate branch names
+**TODO** Handle visual artifacts on merge
+
+## Filtering and indexing
+Commits that have not been assigned a branch are filtered out.
+An *index_map* is created to map from original commit index, to filtered
+commit index.
+**TODO** what? why? Would it not be better to track from child/heads instead of every single commit in repo?
+
+
+
+
+# Branch sorting
+The goal of this algorithm is to assign a column number to each tracked branch so that they can be visualized linearly without overlapping in the graph. It uses a shortest-first scheduling strategy (optionally longest-first and with forward/backward start sorting).
+
+## Initialization
+- occupied: A vector of vectors of vectors of tuples. 
+The outer vector is indexed by the branch's order_group (determined by branch_order based on the settings.branches.order). 
+Each inner vector represents a column within that order group, 
+and the tuples (start, end) store the range of commits occupied by a branch in that column. 
+
+## Preparing Branches for Sorting
+- It creates branches_sort, a vector of tuples containing the branch index, its start commit index (range.0), its end commit index (range.1), its source order group, and its target order group. 
+- It filters out branches that don't have a defined range (meaning they weren't associated with any commits). 
+## Sorting Branches
+- The branches_sort vector is sorted based on a key that prioritizes: 
+    1. The maximum of the source and target order groups. This likely aims to keep related branches (e.g., those involved in merges) closer together. 
+    2. The length of the branch's lifespan (end - start commit index), either shortest-first or longest-first based on the shortest_first setting. 
+    3. The starting commit index, either forward or backward based on the forward setting. 
diff --git a/src/graph.rs b/src/graph.rs
@@ -43,11 +43,13 @@ pub struct GitGraph {
 }
 
 impl GitGraph {
+    /// Generate a branch graph for a repository
     pub fn new(
         mut repository: Repository,
         settings: &Settings,
         max_count: Option<usize>,
     ) -> Result<Self, String> {
+        #![doc = include_str!("../docs/branch_assignment.md")]
         let mut stashes = HashSet::new();
         repository
             .stash_foreach(|_, _, oid| {
@@ -72,6 +74,8 @@ impl GitGraph {
 
         let head = HeadInfo::new(&repository.head().map_err(|err| err.message().to_string())?)?;
 
+        // `commits` will hold the CommitInfo for all commits covered;
+        // `indices` maps each git object id to an index into `commits`.
         let mut commits = Vec::new();
         let mut indices = HashMap::new();
         let mut idx = 0;
@@ -112,22 +116,26 @@ impl GitGraph {
             forward,
         );
 
+        // Remove commits not on a branch. This will give all commits a new index.
         let filtered_commits: Vec<CommitInfo> = commits
             .into_iter()
             .filter(|info| info.branch_trace.is_some())
             .collect();
 
+        // Create indices from git object id into the filtered commits
         let filtered_indices: HashMap<Oid, usize> = filtered_commits
             .iter()
             .enumerate()
             .map(|(idx, info)| (info.oid, idx))
             .collect();
 
+        // Map from old index to new index. None, if old index was removed
         let index_map: HashMap<usize, Option<&usize>> = indices
             .iter()
             .map(|(oid, index)| (*index, filtered_indices.get(oid)))
             .collect();
 
+        // Update branch.range from old to new index. Shrink if endpoints were removed.
         for branch in all_branches.iter_mut() {
             if let Some(mut start_idx) = branch.range.0 {
                 let mut idx0 = index_map[&start_idx];
diff --git a/src/print/unicode.rs b/src/print/unicode.rs
@@ -83,6 +83,8 @@ pub fn print_unicode(graph: &GitGraph, settings: &Settings) -> Result<UnicodeGra
         None
     };
 
+    // Compute commit text into text_lines and add blank rows
+    // if needed to match branch graph inserts.
     let mut index_map = vec![];
     let mut text_lines = vec![];
     let mut offset = 0;
@@ -133,6 +135,7 @@ pub fn print_unicode(graph: &GitGraph, settings: &Settings) -> Result<UnicodeGra
         [SPACE, WHITE, settings.branches.persistence.len() as u8 + 2],
     );
 
+    // Compute branch lines in grid
     for (idx, info) in graph.commits.iter().enumerate() {
         if let Some(trace) = info.branch_trace {
             let branch = &graph.all_branches[trace];
@@ -418,11 +421,38 @@ fn hline(
     }
 }
 
-/// Calculates required additional rows
+/// Calculates required additional rows to visually connect commits that
+/// are not direct descendants in the main commit list. These "inserts"
+/// represent the horizontal lines in the graph.
+///
+/// # Arguments
+///
+/// * `graph`: A reference to the `GitGraph` structure containing the
+///            commit and branch information.
+/// * `compact`: A boolean indicating whether to use a compact layout,
+///              potentially merging some insertions with commits.
+///
+/// # Returns
+///
+/// A `HashMap` where the keys are the indices of commits in the
+/// `graph.commits` vector, and the values are vectors of vectors
+/// of `Occ`. Each inner vector represents a potential row of
+/// insertions needed *before* the commit at the key index. The
+/// `Occ` enum describes what occupies a cell in that row
+/// (either a commit or a range representing a connection).
+///
 fn get_inserts(graph: &GitGraph, compact: bool) -> HashMap<usize, Vec<Vec<Occ>>> {
+    // Initialize an empty HashMap to store the required insertions. The key is the commit
+    // index, and the value is a vector of rows, where each row is a vector of Occupations (`Occ`).
     let mut inserts: HashMap<usize, Vec<Vec<Occ>>> = HashMap::new();
 
+    // First, for each commit, we initialize an entry in the `inserts`
+    // map with a single row containing the commit itself. This ensures
+    // that every commit has a position in the grid.
     for (idx, info) in graph.commits.iter().enumerate() {
+        // Get the visual column assigned to the branch of this commit. Unwrap is safe here
+        // because `branch_trace` should always point to a valid branch with an assigned column
+        // for commits that are included in the filtered graph.
         let column = graph.all_branches[info.branch_trace.unwrap()]
             .visual
             .column
@@ -431,30 +461,56 @@ fn get_inserts(graph: &GitGraph, compact: bool) -> HashMap<usize, Vec<Vec<Occ>>>
         inserts.insert(idx, vec![vec![Occ::Commit(idx, column)]]);
     }
 
+    // Now, iterate through the commits again to identify connections
+    // needed between a commit and any parent that sits in a different
+    // column.
     for (idx, info) in graph.commits.iter().enumerate() {
+        // If the commit has a branch trace (meaning it belongs to a visualized branch).
         if let Some(trace) = info.branch_trace {
+            // Get the `BranchInfo` for the current commit's branch.
             let branch = &graph.all_branches[trace];
+            // Get the visual column of the current commit's branch. Unwrap is safe as explained above.
             let column = branch.visual.column.unwrap();
 
+            // Iterate through the two possible parents of the current commit.
             for p in 0..2 {
+                // If the commit has a parent at this index (0 for the first parent, 1 for the second).
                 if let Some(par_oid) = info.parents[p] {
+                    // Try to find the index of the parent commit in the `graph.commits` vector.
                     if let Some(par_idx) = graph.indices.get(&par_oid) {
                         let par_info = &graph.commits[*par_idx];
                         let par_branch = &graph.all_branches[par_info.branch_trace.unwrap()];
                         let par_column = par_branch.visual.column.unwrap();
+                        // Determine the sorted range of columns between the current commit and its parent.
                         let column_range = sorted(column, par_column);
 
+                        // If the column of the current commit is different from the column of its parent,
+                        // it means we need to draw a horizontal line (an "insert") to connect them.
                         if column != par_column {
+                            // Find the index in the `graph.commits` list where the visual connection
+                            // should deviate from the parent's line. This helps in drawing the graph
+                            // correctly when branches diverge or merge.
                             let split_index = super::get_deviate_index(graph, idx, *par_idx);
+                            // Access the entry in the `inserts` map for the `split_index`.
                             match inserts.entry(split_index) {
+                                // If there's already an entry at this `split_index` (meaning other
+                                // insertions might be needed before this commit).
                                 Occupied(mut entry) => {
+                                    // Find the first available row in the existing vector of rows
+                                    // where the new range doesn't overlap with existing occupations.
                                     let mut insert_at = entry.get().len();
                                     for (insert_idx, sub_entry) in entry.get().iter().enumerate() {
                                         let mut occ = false;
+                                        // Check for overlaps with existing `Occ` in the current row.
                                         for other_range in sub_entry {
+                                            // Check if the current column range overlaps with the other range.
                                             if other_range.overlaps(&column_range) {
                                                 match other_range {
+                                                    // If the other occupation is a commit.
                                                     Occ::Commit(target_index, _) => {
+                                                        // In compact mode, we might allow overlap with the commit itself
+                                                        // for merge commits (specifically the second parent) to keep the
+                                                        // graph tighter.
                                                         if !compact
                                                             || !info.is_merge
                                                             || idx != *target_index
@@ -464,7 +520,9 @@ fn get_inserts(graph: &GitGraph, compact: bool) -> HashMap<usize, Vec<Vec<Occ>>>
                                                             break;
                                                         }
                                                     }
+                                                    // If the other occupation is a range (another connection).
                                                     Occ::Range(o_idx, o_par_idx, _, _) => {
+                                                        // Ranges sharing either the child or the parent commit may share a row.
                                                         if idx != *o_idx && par_idx != o_par_idx {
                                                             occ = true;
                                                             break;
@@ -473,12 +531,15 @@ fn get_inserts(graph: &GitGraph, compact: bool) -> HashMap<usize, Vec<Vec<Occ>>>
                                                 }
                                             }
                                         }
+                                        // If no overlap is found in this row, we can insert here.
                                         if !occ {
                                             insert_at = insert_idx;
                                             break;
                                         }
                                     }
+                                    // Get a mutable reference to the vector of rows for this `split_index`.
                                     let vec = entry.get_mut();
+                                    // If no suitable row was found, add a new row.
                                     if insert_at == vec.len() {
                                         vec.push(vec![Occ::Range(
                                             idx,
@@ -487,6 +548,7 @@ fn get_inserts(graph: &GitGraph, compact: bool) -> HashMap<usize, Vec<Vec<Occ>>>
                                             column_range.1,
                                         )]);
                                     } else {
+                                        // Otherwise, insert the new range into the found row.
                                         vec[insert_at].push(Occ::Range(
                                             idx,
                                             *par_idx,
@@ -495,7 +557,9 @@ fn get_inserts(graph: &GitGraph, compact: bool) -> HashMap<usize, Vec<Vec<Occ>>>
                                         ));
                                     }
                                 }
+                                // If there's no entry at this `split_index` yet.
                                 Vacant(entry) => {
+                                    // Create a new entry with a single row containing the range.
                                     entry.insert(vec![vec![Occ::Range(
                                         idx,
                                         *par_idx,
@@ -511,6 +575,7 @@ fn get_inserts(graph: &GitGraph, compact: bool) -> HashMap<usize, Vec<Vec<Occ>>>
         }
     }
 
+    // Return the map of required insertions.
     inserts
 }