Skip to content

Commit 6be7587

Browse files
committed
improve performance of Locate in common scenario
1 parent f29fb16 commit 6be7587

File tree

1 file changed

+62
-7
lines changed

1 file changed

+62
-7
lines changed

GitContentSearch/GitFileLocator.cs

Lines changed: 62 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,24 @@ namespace GitContentSearch
77
/// Locates files in a Git repository's history using an efficient binary search approach combined with rename tracking.
88
///
99
/// Algorithm Overview:
10-
/// 1. Binary Search Phase:
10+
/// 1. Quick HEAD Check:
11+
/// - First checks if the file exists in the latest commit (HEAD)
12+
/// - If found, returns immediately without further searching
13+
///
14+
/// 2. Binary Search Phase (if not found in HEAD):
1115
/// - Gets a list of all commit hashes in chronological order
1216
/// - Uses binary search to efficiently find any occurrence of the target file
1317
/// - For each checked commit, uses 'git ls-tree' to list files and find matches
1418
/// - Displays the file path when found and when it changes between commits
1519
///
16-
/// 2. Rename Tracking Phase:
20+
/// 3. Rename Tracking Phase:
1721
/// - Once the file is found in a commit, tracks its history forward to HEAD
1822
/// - Uses 'git log --follow --name-status' to detect renames and deletions
1923
/// - Maintains a chronological history of all paths the file has had
2024
/// - Returns the most recent valid path and commit where the file exists
2125
///
2226
/// Performance Considerations:
27+
/// - First checks HEAD to avoid expensive operations when file exists in current state
2328
/// - Uses binary search to quickly find an occurrence of the file instead of scanning all commits
2429
/// - Only tracks renames forward from first found commit, not entire history
2530
/// - Caches commit times and reuses them to minimize git command calls
@@ -70,7 +75,7 @@ public GitFileLocator(IGitHelper gitHelper, ISearchLogger logger, IProcessWrappe
7075

7176
/// <summary>
7277
/// Uses git commands to efficiently search for a file through the repository's history.
73-
/// First gets all commit hashes, then uses binary search to locate the file.
78+
/// First checks if the file exists in HEAD, then falls back to binary search if needed.
7479
/// </summary>
7580
private (string? CommitHash, string? FilePath) LocateFileUsingGitCommand(string fileName, IProgress<double>? progress = null, CancellationToken cancellationToken = default)
7681
{
@@ -79,7 +84,57 @@ public GitFileLocator(IGitHelper gitHelper, ISearchLogger logger, IProcessWrappe
7984
// Check for cancellation before starting
8085
cancellationToken.ThrowIfCancellationRequested();
8186

82-
// Get all commit hashes first
87+
// First, check if the file exists in the latest commit (HEAD)
88+
_logger.WriteLine("Checking if file exists in the latest commit (HEAD)...");
89+
progress?.Report(0.05); // 5% progress for starting the HEAD check
90+
91+
string? headCommitHash = null;
92+
_processWrapper.StartAndProcessOutput(
93+
"rev-parse HEAD",
94+
_gitHelper.GetRepositoryPath(),
95+
line =>
96+
{
97+
if (line.Length == 40 && line.All(c => char.IsLetterOrDigit(c)))
98+
{
99+
headCommitHash = line;
100+
}
101+
},
102+
cancellationToken
103+
);
104+
105+
if (headCommitHash != null)
106+
{
107+
string? foundPath = null;
108+
_processWrapper.StartAndProcessOutput(
109+
$"ls-tree --name-only -r HEAD",
110+
_gitHelper.GetRepositoryPath(),
111+
line =>
112+
{
113+
if (line.EndsWith(fileName, StringComparison.OrdinalIgnoreCase))
114+
{
115+
foundPath = line;
116+
}
117+
},
118+
cancellationToken
119+
);
120+
121+
if (foundPath != null)
122+
{
123+
var commitTime = _gitHelper.GetCommitTime(headCommitHash);
124+
_logger.WriteLine($"File found in HEAD commit {headCommitHash} at {commitTime}");
125+
_logger.WriteLine($" Path: {foundPath}");
126+
_logger.Flush();
127+
128+
// File exists in HEAD, return immediately
129+
return (headCommitHash, foundPath);
130+
}
131+
132+
_logger.WriteLine("File not found in HEAD, searching through repository history...");
133+
}
134+
135+
progress?.Report(0.1); // 10% progress after HEAD check
136+
137+
// If not found in HEAD, get all commit hashes and perform binary search
83138
var commits = new List<string>();
84139
_processWrapper.StartAndProcessOutput(
85140
"log --all --pretty=format:%H",
@@ -104,7 +159,7 @@ public GitFileLocator(IGitHelper gitHelper, ISearchLogger logger, IProcessWrappe
104159
return (null, null);
105160
}
106161

107-
progress?.Report(0.1); // 10% progress after getting commits
162+
progress?.Report(0.2); // 20% progress after getting commits
108163

109164
// Use binary search to find the file
110165
var result = BinarySearchFile(commits, fileName, progress, cancellationToken);
@@ -163,8 +218,8 @@ public GitFileLocator(IGitHelper gitHelper, ISearchLogger logger, IProcessWrappe
163218
searchedCommits.Add(commitHash);
164219

165220
totalSearches++;
166-
// Report progress between 10% and 95%
167-
progress?.Report(0.1 + (0.85 * Math.Min(totalSearches, commits.Count) / commits.Count));
221+
// Report progress between 20% and 95%
222+
progress?.Report(0.2 + (0.75 * Math.Min(totalSearches, commits.Count) / commits.Count));
168223

169224
try
170225
{

0 commit comments

Comments
 (0)