Skip to content

Commit 8a2abbf

Browse files
#172 Improved Git Access Performance
- Parallelized the capture of changes between commits in captureChangeCountByCommitTimestamp()
- Reduced the number of calls to git.log in fileLog()
1 parent 2edc25a commit 8a2abbf

File tree

1 file changed

+43
-38
lines changed

1 file changed

+43
-38
lines changed

change-proneness-ranker/src/main/java/org/hjug/git/GitLogReader.java

Lines changed: 43 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,9 @@
22

33
import java.io.*;
44
import java.util.*;
5+
import java.util.concurrent.ConcurrentHashMap;
6+
import java.util.concurrent.ConcurrentMap;
7+
import java.util.stream.IntStream;
58
import lombok.extern.slf4j.Slf4j;
69
import org.eclipse.jgit.api.Git;
710
import org.eclipse.jgit.api.errors.GitAPIException;
@@ -70,23 +73,23 @@ public ScmLogInfo fileLog(String path) throws GitAPIException, IOException {
7073

7174
int commitCount = 0;
7275
int earliestCommit = Integer.MAX_VALUE;
76+
int mostRecentCommit = 0;
77+
7378
for (RevCommit revCommit : revCommits) {
74-
if (revCommit.getCommitTime() < earliestCommit) {
75-
earliestCommit = revCommit.getCommitTime();
79+
int commitTime = revCommit.getCommitTime();
80+
if (commitCount == 0) {
81+
mostRecentCommit = commitTime;
82+
}
83+
if (commitTime < earliestCommit) {
84+
earliestCommit = commitTime;
7685
}
7786
commitCount++;
7887
}
7988

80-
// based on https://stackoverflow.com/a/59274329/346247
81-
Iterator<RevCommit> iterator =
82-
git.log().add(branchId).addPath(path).setMaxCount(1).call().iterator();
83-
84-
if (!iterator.hasNext()) {
89+
if (commitCount == 0) {
8590
return new ScmLogInfo(path, null, earliestCommit, earliestCommit, commitCount);
8691
}
8792

88-
int mostRecentCommit = iterator.next().getCommitTime();
89-
9093
return new ScmLogInfo(path, null, earliestCommit, mostRecentCommit, commitCount);
9194
}
9295

@@ -96,53 +99,55 @@ public TreeMap<Integer, Integer> captureChangeCountByCommitTimestamp() throws IO
9699
TreeMap<Integer, Integer> changesByCommitTimestamp = new TreeMap<>();
97100

98101
ObjectId branchId = gitRepository.resolve("HEAD");
99-
Iterable<RevCommit> commits = git.log().add(branchId).call();
102+
List<RevCommit> commitList = new ArrayList<>();
103+
git.log().add(branchId).call().forEach(commitList::add);
100104

101-
RevCommit newCommit = null;
105+
if (commitList.isEmpty()) {
106+
return changesByCommitTimestamp;
107+
}
102108

103-
for (Iterator<RevCommit> iterator = commits.iterator(); iterator.hasNext(); ) {
104-
RevCommit oldCommit = iterator.next();
109+
// Handle first / initial commit
110+
changesByCommitTimestamp.putAll(walkFirstCommit(commitList.get(commitList.size() - 1)));
105111

106-
int count = 0;
107-
if (null == newCommit && iterator.hasNext()) {
108-
newCommit = oldCommit;
109-
continue;
110-
} else if (!iterator.hasNext()) {
111-
// Handle first / initial commit
112-
changesByCommitTimestamp.putAll(walkFirstCommit(oldCommit));
113-
}
112+
if (commitList.size() < 2) {
113+
return changesByCommitTimestamp;
114+
}
114115

115-
if (null != newCommit) {
116-
for (DiffEntry entry : getDiffEntries(newCommit, oldCommit)) {
116+
// Process adjacent commit pairs in parallel; each pair is independent
117+
ConcurrentMap<Integer, Integer> concurrentResults = new ConcurrentHashMap<>();
118+
IntStream.range(0, commitList.size() - 1).parallel().forEach(i -> {
119+
RevCommit newer = commitList.get(i);
120+
RevCommit older = commitList.get(i + 1);
121+
try {
122+
int count = 0;
123+
for (DiffEntry entry : getDiffEntries(newer, older)) {
117124
if (entry.getNewPath().endsWith(JAVA_FILE_TYPE)
118125
|| entry.getOldPath().endsWith(JAVA_FILE_TYPE)) {
119126
count++;
120127
}
121128
}
122-
123129
if (count > 0) {
124-
changesByCommitTimestamp.put(newCommit.getCommitTime(), count);
130+
concurrentResults.put(newer.getCommitTime(), count);
125131
}
126-
newCommit = oldCommit;
132+
} catch (IOException e) {
133+
log.error("Error getting diff entries: {}", e.getMessage());
127134
}
128-
}
135+
});
129136

137+
changesByCommitTimestamp.putAll(concurrentResults);
130138
return changesByCommitTimestamp;
131139
}
132140

133141
private List<DiffEntry> getDiffEntries(RevCommit newCommit, RevCommit oldCommit) throws IOException {
134-
CanonicalTreeParser oldTreeIter = new CanonicalTreeParser();
135-
CanonicalTreeParser newTreeIter = new CanonicalTreeParser();
136-
try (ObjectReader reader = git.getRepository().newObjectReader()) {
137-
ObjectId oldTree = git.getRepository().resolve(newCommit.getTree().name());
138-
oldTreeIter.reset(reader, oldTree);
139-
ObjectId newTree = git.getRepository().resolve(oldCommit.getTree().name());
140-
newTreeIter.reset(reader, newTree);
142+
try (ObjectReader reader = gitRepository.newObjectReader();
143+
DiffFormatter df = new DiffFormatter(NullOutputStream.INSTANCE)) {
144+
df.setRepository(gitRepository);
145+
CanonicalTreeParser oldTreeIter = new CanonicalTreeParser();
146+
oldTreeIter.reset(reader, newCommit.getTree());
147+
CanonicalTreeParser newTreeIter = new CanonicalTreeParser();
148+
newTreeIter.reset(reader, oldCommit.getTree());
149+
return df.scan(oldTreeIter, newTreeIter);
141150
}
142-
143-
DiffFormatter df = new DiffFormatter(NullOutputStream.INSTANCE);
144-
df.setRepository(git.getRepository());
145-
return df.scan(oldTreeIter, newTreeIter);
146151
}
147152

148153
Map<Integer, Integer> walkFirstCommit(RevCommit firstCommit) throws IOException {

0 commit comments

Comments
 (0)