Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package io.jenkins.plugins.forensics.git.miner;

import edu.hm.hafner.util.Generated;

import java.io.Serial;
import java.io.Serializable;
import java.util.Objects;
import java.util.StringJoiner;

/**
 * Represents the temporal coupling between two files in a Git repository. Files that are frequently changed together
 * are considered temporally coupled; the coupling is characterized by the number of shared commits and a coupling
 * ratio indicating how often the less frequently modified file changes together with the other.
 *
 * <p>
 * Instances are immutable value objects: all fields are final and are stored exactly as passed to the constructor,
 * without validation or normalization. Two instances are equal if all four properties are equal; the order of the
 * two files is significant for equality.
 * </p>
 *
 * @author Akash Manna
 */
public class TemporalCoupling implements Serializable {
    @Serial
    private static final long serialVersionUID = 1L;

    private final String leftFile;
    private final String rightFile;
    private final int coChanges;
    private final double couplingRatio;

    /**
     * Creates a new {@link TemporalCoupling} instance.
     *
     * @param leftFile
     *         the path of the first file
     * @param rightFile
     *         the path of the second file
     * @param coChanges
     *         the number of commits in which both files appeared together
     * @param couplingRatio
     *         the coupling ratio (co-changes / min(commits of leftFile, commits of rightFile))
     */
    public TemporalCoupling(final String leftFile, final String rightFile,
            final int coChanges, final double couplingRatio) {
        this.leftFile = leftFile;
        this.rightFile = rightFile;
        this.coChanges = coChanges;
        this.couplingRatio = couplingRatio;
    }

    /**
     * Returns the path of the first file in this coupling pair.
     *
     * @return the path of the first file
     */
    public String getLeftFile() {
        return leftFile;
    }

    /**
     * Returns the path of the second file in this coupling pair.
     *
     * @return the path of the second file
     */
    public String getRightFile() {
        return rightFile;
    }

    /**
     * Returns the number of commits in which both files were changed together.
     *
     * @return the absolute co-change count
     */
    public int getCoChanges() {
        return coChanges;
    }

    /**
     * Returns the coupling ratio: the fraction of the less-frequently-committed file's commits in which the other
     * file also appeared. A value of 1.0 means the two files are always changed together. The value is returned as
     * it was passed to the constructor; this class does not enforce the documented range itself.
     *
     * @return the coupling ratio, expected to be in [0.0, 1.0]
     */
    public double getCouplingRatio() {
        return couplingRatio;
    }

    /**
     * Compares this coupling with another object. Two couplings are equal if they are of the same class and have
     * the same left file, right file, co-change count and coupling ratio.
     *
     * @param o
     *         the object to compare with, may be {@code null}
     *
     * @return {@code true} if both objects are equal, {@code false} otherwise
     */
    @Override
    @Generated
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        var that = (TemporalCoupling) o;
        // Objects.equals keeps equals() null-tolerant, in line with hashCode() and toString(): the constructor
        // does not reject null paths, so dereferencing the fields directly could throw a NullPointerException.
        return coChanges == that.coChanges
                && Double.compare(that.couplingRatio, couplingRatio) == 0
                && Objects.equals(leftFile, that.leftFile)
                && Objects.equals(rightFile, that.rightFile);
    }

    @Override
    @Generated
    public int hashCode() {
        return Objects.hash(leftFile, rightFile, coChanges, couplingRatio);
    }

    @Override
    @Generated
    public String toString() {
        return new StringJoiner(", ", TemporalCoupling.class.getSimpleName() + "[", "]")
                .add("leftFile='" + leftFile + "'")
                .add("rightFile='" + rightFile + "'")
                .add("coChanges=" + coChanges)
                .add("couplingRatio=" + couplingRatio)
                .toString();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package io.jenkins.plugins.forensics.git.miner;

import edu.umd.cs.findbugs.annotations.CheckForNull;
import edu.umd.cs.findbugs.annotations.NonNull;

import edu.hm.hafner.util.FilteredLog;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import io.jenkins.plugins.forensics.miner.CommitDiffItem;

/**
 * Mines temporal coupling from a list of {@link CommitDiffItem} objects. Two files are regarded as temporally
 * coupled when they are modified in the same commits. For every pair of files the miner determines the number of
 * commits that contain both files as well as the coupling ratio; only pairs that reach the configured minimum
 * number of co-changes are reported.
 *
 * @author Akash Manna
 */
public class TemporalCouplingMiner {
    /**
     * Default threshold: the minimum number of co-changes a file pair needs to be reported as temporally coupled.
     */
    public static final int DEFAULT_MINIMUM_CO_CHANGES = 2;

    private static final int MINIMUM_FILES_FOR_COUPLING = 2;
    private static final String NO_FILE_NAME = "/dev/null";
    // Separator between the two file names of a pair key; presumably chosen because a NUL character cannot be part
    // of a file path -- TODO confirm
    private static final String PAIR_KEY_SEPARATOR = "\u0000";

    private final int minimumCoChanges;

    /**
     * Creates a new {@link TemporalCouplingMiner} that applies the {@link #DEFAULT_MINIMUM_CO_CHANGES} threshold.
     */
    public TemporalCouplingMiner() {
        this(DEFAULT_MINIMUM_CO_CHANGES);
    }

    /**
     * Creates a new {@link TemporalCouplingMiner} that applies the specified co-change threshold.
     *
     * @param minimumCoChanges
     *         the number of shared commits a file pair must reach before it is reported as coupled; must be
     *         at least 1
     * @throws IllegalArgumentException
     *         if {@code minimumCoChanges} is less than 1
     */
    public TemporalCouplingMiner(final int minimumCoChanges) {
        this.minimumCoChanges = requireAtLeastOne(minimumCoChanges);
    }

    /** Returns the given threshold unchanged, or throws if it is smaller than 1. */
    private static int requireAtLeastOne(final int threshold) {
        if (threshold >= 1) {
            return threshold;
        }
        throw new IllegalArgumentException(
                "minimumCoChanges must be >= 1, but was: " + threshold);
    }

    /**
     * Computes the temporal coupling of all file pairs that occur in the given commit diff items. The diff items
     * are grouped by their commit ID first. Then, for every unordered pair of files, the number of commits that
     * contain both files is counted. Pairs with fewer than {@code minimumCoChanges} shared commits are dropped.
     *
     * @param commitDiffItems
     *         the commit diff items as produced by {@link CommitAnalyzer}; must not be {@code null}
     * @param logger
     *         the logger that receives the summary message; must not be {@code null}
     *
     * @return a list of {@link TemporalCoupling} entries in no particular order, one for each qualifying file pair
     */
    public List<TemporalCoupling> compute(final List<CommitDiffItem> commitDiffItems, final FilteredLog logger) {
        Map<String, Set<String>> filesPerCommit = groupFilesByCommit(commitDiffItems);
        Map<String, Integer> commitsPerFile = countCommitsPerFile(filesPerCommit);
        Map<String, Integer> coChangesPerPair = countCoChanges(filesPerCommit);

        List<TemporalCoupling> couplings = buildCouplings(coChangesPerPair, commitsPerFile);
        logger.logInfo("Computed temporal coupling: found %d qualifying file pair(s) with at least %d co-change(s)",
                couplings.size(), minimumCoChanges);

        return couplings;
    }

    /** Maps every commit ID to the set of file names that were resolved from the diff items of that commit. */
    private Map<String, Set<String>> groupFilesByCommit(final Collection<CommitDiffItem> items) {
        Map<String, Set<String>> filesPerCommit = new HashMap<>();
        for (CommitDiffItem diff : items) {
            String path = resolveFileName(diff);
            if (path != null) {
                filesPerCommit.computeIfAbsent(diff.getId(), id -> new HashSet<>()).add(path);
            }
        }
        return filesPerCommit;
    }

    /**
     * Selects the file name of a diff item: the new path is used unless it equals {@code /dev/null}, in which
     * case the old path is used. If the old path equals {@code /dev/null} as well, there is no file name.
     */
    @CheckForNull
    private String resolveFileName(@NonNull final CommitDiffItem item) {
        String newPath = item.getNewPath();
        if (NO_FILE_NAME.equals(newPath)) {
            String oldPath = item.getOldPath();
            return NO_FILE_NAME.equals(oldPath) ? null : oldPath;
        }
        return newPath;
    }

    /** Counts for every file the number of commits whose file set contains it. */
    private Map<String, Integer> countCommitsPerFile(final Map<String, Set<String>> commitToFiles) {
        return commitToFiles.values().stream()
                .flatMap(Set::stream)
                .collect(Collectors.toMap(file -> file, file -> 1, Integer::sum));
    }

    /**
     * Counts for every pair of files the number of commits that contain both of them. The files of a commit are
     * sorted before the pairs are formed, so a pair always produces the same key regardless of the order in which
     * the files were recorded.
     */
    private Map<String, Integer> countCoChanges(final Map<String, Set<String>> commitToFiles) {
        Map<String, Integer> coChangesPerPair = new HashMap<>();
        for (Set<String> files : commitToFiles.values()) {
            if (files.size() >= MINIMUM_FILES_FOR_COUPLING) {
                List<String> ordered = new ArrayList<>(files);
                ordered.sort(String::compareTo);

                int size = ordered.size();
                for (int first = 0; first < size - 1; first++) {
                    String smaller = ordered.get(first);
                    for (String larger : ordered.subList(first + 1, size)) {
                        coChangesPerPair.merge(encodePair(smaller, larger), 1, Integer::sum);
                    }
                }
            }
        }
        return coChangesPerPair;
    }

    /** Converts all pairs that reach the co-change threshold into {@link TemporalCoupling} instances. */
    private List<TemporalCoupling> buildCouplings(final Map<String, Integer> pairCoChangeCounts,
            final Map<String, Integer> fileCommitCounts) {
        return pairCoChangeCounts.entrySet().stream()
                .filter(pair -> pair.getValue() >= minimumCoChanges)
                .map(pair -> toCoupling(pair.getKey(), pair.getValue(), fileCommitCounts))
                .collect(Collectors.toCollection(ArrayList::new));
    }

    /**
     * Creates the coupling for a single pair key. The ratio is the co-change count divided by the commit count of
     * the file that has fewer commits; a file without a recorded commit count is treated as having one commit.
     */
    private TemporalCoupling toCoupling(final String key, final int coChanges,
            final Map<String, Integer> fileCommitCounts) {
        String[] files = decodePair(key);
        String leftFile = files[0];
        String rightFile = files[1];

        int fewerCommits = Math.min(
                fileCommitCounts.getOrDefault(leftFile, 1),
                fileCommitCounts.getOrDefault(rightFile, 1));

        double ratio = 0.0;
        if (fewerCommits > 0) {
            ratio = (double) coChanges / fewerCommits;
        }
        return new TemporalCoupling(leftFile, rightFile, coChanges, ratio);
    }

    /** Joins two file names into a single map key. */
    private static String encodePair(final String fileA, final String fileB) {
        return fileA + PAIR_KEY_SEPARATOR + fileB;
    }

    /** Splits a map key created by {@link #encodePair(String, String)} into its two file names. */
    private static String[] decodePair(final String key) {
        return key.split(PAIR_KEY_SEPARATOR, 2);
    }
}
Loading
Loading