Skip to content

Commit dc66567

Browse files
committed
Merge branch 'utils-git' - improve some commands, add new one
* utils-git:
  utils.git: Add and test GitRepo.oldest_root_metadata()
  utils.git: Improve GitRepo.find_roots() method
  utils.git: Improve GitRepo.count_commits() method
2 parents 54e19f0 + e32f80c commit dc66567

2 files changed

Lines changed: 140 additions & 17 deletions

File tree

src/diffannotator/utils/git.py

Lines changed: 109 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2081,8 +2081,8 @@ def changes_survival(self, commit: str, prev: Optional[str] = None,
20812081
return all_commits_data, lines_survival
20822082

20832083
def count_commits(self,
2084-
start_from: str = StartLogFrom.CURRENT,
2085-
until_commit: str = None,
2084+
start_from: str|list[str] = StartLogFrom.CURRENT,
2085+
until_commit: None|str|list[str] = None,
20862086
first_parent: bool = False) -> int:
20872087
"""Count number of commits in the repository
20882088
@@ -2094,14 +2094,14 @@ def count_commits(self,
20942094
20952095
Parameters
20962096
----------
2097-
start_from : str or StartLogFrom
2098-
where to start from to follow 'parent' links
2099-
until_commit : str or None
2097+
start_from : str or list[str] or StartLogFrom
2098+
where to start from to follow 'parent' links;
2099+
can be used to provide additional options to `git rev-list` command.
2100+
until_commit : str or list[str] or None
21002101
where to stop following 'parent' links; also ensures that we
2101-
follow ancestry path to it, optional
2102+
follow ancestry path to it if `until_commit` is a single str, optional
21022103
first_parent : bool
2103-
follow only the first parent commit upon seeing a merge
2104-
commit
2104+
follow only the first parent commit upon seeing a merge commit
21052105
21062106
Returns
21072107
-------
@@ -2110,12 +2110,17 @@ def count_commits(self,
21102110
"""
21112111
if hasattr(start_from, 'value'):
21122112
start_from = start_from.value
2113+
if not isinstance(start_from, (list, tuple)):
2114+
start_from = [ str(start_from) ]
21132115
cmd = [
21142116
'git', '-C', self.repo,
2115-
'rev-list', '--count', str(start_from),
2117+
'rev-list', '--count', *start_from,
21162118
]
21172119
if until_commit is not None:
2118-
cmd.extend(['--not', until_commit, f'--ancestry-path={until_commit}', '--boundary'])
2120+
if isinstance(until_commit, (list, tuple)):
2121+
cmd.extend(['--not', *until_commit, '--boundary'])
2122+
else:
2123+
cmd.extend(['--not', until_commit, f'--ancestry-path={until_commit}', '--boundary'])
21192124
if first_parent:
21202125
cmd.append('--first-parent')
21212126
process = subprocess.run(cmd,
@@ -2189,28 +2194,38 @@ def list_core_authors(self, start_from: str|StartLogFrom = StartLogFrom.ALL,
21892194
perc
21902195
)
21912196

2192-
def find_roots(self, start_from: str = StartLogFrom.CURRENT) -> list[str]:
2193-
"""Find root commits (commits without parents), starting from `start_from`
2197+
def find_roots(self,
2198+
start_from: str|list[str]|StartLogFrom|None = StartLogFrom.CURRENT) -> list[str]:
2199+
"""Find root commits (commits without parents), starting from `start_from`.
2200+
2201+
You can provide multiple starting points by passing a list of them;
2202+
you can then also use `start_from` to provide additional options to `git rev-list`
2203+
command.
21942204
21952205
Parameters
21962206
----------
2197-
start_from : str or StartLogFrom
2198-
where to start from to follow 'parent' links
2207+
start_from : str or list[str] or StartLogFrom, optional
2208+
where to start from to follow 'parent' links;
2209+
can be used to provide additional options to `git rev-list` command.
2210+
2211+
If None, defaults to 'HEAD' (current commit).
21992212
22002213
Returns
22012214
-------
22022215
list[str]
22032216
list of root commits, as SHA-1
22042217
"""
22052218
if hasattr(start_from, 'value'):
2206-
start_from = start_from.value
2219+
start_from = [ str(start_from.value) ]
22072220
elif start_from is None:
2208-
start_from = 'HEAD'
2221+
start_from = [ 'HEAD' ]
2222+
elif not isinstance(start_from, (list, tuple)):
2223+
start_from = [ str(start_from) ]
22092224

22102225
cmd = [
22112226
'git', '-C', self.repo,
22122227
'rev-list', '--max-parents=0', # gives all root commits
2213-
str(start_from),
2228+
*start_from,
22142229
]
22152230
process = subprocess.run(cmd,
22162231
capture_output=True, check=True,
@@ -2219,6 +2234,83 @@ def find_roots(self, start_from: str = StartLogFrom.CURRENT) -> list[str]:
22192234
text=True, errors=self.encoding_errors)
22202235
return process.stdout.splitlines()
22212236

2237+
def oldest_root_metadata(
    self,
    start_from: str|list[str]|StartLogFrom|None = StartLogFrom.CURRENT
) -> dict[str, Union[str, dict, list]]:
    """Return parsed metadata of the oldest root commit.

    Runs `git rev-list --max-parents=0` (root commits only) with
    `--date-order --reverse`, so that the oldest root commit by committer
    date comes first, and parses only that first NUL-terminated record
    with `_parse_commit_text()`.

    Parameters
    ----------
    start_from : str or list[str] or StartLogFrom, optional
        Starting point(s) handed to `git rev-list`.  This matters when
        the repository has orphaned branches, that is separate lines
        of history with their own root commits.  Accepted values:

        - a single string naming a commit,
        - a list (or tuple) of strings, passed through as-is; it may
          also carry extra `git rev-list` options, but must still
          include at least one starting reference,
        - a `StartLogFrom` enumeration value (its `.value` is used),
        - `None`, which means 'HEAD'.

    Returns
    -------
    dict
        Selected parts of commit metadata, as produced by
        `_parse_commit_text()`, in the following format:

            {
                'id': 'f8ffd4067d1f1b902ae06c52db4867f57a424f38',
                'parents': ['fe4a622e5202cd990c8ec853d56e25922f263243'],
                'tree': '5347fe7b8606e7a164ab5cd355ee5d86c99796c0',
                'author': {
                    'author': 'A U Thor <author@example.com>',
                    'name': 'A U Thor',
                    'email': 'author@example.com',
                    'timestamp': 1112912053,
                    'tz_info': '-0600',
                },
                'committer': {
                    'committer': 'C O Mitter <committer@example.com>',
                    'name': 'C O Mitter',
                    'email': 'committer@example.com',
                    'timestamp': 1693598847,
                    'tz_info': '+0200',
                },
                'message': 'Commit summary\n\nOptional longer description\n',
            }

    Raises
    ------
    subprocess.CalledProcessError
        If the `git rev-list` command exits with non-zero status.
    """
    # Normalize `start_from` into a sequence of command line arguments.
    if start_from is None:
        revisions = ['HEAD']
    elif hasattr(start_from, 'value'):
        # enumeration member, e.g. StartLogFrom.CURRENT
        revisions = [str(start_from.value)]
    elif isinstance(start_from, (list, tuple)):
        revisions = start_from
    else:
        revisions = [str(start_from)]

    rev_list_options = [
        '--max-parents=0',  # select only commits without parents (roots)
        '--date-order',     # order by committer date, most recent first...
        '--reverse',        # ...then flip the order, so the oldest is first
        '--parents',        # together with --header, simplifies parsing
        '--header',
        '-z',               # NUL, not newline, separates the commits
    ]
    cmd = [
        'git', '-C', str(self.repo),
        'rev-list',
        *rev_list_options,
        *revisions,
        '--',
    ]

    result = subprocess.run(cmd, capture_output=True, check=True)
    output = result.stdout.decode(GitRepo.log_encoding, errors=self.encoding_errors)
    # Only the first record (the oldest root commit) is of interest.
    oldest_commit_text, _, _ = output.partition('\0')

    return _parse_commit_text(
        oldest_commit_text,
        # these flags have to match the format of the git command above
        with_parents_line=True, indented_body=True
    )
2313+
22222314
def get_config(self, name: str, value_type: Optional[str] = None) -> Union[str, None]:
22232315
"""Query specific git config option
22242316

tests/test_utils_git.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,13 @@ def test_count_commits(example_repo):
474474
actual = example_repo.count_commits('HEAD')
475475
assert expected == actual, "number of commits in repository matches (start_from='HEAD')"
476476

477+
actual = example_repo.count_commits(start_from=['HEAD', 'v1.5'])
478+
assert expected == actual, "number of commits in repository matches (start_from=['HEAD', 'v1.5'])"
479+
480+
expected = 2 # v1.5, v2
481+
actual = example_repo.count_commits(start_from='HEAD', until_commit='v1.5')
482+
assert expected == actual, "number of commits in repository matches (start_from='HEAD', until_commit='v1.5')"
483+
477484

478485
def test_list_authors(example_repo):
479486
"""Test GitRepo.list_authors_shortlog() and related methods"""
@@ -501,10 +508,34 @@ def test_find_roots(example_repo):
501508
roots_list = example_repo.find_roots()
502509
assert len(roots_list) == 1, "has a single root commit"
503510

511+
roots_list = example_repo.find_roots(None)
512+
assert len(roots_list) == 1, "has a single root commit, when passed None"
513+
514+
roots_list = example_repo.find_roots('HEAD')
515+
assert len(roots_list) == 1, "has a single root commit, when passed 'HEAD'"
516+
517+
roots_list = example_repo.find_roots(['HEAD', 'v2'])
518+
assert len(roots_list) == 1, "has a single root commit when passed ['HEAD', 'v2']"
519+
520+
roots_list = example_repo.find_roots(['HEAD', '--date-order'])
521+
assert len(roots_list) == 1, "has a single root commit when passed ['HEAD', '--date-order']"
522+
504523
v1_oid = example_repo.to_oid("v1")
505524
assert roots_list[0] == v1_oid, "root commit is v1"
506525

507526

527+
def test_oldest_root_metadata(example_repo):
    """Check the commit metadata returned by GitRepo.oldest_root_metadata()"""
    # values expected for the root commit of the example repository
    expected_tree = 'a5928e3b2666774922364c3d5e16232e1b7f4114'
    expected_message = 'Initial commit\n'
    expected_committer = 'A U Thor <author@example.com>'

    root_info = example_repo.oldest_root_metadata()

    assert root_info['tree'] == expected_tree, \
        "'tree' field did not change"
    assert root_info['message'] == expected_message, \
        "commit message matches"
    assert root_info['committer']['committer'] == expected_committer, \
        "committer matches repository setup"
537+
538+
508539
def test_get_config(example_repo):
509540
"""Test GitRepo.get_config() method"""
510541
expected = 'A U Thor' # set up in setUpClass() class method

0 commit comments

Comments
 (0)