Skip to content

Commit 5debefe

Browse files
committed
Merge branch 'utils-git' - close_batch_command(), docstrings
* utils-git: utils.git: Document GitRepo attributes utils.git: Add close_batch_command() method to GitRepo class utils.git: Add missing param to filter_valid_commits() docstring
2 parents 3a27ee2 + c6c3fc9 commit 5debefe

2 files changed

Lines changed: 93 additions & 3 deletions

File tree

src/diffannotator/utils/git.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import os
2424
import re
2525
import subprocess
26+
import weakref
2627
from collections import defaultdict
2728
from contextlib import contextmanager
2829
from enum import Enum
@@ -506,8 +507,39 @@ def decode_c_quoted_str(text: str) -> str:
506507
return text
507508

508509

510+
def maybe_close_subprocess(process: Optional[subprocess.Popen]) -> None:
511+
"""Closes a subprocess safely to avoid resource warnings and resource starvation
512+
513+
This function ensures the safe termination of a subprocess by properly closing its
514+
standard input and output streams, waiting for it to exit, and forcefully
515+
killing it if necessary. It is designed to handle `git cat-file --batch-command`
516+
and similar persistent subprocesses.
517+
518+
It is designed to be used as a ` weakref.finalize ` callback.
519+
520+
Parameters
521+
----------
522+
process
523+
The subprocess instance to close. If None, the function does nothing.
524+
"""
525+
if process is None:
526+
return
527+
528+
# closing stdin and stdout should end the persistent `git cat-file` process
529+
process.stdout.close() # to avoid ResourceWarning: unclosed file <_io.BufferedReader name=3>
530+
process.stdin.close() # just in case, see above
531+
process.wait() # to avoid ResourceWarning: subprocess NNN is still running
532+
process.kill() # just in case
533+
534+
509535
class GitRepo:
510-
"""Class representing Git repository, for performing operations on"""
536+
"""Class representing Git repository, for performing operations on
537+
538+
Attributes
539+
----------
540+
repo : Path
541+
stores Path to the Git repository
542+
"""
511543
path_encoding = 'utf8'
512544
default_file_encoding = 'utf8'
513545
log_encoding = 'utf8'
@@ -529,6 +561,8 @@ def __init__(self, repo_dir: PathLike):
529561
# TODO: check that `git_directory` is a path to git repository
530562
# TODO: remember absolute path (it is safer)
531563
self.repo = Path(repo_dir)
564+
self._cat_file: Optional[subprocess.Popen] = None
565+
self._finalizer = weakref.finalize(self, maybe_close_subprocess, self._cat_file)
532566

533567
def __repr__(self):
534568
class_name = type(self).__name__
@@ -673,7 +707,7 @@ def batch_command(self) -> subprocess.Popen:
673707
in the `--batch-command` mode, buffered (because of `--buffer`),
674708
see https://git-scm.com/docs/git-cat-file
675709
"""
676-
return subprocess.Popen(
710+
self._cat_file = subprocess.Popen(
677711
[
678712
'git', '-C', str(self.repo),
679713
'cat-file', '--batch-command', '--buffer',
@@ -684,6 +718,18 @@ def batch_command(self) -> subprocess.Popen:
684718
text=True,
685719
bufsize=1, # line buffered
686720
)
721+
return self._cat_file
722+
723+
def close_batch_command(self) -> None:
724+
"""Close persistent connection to `git cat-file --batch-command --buffer`
725+
726+
This should be done only when you are finished with using it, because
727+
at least with the current implementation calling this method would make
728+
`.are_valid_objects()` and `.filter_valid_commits()` methods fail;
729+
they rely on persistence of the cached `.batch_command` property.
730+
"""
731+
self._finalizer()
732+
self._cat_file = None
687733

688734
def format_patch(self,
689735
output_dir: Optional[PathLike] = None,
@@ -1476,6 +1522,9 @@ def filter_valid_commits(self, commits: Iterable[str], to_oid: bool = False) ->
14761522
----------
14771523
commits
14781524
A list of commit identifiers to check
1525+
to_oid
1526+
Whether to convert elements in `commits` to SHA-1 object identifiers,
1527+
for example, "HEAD" to "3a27ee24b37a3e9572a0acc0aaecd22cc9c10bc7"
14791528
14801529
Yields
14811530
------

tests/test_utils_git.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22
"""Test cases for 'src/diffannotator/utils/git.py' module"""
33
import subprocess
44
import textwrap
5+
from typing import Optional
56

67
import pytest
78
from unidiff import PatchSet
89

9-
from diffannotator.utils.git import decode_c_quoted_str, GitRepo, DiffSide, AuthorStat, parse_shortlog_count, ChangeSet
10+
from diffannotator.utils.git import decode_c_quoted_str, GitRepo, DiffSide, AuthorStat, parse_shortlog_count, ChangeSet, \
11+
maybe_close_subprocess
1012
from tests.conftest import default_branch, example_repo, example_repo_utf8
1113

1214

@@ -229,6 +231,45 @@ def test_is_valid_commit(example_repo):
229231
assert not example_repo.is_valid_commit("HEAD~20"), "HEAD~20 is invalid"
230232

231233

234+
def test_batch_command(example_repo):
235+
"""Test that the GitRepo.batch_command property behaves sanely"""
236+
assert example_repo._cat_file is None, "the property is not initialized yet"
237+
238+
maybe_close_subprocess(example_repo._cat_file) # no error
239+
240+
proc_1 = example_repo.batch_command
241+
proc_2 = example_repo.batch_command
242+
assert proc_1 is proc_2, ".batch_command property is cached"
243+
244+
proc: Optional[subprocess.Popen] = example_repo._cat_file
245+
assert proc is not None, "process was generated by .batch_command, and is not None"
246+
assert isinstance(proc, subprocess.Popen), "process is a Popen object"
247+
assert proc.returncode is None, "the `git cat-file` didn't return (process is live)"
248+
249+
actual = example_repo.are_valid_objects(["HEAD"])
250+
expected = [True]
251+
assert actual == expected, ".are_valid_object('HEAD') returns True"
252+
253+
maybe_close_subprocess(example_repo._cat_file) # no error
254+
255+
proc = example_repo.batch_command
256+
assert isinstance(proc, subprocess.Popen), ".batch_command returns a Popen object"
257+
assert proc.returncode == 0, "the `git cat-file` returns success (process ended)"
258+
259+
with pytest.raises(ValueError) as exc_info:
260+
example_repo.are_valid_objects(["HEAD"])
261+
assert "closed file" in str(exc_info.value), "the `git cat-file` process is closed"
262+
263+
example_repo.close_batch_command() # no errors
264+
265+
#print(f"{example_repo._finalizer=}")
266+
#print(f"{example_repo._finalizer.alive=}")
267+
#print(f"{example_repo._finalizer.peek()=}")
268+
#print(f"{example_repo._finalizer()=}")
269+
#print(f"{example_repo._finalizer.detach()=}")
270+
#print(f"{example_repo._finalizer.alive=}")
271+
272+
232273
def test_are_valid_objects(example_repo):
233274
"""Test that GitRepo.are_valid_objects returns the correct answer"""
234275
actual = example_repo.are_valid_objects(['HEAD', 'v1', 'v2'], object_type='commit')

0 commit comments

Comments
 (0)