@@ -66,6 +66,12 @@ def _get_or_create_pool(self) -> ProcessPoolExecutor:
6666 self .process_pool = ProcessPoolExecutor (max_workers = max_workers )
6767 return self .process_pool
6868
def cleanup_process_pool(self):
    """Shut down the cached process pool, if one exists, and forget it.

    The pool is asked to shut down with ``wait=False`` so this call does not
    block on outstanding work. Afterwards ``self.process_pool`` is reset to
    ``None``. When no pool is set (the attribute is falsy) nothing happens.
    """
    pool = self.process_pool
    if not pool:
        # Nothing to release.
        return

    pool.shutdown(wait=False)
    self.process_pool = None
74+
6975 @property
7076 def git_log_format (self ) -> str :
7177 """Git log format string with commit splitter"""
@@ -703,34 +709,46 @@ def _validate_commit_data(commit_dict: dict[str, Any]) -> bool:
703709
@staticmethod
def get_insertions_deletions(commit_hash: str, repo_path: str) -> tuple[int, int]:
    """Return ``(insertions, deletions)`` for a commit, never raising.

    The work is delegated to
    ``CommitService._get_insertions_deletions_with_retry``. If that call
    raises anything, the failure is logged and ``(0, 0)`` is returned as a
    best-effort fallback.

    Args:
        commit_hash: Hash of the commit to inspect.
        repo_path: Path of the repository the commit lives in.

    Returns:
        The totals reported by the helper, or ``(0, 0)`` on failure.
    """
    totals = (0, 0)
    try:
        totals = CommitService._get_insertions_deletions_with_retry(commit_hash, repo_path)
    except Exception as exc:
        # Deliberate catch-all: callers get zero counts instead of an error.
        logger.error(
            f"All retries failed for insertions/deletions for commit {commit_hash} : {exc} "
        )
    return totals
733720
# NOTE: @staticmethod must be the OUTERMOST decorator so that tenacity wraps
# the plain function. With the order reversed, tenacity wraps the staticmethod
# object itself, which is not callable before Python 3.10, and the resulting
# wrapper is a plain function that would bind ``self`` when accessed through
# an instance.
@staticmethod
@retry(
    stop=stop_after_attempt(3),
    wait=wait_fixed(2),
)
def _get_insertions_deletions_with_retry(commit_hash: str, repo_path: str) -> tuple[int, int]:
    """Run ``git show --numstat`` for a commit and sum its line counts.

    The call is retried by tenacity: at most 3 attempts with a fixed
    2 second wait between them. No ``retry=`` predicate is configured, so
    any exception raised here triggers another attempt.

    Args:
        commit_hash: Hash of the commit to inspect.
        repo_path: Repository path, passed to git via ``-C``.

    Returns:
        ``(insertions, deletions)`` summed over every numeric numstat row.

    Raises:
        Whatever tenacity raises once the attempts are exhausted. Since
        ``reraise`` is not set, that is expected to be tenacity's
        ``RetryError`` wrapping the last failure (for example
        ``subprocess.CalledProcessError`` from ``check=True`` or
        ``subprocess.TimeoutExpired`` from the 120 second timeout).
    """
    # Use git show which works for all cases: normal commits, root commits,
    # and shallow boundary commits.
    result = subprocess.run(
        ["git", "-C", repo_path, "show", "--numstat", "--format=", commit_hash],
        capture_output=True,
        text=True,
        check=True,
        timeout=120,
    )

    insertions, deletions = 0, 0
    for line in result.stdout.splitlines():
        if not line.strip():
            continue  # Skip empty lines.

        # --numstat separates its columns with a single tab character.
        parts = line.split("\t")
        if len(parts) < 2:
            continue

        try:
            # Parse both counts before accumulating so a malformed row can
            # never contribute a partial total.
            added, removed = int(parts[0]), int(parts[1])
        except ValueError:
            # Non-numeric counts (e.g. "-" for binary files) are skipped.
            continue

        insertions += added
        deletions += removed

    return insertions, deletions
751+
734752 @staticmethod
735753 def _is_valid_email (email : str ) -> bool :
736754 """Check if a string is a valid email format using Pydantic validation."""
0 commit comments