@@ -600,23 +600,37 @@ def get_file_diff_for_release(
600600
601601
602602def get_changed_files_summary (
603- repo_owner : str , repo_name : str , start_tag : str , end_tag : str
603+ repo_owner : str ,
604+ repo_name : str ,
605+ start_tag : str ,
606+ end_tag : str ,
607+ local_repo_path : Optional [str ] = None ,
604608) -> Dict [str , Any ]:
605609 """Gets a summary of changed files between two releases without patches.
606610
607- This is a lighter-weight version of get_changed_files_between_releases
608- that only returns file paths and metadata, without the actual diff content .
609- Use this for planning which files to analyze .
611+ This function uses local git commands when local_repo_path is provided,
612+ which avoids the GitHub API's 300-file limit for large comparisons .
613+ Falls back to GitHub API if local_repo_path is not provided or invalid .
610614
611615 Args:
612616 repo_owner: The name of the repository owner.
613617 repo_name: The name of the repository.
614618 start_tag: The older tag (base) for the comparison.
615619 end_tag: The newer tag (head) for the comparison.
620+ local_repo_path: Optional absolute path to local git repo. If provided
621+ and valid, uses git diff instead of GitHub API to get complete
622+ file list (avoids 300-file limit).
616623
617624 Returns:
618625 A dictionary containing the status and a summary of changed files.
619626 """
627+ # Use local git if valid path is provided (avoids GitHub API 300-file limit)
628+ if local_repo_path and os .path .isdir (os .path .join (local_repo_path , ".git" )):
629+ return _get_changed_files_from_local_git (
630+ local_repo_path , start_tag , end_tag , repo_owner , repo_name
631+ )
632+
633+ # Fall back to GitHub API (limited to 300 files)
620634 url = f"{ GITHUB_BASE_URL } /repos/{ repo_owner } /{ repo_name } /compare/{ start_tag } ...{ end_tag } "
621635
622636 try :
@@ -654,8 +668,140 @@ def get_changed_files_summary(
654668 f"https://github.com/{ repo_owner } /{ repo_name } "
655669 f"/compare/{ start_tag } ...{ end_tag } "
656670 ),
671+ "note" : (
672+ (
673+ "Using GitHub API which is limited to 300 files. "
674+ "Provide local_repo_path to get complete file list."
675+ )
676+ if len (formatted_files ) >= 300
677+ else None
678+ ),
657679 }
658680 except requests .exceptions .HTTPError as e :
659681 return error_response (f"HTTP Error: { e } " )
660682 except requests .exceptions .RequestException as e :
661683 return error_response (f"Request Error: { e } " )
684+
685+
def _get_changed_files_from_local_git(
    local_repo_path: str,
    start_tag: str,
    end_tag: str,
    repo_owner: str,
    repo_name: str,
) -> Dict[str, Any]:
    """Gets changed files using local git commands (no file limit).

    Runs ``git diff --name-status`` and ``git diff --numstat`` over the
    three-dot range ``start_tag...end_tag`` (the same form used in the
    compare URL) and merges the two outputs into one summary.

    Args:
        local_repo_path: Path to local git repository.
        start_tag: The older tag (base) for the comparison.
        end_tag: The newer tag (head) for the comparison.
        repo_owner: Repository owner for compare URL.
        repo_name: Repository name for compare URL.

    Returns:
        A dictionary containing the status and a summary of changed files
        (``total_files``, ``files``, ``files_by_directory``, ``compare_url``),
        or the result of ``error_response`` if a git command fails or its
        output cannot be parsed.
    """
    # The revision range starts with start_tag, so a leading "-" would make
    # git parse the whole argument as a command-line option.
    if start_tag.startswith("-"):
        return error_response(f"Invalid start tag: {start_tag}")

    rev_range = f"{start_tag}...{end_tag}"

    try:
        # Best-effort refresh of tags: the result is ignored on purpose so an
        # offline or remote-less clone can still be diffed with local tags.
        subprocess.run(
            ["git", "fetch", "--tags"],
            cwd=local_repo_path,
            capture_output=True,
            text=True,
            check=False,
        )

        name_status_output = _run_git_diff(
            local_repo_path, "--name-status", rev_range
        )
        numstat_output = _run_git_diff(local_repo_path, "--numstat", rev_range)

        file_stats = _parse_numstat(numstat_output)
        summary = _summarize_name_status(name_status_output, file_stats)

        return {
            "status": "success",
            **summary,
            "compare_url": (
                f"https://github.com/{repo_owner}/{repo_name}"
                f"/compare/{start_tag}...{end_tag}"
            ),
        }
    except subprocess.CalledProcessError as e:
        return error_response(f"Git command failed: {e.stderr}")
    except (OSError, ValueError) as e:
        return error_response(f"Error getting changed files: {e}")


# Maps the first letter of a `git diff --name-status` code to the status
# label reported in the summary; unknown letters fall back to "modified".
_GIT_STATUS_LABELS = {
    "A": "added",
    "D": "removed",
    "M": "modified",
    "R": "renamed",
    "C": "copied",
}


def _run_git_diff(local_repo_path: str, output_flag: str, rev_range: str) -> str:
    """Runs ``git diff <output_flag> <rev_range> --`` and returns its stdout.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
        OSError: If the git executable or working directory is unavailable.
    """
    result = subprocess.run(
        # The trailing "--" marks rev_range as a revision, never a path.
        ["git", "diff", output_flag, rev_range, "--"],
        cwd=local_repo_path,
        capture_output=True,
        text=True,
        check=True,
    )
    return result.stdout


def _resolve_numstat_path(path: str) -> str:
    """Returns the post-change path from a ``--numstat`` path field.

    For renames and copies git prints ``old => new`` or, when both paths
    share a prefix/suffix, ``prefix/{old => new}/suffix``. Plain paths are
    returned unchanged.
    """
    arrow = path.find(" => ")
    if arrow == -1:
        return path

    new_start = arrow + len(" => ")
    open_idx = path.rfind("{", 0, arrow)
    close_idx = path.find("}", arrow)
    if open_idx != -1 and close_idx != -1:
        resolved = path[:open_idx] + path[new_start:close_idx] + path[close_idx + 1:]
        # An empty side ("a/{b => }/c") leaves a doubled separator behind.
        return resolved.replace("//", "/")
    return path[new_start:]


def _parse_numstat(output: str) -> Dict[str, Dict[str, int]]:
    """Parses ``git diff --numstat`` output into per-file line counts.

    Binary files are reported by git as ``-`` and are counted as 0.
    Keys are the post-change paths so they match ``--name-status`` output.

    Raises:
        ValueError: If a count column is neither ``-`` nor an integer.
    """
    file_stats: Dict[str, Dict[str, int]] = {}
    for line in output.split("\n"):
        parts = line.split("\t")
        if len(parts) < 3:
            continue
        additions = int(parts[0]) if parts[0] != "-" else 0
        deletions = int(parts[1]) if parts[1] != "-" else 0
        file_stats[_resolve_numstat_path(parts[2])] = {
            "additions": additions,
            "deletions": deletions,
            "changes": additions + deletions,
        }
    return file_stats


def _summarize_name_status(
    output: str, file_stats: Dict[str, Dict[str, int]]
) -> Dict[str, Any]:
    """Builds the file list and directory grouping from ``--name-status``.

    Args:
        output: Raw stdout of ``git diff --name-status``.
        file_stats: Line counts keyed by path, as built by ``_parse_numstat``.

    Returns:
        A dictionary with ``total_files``, ``files`` and
        ``files_by_directory``. Files without a directory component are
        grouped under ``"root"``.
    """
    no_stats = {"additions": 0, "deletions": 0, "changes": 0}
    formatted_files: List[Dict[str, Any]] = []
    files_by_dir: Dict[str, List[Dict[str, Any]]] = {}

    for line in output.split("\n"):
        parts = line.split("\t")
        if len(parts) < 2:
            continue

        # Codes such as "R100" carry a similarity score; only the letter
        # matters. Slicing avoids an IndexError on an empty status column.
        status_code = parts[0][:1]
        # Renames/copies list "old<TAB>new"; the last column is the new path.
        filename = parts[-1]
        stats = file_stats.get(filename, no_stats)

        file_info = {
            "relative_path": filename,
            "status": _GIT_STATUS_LABELS.get(status_code, "modified"),
            "additions": stats["additions"],
            "deletions": stats["deletions"],
            "changes": stats["changes"],
        }
        formatted_files.append(file_info)

        # Group by top-level directory; root-level files share one bucket.
        top_dir = filename.split("/", 1)[0] if "/" in filename else "root"
        files_by_dir.setdefault(top_dir, []).append(file_info)

    return {
        "total_files": len(formatted_files),
        "files": formatted_files,
        "files_by_directory": files_by_dir,
    }
0 commit comments