Skip to content

Commit 83c24d2

Browse files
xuanyang15 authored and copybara-github committed
docs: Fix ADK release analyzer agent for large releases
Co-authored-by: Xuan Yang <xygoogle@google.com> PiperOrigin-RevId: 860187312
1 parent e8f7aa3 commit 83c24d2

2 files changed

Lines changed: 152 additions & 4 deletions

File tree

contributing/samples/adk_documentation/adk_release_analyzer/agent.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,8 @@ def get_release_context(tool_context: ToolContext) -> dict[str, Any]:
272272
273273
3. Call `get_changed_files_summary` to get the list of changed files WITHOUT
274274
the full patches (to save context space).
275+
- **IMPORTANT**: Pass `local_repo_path="{LOCAL_REPOS_DIR_PATH}/{CODE_REPO}"`
276+
to use local git and avoid GitHub API's 300-file limit.
275277
276278
4. Filter and organize the files:
277279
- **INCLUDE** only files in `src/google/adk/` directory

contributing/samples/adk_documentation/tools.py

Lines changed: 150 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -600,23 +600,37 @@ def get_file_diff_for_release(
600600

601601

602602
def get_changed_files_summary(
603-
repo_owner: str, repo_name: str, start_tag: str, end_tag: str
603+
repo_owner: str,
604+
repo_name: str,
605+
start_tag: str,
606+
end_tag: str,
607+
local_repo_path: Optional[str] = None,
604608
) -> Dict[str, Any]:
605609
"""Gets a summary of changed files between two releases without patches.
606610
607-
This is a lighter-weight version of get_changed_files_between_releases
608-
that only returns file paths and metadata, without the actual diff content.
609-
Use this for planning which files to analyze.
611+
This function uses local git commands when local_repo_path is provided,
612+
which avoids the GitHub API's 300-file limit for large comparisons.
613+
Falls back to GitHub API if local_repo_path is not provided or invalid.
610614
611615
Args:
612616
repo_owner: The name of the repository owner.
613617
repo_name: The name of the repository.
614618
start_tag: The older tag (base) for the comparison.
615619
end_tag: The newer tag (head) for the comparison.
620+
local_repo_path: Optional absolute path to local git repo. If provided
621+
and valid, uses git diff instead of GitHub API to get complete
622+
file list (avoids 300-file limit).
616623
617624
Returns:
618625
A dictionary containing the status and a summary of changed files.
619626
"""
627+
# Use local git if valid path is provided (avoids GitHub API 300-file limit)
628+
if local_repo_path and os.path.isdir(os.path.join(local_repo_path, ".git")):
629+
return _get_changed_files_from_local_git(
630+
local_repo_path, start_tag, end_tag, repo_owner, repo_name
631+
)
632+
633+
# Fall back to GitHub API (limited to 300 files)
620634
url = f"{GITHUB_BASE_URL}/repos/{repo_owner}/{repo_name}/compare/{start_tag}...{end_tag}"
621635

622636
try:
@@ -654,8 +668,140 @@ def get_changed_files_summary(
654668
f"https://github.com/{repo_owner}/{repo_name}"
655669
f"/compare/{start_tag}...{end_tag}"
656670
),
671+
"note": (
672+
(
673+
"Using GitHub API which is limited to 300 files. "
674+
"Provide local_repo_path to get complete file list."
675+
)
676+
if len(formatted_files) >= 300
677+
else None
678+
),
657679
}
658680
except requests.exceptions.HTTPError as e:
659681
return error_response(f"HTTP Error: {e}")
660682
except requests.exceptions.RequestException as e:
661683
return error_response(f"Request Error: {e}")
684+
685+
686+
def _get_changed_files_from_local_git(
687+
local_repo_path: str,
688+
start_tag: str,
689+
end_tag: str,
690+
repo_owner: str,
691+
repo_name: str,
692+
) -> Dict[str, Any]:
693+
"""Gets changed files using local git commands (no file limit).
694+
695+
Args:
696+
local_repo_path: Path to local git repository.
697+
start_tag: The older tag (base) for the comparison.
698+
end_tag: The newer tag (head) for the comparison.
699+
repo_owner: Repository owner for compare URL.
700+
repo_name: Repository name for compare URL.
701+
702+
Returns:
703+
A dictionary containing the status and a summary of changed files.
704+
"""
705+
try:
706+
# Fetch tags to ensure we have them
707+
subprocess.run(
708+
["git", "fetch", "--tags"],
709+
cwd=local_repo_path,
710+
capture_output=True,
711+
text=True,
712+
check=False,
713+
)
714+
715+
# Get list of changed files with their status
716+
diff_result = subprocess.run(
717+
["git", "diff", "--name-status", f"{start_tag}...{end_tag}"],
718+
cwd=local_repo_path,
719+
capture_output=True,
720+
text=True,
721+
check=True,
722+
)
723+
724+
# Get numstat for additions/deletions
725+
numstat_result = subprocess.run(
726+
["git", "diff", "--numstat", f"{start_tag}...{end_tag}"],
727+
cwd=local_repo_path,
728+
capture_output=True,
729+
text=True,
730+
check=True,
731+
)
732+
733+
# Parse numstat output (additions, deletions, filename)
734+
file_stats: Dict[str, Dict[str, int]] = {}
735+
for line in numstat_result.stdout.strip().split("\n"):
736+
if not line:
737+
continue
738+
parts = line.split("\t")
739+
if len(parts) >= 3:
740+
additions = int(parts[0]) if parts[0] != "-" else 0
741+
deletions = int(parts[1]) if parts[1] != "-" else 0
742+
filename = parts[2]
743+
file_stats[filename] = {
744+
"additions": additions,
745+
"deletions": deletions,
746+
"changes": additions + deletions,
747+
}
748+
749+
# Parse name-status output and combine with numstat
750+
status_map = {
751+
"A": "added",
752+
"D": "removed",
753+
"M": "modified",
754+
"R": "renamed",
755+
"C": "copied",
756+
}
757+
758+
files_by_dir: Dict[str, List[Dict[str, Any]]] = {}
759+
formatted_files = []
760+
761+
for line in diff_result.stdout.strip().split("\n"):
762+
if not line:
763+
continue
764+
parts = line.split("\t")
765+
if len(parts) >= 2:
766+
status_code = parts[0][0] # First char is the status
767+
filename = parts[-1] # Last part is filename (handles renames)
768+
769+
stats = file_stats.get(
770+
filename,
771+
{
772+
"additions": 0,
773+
"deletions": 0,
774+
"changes": 0,
775+
},
776+
)
777+
778+
file_info = {
779+
"relative_path": filename,
780+
"status": status_map.get(status_code, "modified"),
781+
"additions": stats["additions"],
782+
"deletions": stats["deletions"],
783+
"changes": stats["changes"],
784+
}
785+
formatted_files.append(file_info)
786+
787+
# Group by top-level directory
788+
dir_parts = filename.split("/")
789+
top_dir = dir_parts[0] if dir_parts else "root"
790+
if top_dir not in files_by_dir:
791+
files_by_dir[top_dir] = []
792+
files_by_dir[top_dir].append(file_info)
793+
794+
return {
795+
"status": "success",
796+
"total_files": len(formatted_files),
797+
"files": formatted_files,
798+
"files_by_directory": files_by_dir,
799+
"compare_url": (
800+
f"https://github.com/{repo_owner}/{repo_name}"
801+
f"/compare/{start_tag}...{end_tag}"
802+
),
803+
}
804+
except subprocess.CalledProcessError as e:
805+
return error_response(f"Git command failed: {e.stderr}")
806+
except (OSError, ValueError) as e:
807+
return error_response(f"Error getting changed files: {e}")

0 commit comments

Comments
 (0)