Skip to content

Commit d3dd066

Browse files
authored
chore(version-scanner): split file path in output and format reporting columns (#17476)
This pull request polishes the output format of the version_scanner. It splits the scanned file's path into directory and filename columns, and places the dependency and target version columns upfront to help reviewers quickly scan the findings. Key changes: - Adds `file_name`, `dependency`, and `version` columns to the report formats. - Re-orders output columns in CSV and Google Sheets uploads. - Updates unit tests to verify the new column ordering.
1 parent 32f7d84 commit d3dd066

2 files changed

Lines changed: 62 additions & 22 deletions

File tree

scripts/version_scanner/tests/unit/test_version_scanner.py

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,9 @@ def test_write_csv_report(tmp_path):
151151
"rule_name": "python_requires_check",
152152
"line_number": 1,
153153
"matched_string": "python_requires = '>=3.7'",
154-
"context_line": "python_requires = '>=3.7'"
154+
"context_line": "python_requires = '>=3.7'",
155+
"dependency": "python",
156+
"version": "3.7"
155157
}
156158
]
157159

@@ -164,11 +166,14 @@ def test_write_csv_report(tmp_path):
164166
rows = list(reader)
165167

166168
assert len(rows) == 1
169+
assert rows[0]["file_name"] == "setup.py"
167170
assert rows[0]["file_path"] == "./setup.py"
168171
assert rows[0]["rule_name"] == "python_requires_check"
169172
assert rows[0]["line_number"] == "1"
170173
assert rows[0]["matched_string"] == "python_requires = '>=3.7'"
171174
assert rows[0]["context_line"] == "python_requires = '>=3.7'"
175+
assert rows[0]["dependency"] == "python"
176+
assert rows[0]["version"] == "3.7"
172177

173178

174179
def test_load_config(tmp_path):
@@ -227,7 +232,6 @@ def test_main_package_file_permission_error(tmp_path, capsys):
227232
package_file = tmp_path / "packages.txt"
228233
package_file.write_text("packages/pkg_a")
229234

230-
import sys
231235
test_args = ["version_scanner.py", "-d", "python", "-v", "3.7", "--package-file", str(package_file)]
232236

233237
real_open = open
@@ -246,7 +250,6 @@ def side_effect(file, *args, **kwargs):
246250
captured = capsys.readouterr()
247251
assert "Error: Permission denied reading package file" in captured.err
248252
def test_main_package_file_not_found(capsys):
249-
import sys
250253
test_args = ["version_scanner.py", "-d", "python", "-v", "3.7", "--package-file", "non_existent_file.txt"]
251254

252255
with patch("sys.argv", test_args):
@@ -323,7 +326,6 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore):
323326
mock_load_ignore.return_value = []
324327
mock_scan.return_value = []
325328

326-
import sys
327329
test_args = ["version_scanner.py", "-d", "python", "-v", "3.7"]
328330

329331
with mock.patch('sys.argv', test_args):
@@ -339,7 +341,8 @@ def test_main_loads_ignore_from_script_dir(mock_scan, mock_load_ignore):
339341

340342

341343
try:
342-
import googleapiclient
344+
# Ruff linter F401: Imported solely to detect Google API Client library presence for test skipping
345+
import googleapiclient # noqa: F401
343346
HAS_GOOGLE_API = True
344347
except ImportError:
345348
HAS_GOOGLE_API = False
@@ -392,7 +395,7 @@ def test_upload_to_drive(mock_auth, mock_build):
392395
body = kwargs.get('body', {})
393396
values = body.get('values', [])
394397
assert len(values) > 1
395-
assert "HYPERLINK" in values[1][3] # line_number is at index 3
398+
assert "HYPERLINK" in values[1][6] # line_number is at index 6
396399

397400

398401
def test_regex_examples_from_config():
@@ -638,39 +641,67 @@ def test_format_for_raw_csv_handles_empty_line_number():
638641

639642
def test_format_for_raw_csv():
640643
match = {
644+
"file_name": "setup.py",
641645
"file_path": "google-cloud-python/main/packages/pkg_a/setup.py",
642646
"repo_path": "packages/pkg_a/setup.py",
643647
"package_name": "pkg_a",
644648
"rule_name": "python_requires_check",
645649
"line_number": "123",
646650
"matched_string": "3.7",
647-
"context_line": "python_requires = '>=3.7'"
651+
"context_line": "python_requires = '>=3.7'",
652+
"dependency": "python",
653+
"version": "3.7"
648654
}
649655

650656
formatted = format_for_raw_csv(match)
651657

658+
assert formatted["file_name"] == "setup.py"
652659
assert formatted["file_path"] == "google-cloud-python/main/packages/pkg_a/setup.py"
653660
assert formatted["package_name"] == "pkg_a"
654661
assert formatted["rule_name"] == "python_requires_check"
655662
assert formatted["line_number"] == 123 # Int conversion
656663
assert formatted["matched_string"] == "3.7" # No formula wrapping
657664
assert formatted["context_line"] == "python_requires = '>=3.7'"
665+
assert formatted["dependency"] == "python"
666+
assert formatted["version"] == "3.7"
667+
668+
def test_format_for_raw_csv_fallback_filename():
669+
match = {
670+
"file_path": "google-cloud-python/main/packages/pkg_a/setup.py",
671+
"repo_path": "packages/pkg_a/setup.py",
672+
"package_name": "pkg_a",
673+
"rule_name": "python_requires_check",
674+
"line_number": "123",
675+
"matched_string": "3.7",
676+
"context_line": "python_requires = '>=3.7'",
677+
"dependency": "python",
678+
"version": "3.7"
679+
}
680+
681+
formatted = format_for_raw_csv(match)
682+
assert formatted["file_name"] == "setup.py"
658683

659684
def test_format_for_spreadsheet():
660685
match = {
686+
"file_name": "setup.py",
661687
"file_path": "google-cloud-python/main/packages/pkg_a/setup.py",
662688
"repo_path": "packages/pkg_a/setup.py",
663689
"package_name": "pkg_a",
664690
"rule_name": "python_requires_check",
665691
"line_number": 123,
666692
"matched_string": "3.7",
667-
"context_line": "python_requires = '>=3.7'"
693+
"context_line": "python_requires = '>=3.7'",
694+
"dependency": "python",
695+
"version": "3.7"
668696
}
669697

670698
# Without github_repo
671699
formatted_no_repo = format_for_spreadsheet(match)
700+
assert formatted_no_repo["file_name"] == "setup.py"
672701
assert formatted_no_repo["line_number"] == 123
673702
assert formatted_no_repo["matched_string"] == '="3.7"' # Decimal protection formula
703+
assert formatted_no_repo["dependency"] == "python"
704+
assert formatted_no_repo["version"] == "3.7"
674705

675706
# With github_repo
676707
formatted_repo = format_for_spreadsheet(match, github_repo="https://github.com/user/repo", branch="main")

scripts/version_scanner/version_scanner.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def load_config(self) -> List[Dict[str, str]]:
146146

147147
return resolved_rules
148148

149-
def scan_file(file_path: str, compiled_rules: List[Dict[str, re.Pattern]]) -> List[Dict[str, str]]:
149+
def scan_file(file_path: str, compiled_rules: List[Dict[str, re.Pattern]]) -> List[Dict[str, Any]]:
150150
"""
151151
Scan a single file for matching patterns.
152152
@@ -239,23 +239,29 @@ def _safe_int(value: Any, default: int = 0) -> int:
239239
return default
240240

241241

242-
def format_for_raw_csv(match: Dict[str, str]) -> Dict[str, str]:
242+
def format_for_raw_csv(match: Dict[str, Any]) -> Dict[str, Any]:
243243
"""Prepares a full raw dataset (n + x columns) with clean text values."""
244+
file_name = match.get("file_name")
245+
if not file_name and match.get("file_path"):
246+
file_name = os.path.basename(match.get("file_path"))
244247
return {
248+
"file_name": file_name or "",
245249
"file_path": match.get("file_path", ""),
246250
"package_name": match.get("package_name", ""),
247251
"rule_name": match.get("rule_name", ""),
248252
"line_number": _safe_int(match.get("line_number")),
249253
"matched_string": match.get("matched_string", ""),
250-
"context_line": _truncate_context(match.get("context_line", ""), match.get("matched_string", ""))
254+
"context_line": _truncate_context(match.get("context_line", ""), match.get("matched_string", "")),
255+
"dependency": match.get("dependency", ""),
256+
"version": match.get("version", "")
251257
}
252258

253259

254260
def format_for_spreadsheet(
255-
match: Dict[str, str],
261+
match: Dict[str, Any],
256262
github_repo: str = None,
257263
branch: str = "main"
258-
) -> Dict[str, str]:
264+
) -> Dict[str, Any]:
259265
"""Builds on top of raw CSV but applies Sheets-specific formulas."""
260266
formatted = format_for_raw_csv(match)
261267

@@ -270,7 +276,7 @@ def format_for_spreadsheet(
270276
return formatted
271277

272278

273-
def format_for_console(match: Dict[str, str]) -> str:
279+
def format_for_console(match: Dict[str, Any]) -> str:
274280
"""Prepares a slim, readable string representation (n columns) for stdout/logs."""
275281
file_path = match.get("file_path", "")
276282
line_number = match.get("line_number", "")
@@ -280,7 +286,7 @@ def format_for_console(match: Dict[str, str]) -> str:
280286

281287

282288

283-
def get_match_counts(matches: List[Dict[str, str]]) -> Tuple[Dict[str, int], Dict[str, int]]:
289+
def get_match_counts(matches: List[Dict[str, Any]]) -> Tuple[Dict[str, int], Dict[str, int]]:
284290
"""
285291
Aggregate matches by rule and by package.
286292
"""
@@ -333,7 +339,7 @@ def load_ignore_file(file_path: str) -> List[str]:
333339

334340
def write_csv_report(
335341
output_path: str,
336-
matches: List[Dict[str, str]]
342+
matches: List[Dict[str, Any]]
337343
) -> None:
338344
"""
339345
Write the collected matches to a CSV file.
@@ -342,7 +348,7 @@ def write_csv_report(
342348
output_path: Path to the output CSV file.
343349
matches: A list of dictionaries containing match details.
344350
"""
345-
fieldnames = ["file_path", "package_name", "rule_name", "line_number", "matched_string", "context_line"]
351+
fieldnames = ["file_name", "file_path", "package_name", "rule_name", "dependency", "version", "line_number", "matched_string", "context_line"]
346352

347353
try:
348354
with open(output_path, 'w', encoding='utf-8', newline='') as f:
@@ -360,7 +366,7 @@ def write_csv_report(
360366
print(f"Error writing CSV report: {e}", file=sys.stderr)
361367

362368

363-
def upload_to_drive(csv_path: str, matches: List[Dict[str, str]], github_repo: str = None, branch: str = "main") -> str:
369+
def upload_to_drive(csv_path: str, matches: List[Dict[str, Any]], github_repo: str = None, branch: str = "main") -> str:
364370
"""
365371
Upload matches to a Google Sheet in Drive.
366372
"""
@@ -391,13 +397,16 @@ def upload_to_drive(csv_path: str, matches: List[Dict[str, str]], github_repo: s
391397
spreadsheet_id = spreadsheet.get('spreadsheetId')
392398

393399
# Prepare data
394-
values = [["file_path", "package_name", "rule_name", "line_number", "matched_string", "context_line"]]
400+
values = [["file_name", "file_path", "package_name", "rule_name", "dependency", "version", "line_number", "matched_string", "context_line"]]
395401
for m in matches:
396402
formatted_m = format_for_spreadsheet(m, github_repo=github_repo, branch=branch)
397403
values.append([
404+
formatted_m.get("file_name", ""),
398405
formatted_m.get("file_path", ""),
399406
formatted_m.get("package_name", ""),
400407
formatted_m.get("rule_name", ""),
408+
formatted_m.get("dependency", ""),
409+
formatted_m.get("version", ""),
401410
str(formatted_m.get("line_number", "")),
402411
formatted_m.get("matched_string", ""),
403412
formatted_m.get("context_line", "")
@@ -460,7 +469,7 @@ def scan_repository(
460469
target_packages: List[str] = None,
461470
ignore_dirs: List[str] = None,
462471
version_string: str = None
463-
) -> List[Dict[str, str]]:
472+
) -> List[Dict[str, Any]]:
464473
"""
465474
Scans the repository directory tree applying resolved regex patterns to files.
466475
@@ -510,7 +519,6 @@ def scan_repository(
510519
files = [f for f in files if f.lower() not in ignore_lower]
511520

512521
rel_root = os.path.relpath(root, root_path)
513-
parts = rel_root.split(os.sep)
514522

515523
# Layout-agnostic generic subdirectory filtering
516524
if target_packages:
@@ -559,6 +567,7 @@ def scan_repository(
559567
display_path = rel_file_path
560568

561569
for m in matches:
570+
m["file_name"] = file
562571
m["file_path"] = display_path
563572
m["repo_path"] = rel_file_path
564573
m["package_name"] = package_name
@@ -663,7 +672,7 @@ def main():
663672

664673
print(f"Starting scan for dependency: {args.dependency} version: {args.version}")
665674
print(f"Root path: {args.path}")
666-
print(f"Targets to scan:")
675+
print("Targets to scan:")
667676
if target_packages:
668677
for pkg in target_packages:
669678
print(f" - {os.path.join(args.path, pkg)}")

0 commit comments

Comments
 (0)