2727import yaml
2828
2929class ConfigManager :
30- """Handles loading and interpolation of regex configurations."""
30+ """
31+ Handles loading, validation, and interpolation of the regex configuration rules.
32+
33+ Uses template-based regex configurations from a YAML file and interpolates them
34+ with dependency properties and computed version segments (e.g., major, minor) to
35+ generate active regex search patterns dynamically.
36+ """
3137
3238 def __init__ (self , config_path : str , dependency : str , version : str ):
39+ """
40+ Initializes the configuration manager.
41+
42+ Args:
43+ config_path: Path to the YAML configuration file containing regex templates.
44+ dependency: Name of the dependency to search for (e.g., "python", "protobuf").
45+ version: Specific target version string to search for (e.g., "3.7", "4.25.8").
46+ """
3347 self .config_path = config_path
3448 self .dependency = dependency
3549 self .version = version
3650 self .variables = self ._compute_variables ()
3751
3852 def _compute_variables (self ) -> Dict [str , str ]:
39- """Compute variables for interpolation from version string."""
53+ """
54+ Parses the version string and computes variables for template interpolation.
55+
56+ Splits the version string by '.' and generates segments like major, minor,
57+ patch, minor+1, and minor-1. These variables are used in regex templates
58+ (e.g., `{minor_plus_one}` to search for Python 3.8 when EOLing 3.7).
59+
60+ Examples:
61+ If version is "3.7" and dependency is "python":
62+ vars = {
63+ "name": "python",
64+ "version": "3.7",
65+ "major": "3",
66+ "minor": "7",
67+ "minor_plus_one": "8",
68+ "minor_minus_one": "6"
69+ }
70+ If version is "4.25.8" and dependency is "protobuf":
71+ vars = {
72+ "name": "protobuf",
73+ "version": "4.25.8",
74+ "major": "4",
75+ "minor": "25",
76+ "patch": "8",
77+ "minor_plus_one": "26",
78+ "minor_minus_one": "24"
79+ }
80+
81+ Returns:
82+ A dictionary mapping variable placeholder names to their resolved string values.
83+ """
4084 vars = {
4185 "name" : self .dependency ,
4286 "version" : self .version ,
@@ -148,7 +192,22 @@ def format_match_for_csv(
148192 branch : str = "main"
149193) -> Dict [str , str ]:
150194 """
151- Format a match result for CSV output, adding GitHub links if requested.
195+ Formats a raw match dictionary for clean presentation in, and import of, CSV output.
196+
197+ Cleans long context lines by truncating them around the match location to prevent
198+ extreme cell overflow in spreadsheets. Optionally transforms line numbers into
199+ clickable `=HYPERLINK(...)` formulas linking directly to the exact file and line
200+ number in GitHub.
201+
202+ Args:
203+ match: A match dictionary containing 'file_path', 'repo_path', 'rule_name',
204+ 'line_number', 'matched_string', and 'context_line'.
205+ github_repo: Optional GitHub repository base URL (e.g., "https://github.com/user/repo").
206+ If provided, triggers the hyperlink generation.
207+ branch: Optional branch name to build the GitHub blob URL (defaults to "main").
208+
209+ Returns:
210+ A copy of the match dictionary with formatted/truncated values, suitable for CSV writing.
152211 """
153212 formatted = match .copy ()
154213
@@ -368,16 +427,26 @@ def scan_repository(
368427 version_string : str = None
369428) -> List [Dict [str , str ]]:
370429 """
371- Scan repository for matching patterns.
430+ Scans the repository directory tree applying resolved regex patterns to files.
431+
432+ Walks the directory structure starting at the root path, checking filenames and
433+ file contents line-by-line against compiled patterns. Supports case-insensitive
434+ directory/file ignore patterns, optional restriction of the walk to the target
435+ package subdirectories (wherever they sit in the tree), and filename-based version string matching.
372436
373437 Args:
374- root_path: Path to the repository root.
375- rules: A list of dictionaries containing 'name' and 'pattern'.
376- target_packages: A list of package paths to include (e.g., ['packages/pkg_a']).
377- If None or empty, all packages are scanned.
438+ root_path: Absolute or relative path to the directory tree root to scan.
439+ rules: A list of dictionaries containing 'name' (rule name) and 'pattern'
440+ (regex search pattern string).
441+ target_packages: Optional list of specific subdirectory paths to restrict scanning
442+ (e.g., ['packages/pkg_a', 'generated/pkg_b']). If None or empty,
443+ performs a full recursive scan of the repository.
444+ ignore_dirs: Optional list of directory names or glob-like file patterns to ignore (case-insensitive).
445+ version_string: Optional target version string (e.g. "3.7") to scan for in filenames.
378446
379447 Returns:
380- A list of match details.
448+ A list of dictionaries detailing each match: 'file_path', 'repo_path',
449+ 'package_name', 'rule_name', 'line_number', 'matched_string', 'context_line'.
381450 """
382451 ignore_lower = {i .lower () for i in ignore_dirs } if ignore_dirs else set ()
383452 results = []
@@ -408,17 +477,21 @@ def scan_repository(
408477 rel_root = os .path .relpath (root , root_path )
409478 parts = rel_root .split (os .sep )
410479
411- # Monorepo filtering
412- if target_packages and parts [0 ] == "packages" :
413- if len (parts ) >= 2 :
414- current_package_path = os .path .join (parts [0 ], parts [1 ])
415- if current_package_path not in target_packages :
416- # Skip this directory and all subdirectories
417- dirs [:] = []
418- continue
419- else :
420- # We are in the "packages" directory itself. Continue to walk.
421- pass
480+ # Layout-agnostic generic subdirectory filtering
481+ if target_packages :
482+ norm_targets = {os .path .normpath (tp ) for tp in target_packages }
483+ is_valid_path = False
484+ for target in norm_targets :
485+ if (rel_root == "." or
486+ rel_root == target or
487+ rel_root .startswith (target + os .sep ) or
488+ target .startswith (rel_root + os .sep )):
489+ is_valid_path = True
490+ break
491+ if not is_valid_path :
492+ # Skip searching this directory and all its descendants
493+ dirs [:] = []
494+ continue
422495
423496 for file in files :
424497 file_path = os .path .join (root , file )
@@ -438,7 +511,9 @@ def scan_repository(
438511
439512 package_name = ""
440513 path_parts = rel_file_path .split (os .sep )
441- if len (path_parts ) >= 2 and path_parts [0 ] == "packages" :
514+ # Assume package name is the folder directly under standard package root directories
515+ package_roots = {"packages" , "generated" , "handwritten" , "third_party" }
516+ if len (path_parts ) >= 2 and path_parts [0 ] in package_roots :
442517 package_name = path_parts [1 ]
443518
444519 root_parts = os .path .abspath (root_path ).split (os .sep )
@@ -531,7 +606,11 @@ def main():
531606 # Resolve target packages if filtering is requested
532607 target_packages = []
533608 if args .package :
534- target_packages .append (os .path .join ("packages" , args .package ))
609+ # If the folder exists under the root path as-is, use it. Otherwise fall back to the packages/ prefix.
610+ if os .path .exists (os .path .join (args .path , args .package )):
611+ target_packages .append (args .package )
612+ else :
613+ target_packages .append (os .path .join ("packages" , args .package ))
535614 elif args .package_file :
536615 target_packages = read_package_file (args .package_file )
537616
0 commit comments