Skip to content

Commit ed9c2a2

Browse files
hoe-jocastler
authored and committed
[ai checker] fix input files aggregation
1 parent 965fd86 commit ed9c2a2

3 files changed

Lines changed: 104 additions & 38 deletions

File tree

validation/ai_checker/ai_checker.bzl

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ load("//bazel/rules/rules_score:providers.bzl", "ArchitecturalDesignInfo")
2323
# Shared implementation
2424
# ============================================================================
2525

26-
def _run_ai_analysis(ctx, analysis_files, all_input_files, input_dirs, dep_dirs):
26+
def _run_ai_analysis(ctx, analysis_files, all_input_files, input_dirs, dep_dirs, req_files = None):
2727
"""Common implementation for all AI artefact analysis test rules.
2828
2929
Args:
@@ -32,6 +32,10 @@ def _run_ai_analysis(ctx, analysis_files, all_input_files, input_dirs, dep_dirs)
3232
all_input_files: All files needed as action inputs (incl. deps for resolution).
3333
input_dirs: Dict of directories containing analysis files.
3434
dep_dirs: Dict of dependency directories (for link resolution).
35+
req_files: Optional list of individual files to register with TRLC instead
36+
of scanning the entire input directory. When set, only these files are
37+
parsed; other files present in the same directory are ignored. This
38+
avoids picking up unreferenced files that may fail TRLC validation.
3539
3640
Returns:
3741
List of providers (DefaultInfo).
@@ -63,6 +67,14 @@ def _run_ai_analysis(ctx, analysis_files, all_input_files, input_dirs, dep_dirs)
6367
if extra_dir != input_dir:
6468
args.add("--deps", extra_dir)
6569

70+
# When individual req files are provided, pass them explicitly so the
71+
# extractor registers only those files and ignores other files present in
72+
# the same directory (e.g. files not declared in Bazel srcs that may fail
73+
# TRLC validation).
74+
if req_files:
75+
for f in req_files:
76+
args.add("--req-file", f)
77+
6678
args.add("--output", json_report.path)
6779
args.add("--html", html_report.path)
6880
args.add("--guidelines-output", guidelines_output_dir.path)
@@ -212,7 +224,7 @@ def _trlc_requirements_ai_test_impl(ctx):
212224
for f in dep_reqs + spec_files:
213225
dep_dirs[f.dirname] = True
214226

215-
return _run_ai_analysis(ctx, analysis_files, all_files, input_dirs, dep_dirs)
227+
return _run_ai_analysis(ctx, analysis_files, all_files, input_dirs, dep_dirs, req_files = analysis_files)
216228

217229
trlc_requirements_ai_test = rule(
218230
implementation = _trlc_requirements_ai_test_impl,

validation/ai_checker/src/ai_checker/orchestrator.py

Lines changed: 30 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,10 @@ def __init__(
166166
self._extracted_artefacts = None
167167

168168
def analyze_directory(
169-
self, input_dir: str, dependency_dirs: list[str] | None = None
169+
self,
170+
input_dir: str,
171+
dependency_dirs: list[str] | None = None,
172+
req_files: list[str] | None = None,
170173
) -> AnalysisResults:
171174
"""
172175
Extract and analyze artefacts from a directory using TRLC
@@ -176,12 +179,20 @@ def analyze_directory(
176179
input_dir: Path to directory containing files to analyze
177180
dependency_dirs: Optional list of directories containing
178181
dependencies for link resolution
182+
req_files: Optional list of individual TRLC files to register
183+
instead of scanning the entire input directory. When set,
184+
only these files are parsed so that unreferenced files
185+
present in the same directory are not picked up.
179186
180187
Returns:
181188
AnalysisResults containing structured analyses for each artefact
182189
"""
183190
# Initialize TRLC requirement extractor
184-
self.artefact_extractor = RequirementExtractor(input_dir, dependency_dirs)
191+
self.artefact_extractor = RequirementExtractor(
192+
input_dir,
193+
dependency_dirs,
194+
req_files=req_files or [],
195+
)
185196

186197
# Extract artefacts
187198
artefacts = self.artefact_extractor.extract()
@@ -256,6 +267,18 @@ def argument_parser() -> argparse.ArgumentParser:
256267
parser = argparse.ArgumentParser(
257268
description="Analyze TRLC requirements against engineering guidelines"
258269
)
270+
parser.add_argument(
271+
"--req-file",
272+
action="append",
273+
default=[],
274+
dest="req_file",
275+
help=(
276+
"Individual TRLC file to register for analysis "
277+
"(can be specified multiple times). When provided, only these "
278+
"files are registered from the input directory instead of "
279+
"scanning the entire directory."
280+
),
281+
)
259282
parser.add_argument(
260283
"-i",
261284
"--input",
@@ -362,7 +385,11 @@ def main() -> None:
362385
max_concurrent_requests=args.max_concurrent_requests,
363386
max_batch_chars=args.max_batch_chars,
364387
)
365-
analysis_results = orchestrator.analyze_directory(args.input, args.deps)
388+
analysis_results = orchestrator.analyze_directory(
389+
args.input,
390+
args.deps,
391+
req_files=args.req_file or None,
392+
)
366393

367394
# Format and output results
368395
orchestrator.format_and_output(

validation/ai_checker/src/ai_checker/requirement_extractor.py

Lines changed: 60 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,10 @@ class RequirementExtractor(ArtefactExtractor):
3232
"""Extracts structured requirement data from TRLC files."""
3333

3434
def __init__(
35-
self, input_directory: str, dependency_directories: list[str] | None = None
35+
self,
36+
input_directory: str,
37+
dependency_directories: list[str] | None = None,
38+
req_files: list[str] | None = None,
3639
):
3740
"""
3841
Initialize the RequirementExtractor with directory paths.
@@ -42,18 +45,28 @@ def __init__(
4245
analyze
4346
dependency_directories: Optional list of additional
4447
directories for link resolution
48+
req_files: Optional list of individual TRLC files to register
49+
instead of scanning the entire input directory. When set,
50+
only these files are registered so that other files present
51+
in the same directory (e.g. files not declared in Bazel
52+
srcs) are not picked up by TRLC.
4553
"""
4654
self.input_directory = os.path.abspath(input_directory)
4755
self.dependency_directories = [
4856
os.path.abspath(d) for d in (dependency_directories or [])
4957
]
58+
self.req_files = [os.path.abspath(f) for f in (req_files or [])]
5059
self.symbols: trlc.ast.Symbol_Table | None = None
5160

5261
def parse_trlc_files(self) -> trlc.ast.Symbol_Table:
5362
"""
5463
Parse TRLC files in the specified directories.
5564
56-
Registers all directories (input + dependencies) with TRLC for link resolution.
65+
When ``req_files`` was supplied at construction time, only those
66+
individual files are registered from the input directory; dependency
67+
directories are still registered in full for link resolution.
68+
When ``req_files`` is empty (the default), all directories including
69+
the input directory are registered (original behaviour).
5770
5871
Returns:
5972
Symbol table containing parsed TRLC objects
@@ -64,37 +77,51 @@ def parse_trlc_files(self) -> trlc.ast.Symbol_Table:
6477
message_handler = Message_Handler()
6578
source_manager = Source_Manager(message_handler)
6679

67-
# Collect all directories and filter out overlapping ones
68-
all_dirs = [self.input_directory] + self.dependency_directories
69-
70-
# Remove duplicates and filter out directories that are
71-
# subdirectories of others
72-
unique_dirs = []
73-
for dir_path in sorted(set(all_dirs)):
74-
# Check if this directory is a subdirectory of any already
75-
# registered directory
76-
is_subdir = False
77-
for existing_dir in unique_dirs:
78-
if dir_path.startswith(existing_dir + os.sep):
79-
is_subdir = True
80-
break
81-
82-
# Also check if any existing directory is a subdirectory of this one
83-
# In that case, remove the existing one and add this one
84-
dirs_to_remove = []
85-
for i, existing_dir in enumerate(unique_dirs):
86-
if existing_dir.startswith(dir_path + os.sep):
87-
dirs_to_remove.append(i)
88-
89-
for i in reversed(dirs_to_remove):
90-
unique_dirs.pop(i)
91-
92-
if not is_subdir:
93-
unique_dirs.append(dir_path)
94-
95-
# Register all unique, non-overlapping directories
96-
for dir_path in unique_dirs:
97-
source_manager.register_directory(dir_path)
80+
if self.req_files:
81+
# Register only the specific req files declared in the Bazel target.
82+
# This avoids picking up extra .trlc files in the same directory that
83+
# are not part of the target and may fail TRLC validation.
84+
for file_path in self.req_files:
85+
source_manager.register_file(file_path)
86+
87+
# Register dependency directories in full for cross-reference / link
88+
# resolution (these dirs are controlled by Bazel deps and are expected
89+
# to be valid).
90+
for dep_dir in self.dependency_directories:
91+
source_manager.register_directory(dep_dir)
92+
else:
93+
# Original behaviour: register all directories (input + deps).
94+
# Collect all directories and filter out overlapping ones.
95+
all_dirs = [self.input_directory] + self.dependency_directories
96+
97+
# Remove duplicates and filter out directories that are
98+
# subdirectories of others
99+
unique_dirs = []
100+
for dir_path in sorted(set(all_dirs)):
101+
# Check if this directory is a subdirectory of any already
102+
# registered directory
103+
is_subdir = False
104+
for existing_dir in unique_dirs:
105+
if dir_path.startswith(existing_dir + os.sep):
106+
is_subdir = True
107+
break
108+
109+
# Also check if any existing directory is a subdirectory of this one
110+
# In that case, remove the existing one and add this one
111+
dirs_to_remove = []
112+
for i, existing_dir in enumerate(unique_dirs):
113+
if existing_dir.startswith(dir_path + os.sep):
114+
dirs_to_remove.append(i)
115+
116+
for i in reversed(dirs_to_remove):
117+
unique_dirs.pop(i)
118+
119+
if not is_subdir:
120+
unique_dirs.append(dir_path)
121+
122+
# Register all unique, non-overlapping directories
123+
for dir_path in unique_dirs:
124+
source_manager.register_directory(dir_path)
98125

99126
symbols = source_manager.process()
100127
if symbols is None:

0 commit comments

Comments
 (0)