3030 "test-windows" : r"^Test (win-64|windows) / " ,
3131}
3232
33- INDEX_FILENAME = "job_index.json"
34-
# Strips ANSI color/control escape sequences (e.g. "\x1b[31m") from log lines
# before pattern matching.
ANSI_ESCAPE = re.compile(r"\x1B\[[0-9;]*[A-Za-z]")
# Loose match: any pytest node id, e.g. "tests/test_x.py::test_y".
PYTEST_NODE_ID = re.compile(r"tests/\S+\.py::\S+")
# Strict match: a node id immediately followed by an explicit pytest outcome
# keyword; group 1 is the node id, group 2 the outcome.
PYTEST_TEST_OUTCOME = re.compile(r"(tests/\S+\.py::\S+)\s+(PASSED|FAILED|ERROR|SKIPPED|XFAIL|XPASS)\b")
3836
# GHA log format markers used to identify which test suite is active.
# `gh api` logs: ##[group]<step-name> opens a section, ##[endgroup] closes it.
GHA_GROUP = re.compile(r"##\[group\](.+)")
# `gh run view --log` logs: tab-separated <job>\t<step>\t<timestamp>\t<content>
GHA_LOG_LINE = re.compile(r"^[^\t]+\t([^\t]+)\t[^\t]+\t(.*)", re.DOTALL)

# Map step-name substrings to canonical test suite names.
STEP_SUITE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    (re.compile(r"run cuda\.bindings tests", re.IGNORECASE), "cuda_bindings"),
    (re.compile(r"run cuda\.core tests", re.IGNORECASE), "cuda_core"),
    (re.compile(r"run cuda\.pathfinder tests", re.IGNORECASE), "cuda_pathfinder"),
]


def step_name_to_suite(step_name: str) -> str:
    """Return the canonical suite name for *step_name*, or "" if unrecognized."""
    hits = (suite for pattern, suite in STEP_SUITE_PATTERNS if pattern.search(step_name))
    return next(hits, "")
56+
3957
4058@dataclasses .dataclass (frozen = True )
4159class ConfigResult :
@@ -52,8 +70,6 @@ class ConfigLogs:
5270 name : str
5371 job_ids : list [int ]
5472 log_paths : list [Path ]
55- # job_id -> suite name extracted from the job name
56- job_names : dict [int , str ] = dataclasses .field (default_factory = dict )
5773
5874
5975def run_gh (* args : str , check : bool = True ) -> subprocess .CompletedProcess [str ]:
@@ -103,17 +119,36 @@ def download_job_log(repo: str, run_id: str, job_id: int, out_path: Path) -> boo
103119 return False
104120
105121
106- def extract_test_status_sets (text : str ) -> tuple [set [str ], set [str ]]:
122+ def extract_test_status_sets (text : str ) -> tuple [set [str ], set [str ], dict [str , str ]]:
123+ """Parse pytest output and return (skipped, non_skipped, test_id->suite)."""
107124 skipped : set [str ] = set ()
108125 non_skipped : set [str ] = set ()
126+ test_suites : dict [str , str ] = {}
127+ current_suite = ""
109128
110129 for raw_line in text .splitlines ():
111- line = ANSI_ESCAPE .sub ("" , raw_line ).replace ("\\ " , "/" )
130+ # Handle `gh run view --log` tab-separated format.
131+ # Each line: <job>\t<step>\t<timestamp>\t<content>
132+ if log_match := GHA_LOG_LINE .match (raw_line ):
133+ suite = step_name_to_suite (log_match .group (1 ))
134+ if suite :
135+ current_suite = suite
136+ line = ANSI_ESCAPE .sub ("" , log_match .group (2 )).replace ("\\ " , "/" )
137+ else :
138+ line = ANSI_ESCAPE .sub ("" , raw_line ).replace ("\\ " , "/" )
139+ # Handle `gh api` log format: ##[group]<step-name> opens a section.
140+ if group_match := GHA_GROUP .search (line ):
141+ suite = step_name_to_suite (group_match .group (1 ))
142+ if suite :
143+ current_suite = suite
144+ continue
112145
113146 # Parse per-test outcomes first so PASS/FAIL lines disqualify tests.
114147 for test_id , outcome in PYTEST_TEST_OUTCOME .findall (line ):
115148 if outcome == "SKIPPED" :
116149 skipped .add (test_id )
150+ if current_suite :
151+ test_suites .setdefault (test_id , current_suite )
117152 else :
118153 non_skipped .add (test_id )
119154
@@ -124,31 +159,10 @@ def extract_test_status_sets(text: str) -> tuple[set[str], set[str]]:
124159 # include a node id but don't match the strict outcome pattern above.
125160 for test_id in PYTEST_NODE_ID .findall (line ):
126161 skipped .add (test_id )
162+ if current_suite :
163+ test_suites .setdefault (test_id , current_suite )
127164
128- return skipped , non_skipped
129-
130-
131- def extract_suite_name (job_name : str , config_name : str ) -> str :
132- """Return the test suite portion of a job name (first word after the config prefix)."""
133- pattern = CONFIG_PATTERNS .get (config_name , "" )
134- if pattern :
135- match = re .match (pattern , job_name )
136- if match :
137- remainder = job_name [match .end () :]
138- parts = remainder .split ()
139- return parts [0 ] if parts else job_name
140- return job_name
141-
142-
143- def save_job_index (logs_root : Path , index : dict [str , dict [str , str ]]) -> None :
144- (logs_root / INDEX_FILENAME ).write_text (json .dumps (index , indent = 2 ), encoding = "utf-8" )
145-
146-
147- def load_job_index (logs_root : Path ) -> dict [str , dict [str , str ]]:
148- index_path = logs_root / INDEX_FILENAME
149- if index_path .exists ():
150- return json .loads (index_path .read_text (encoding = "utf-8" ))
151- return {}
165+ return skipped , non_skipped , test_suites
152166
153167
154168def match_job_ids (jobs : Iterable [dict ], pattern : str ) -> list [int ]:
@@ -158,58 +172,35 @@ def match_job_ids(jobs: Iterable[dict], pattern: str) -> list[int]:
158172
def discover_config_logs(logs_root: Path) -> list[ConfigLogs]:
    """Rebuild ConfigLogs entries from logs already downloaded under *logs_root*.

    One entry is produced per configured pattern; job ids are recovered from
    the "<job_id>.log" file names.
    """
    discovered: list[ConfigLogs] = []

    for config_name in CONFIG_PATTERNS:
        config_dir = logs_root / config_name
        paths = sorted(config_dir.glob("*.log")) if config_dir.exists() else []
        ids: list[int] = []
        for path in paths:
            # A stem that is not an integer is not one of our job logs; ignore it.
            with contextlib.suppress(ValueError):
                ids.append(int(path.stem))
        discovered.append(ConfigLogs(name=config_name, job_ids=ids, log_paths=paths))

    return discovered
181186
182187
def download_config_logs(jobs: list[dict], repo: str, run_id: str, logs_root: Path) -> list[ConfigLogs]:
    """Download the log of every job matching each configured pattern.

    Returns one ConfigLogs per configuration; jobs whose log can be neither
    found on disk nor downloaded are silently omitted from ``log_paths``.
    """
    results: list[ConfigLogs] = []

    for config_name, pattern in CONFIG_PATTERNS.items():
        config_dir = logs_root / config_name
        matched_ids = match_job_ids(jobs, pattern)
        downloaded: list[Path] = []

        for job_id in matched_ids:
            destination = config_dir / f"{job_id}.log"
            # Reuse an already-downloaded log; otherwise fetch it, skipping failures.
            if destination.exists() or download_job_log(repo, run_id, job_id, destination):
                downloaded.append(destination)

        results.append(ConfigLogs(name=config_name, job_ids=matched_ids, log_paths=downloaded))

    return results
214205
215206
@@ -224,17 +215,12 @@ def analyze_config_logs(config_logs: list[ConfigLogs]) -> list[ConfigResult]:
224215 for log_path in config .log_paths :
225216 text = log_path .read_text (encoding = "utf-8" , errors = "replace" )
226217
227- skipped_in_log , non_skipped_in_log = extract_test_status_sets (text )
218+ skipped_in_log , non_skipped_in_log , suites_in_log = extract_test_status_sets (text )
228219 skipped_any .update (skipped_in_log )
229220 non_skipped_any .update (non_skipped_in_log )
230-
231- # Associate skipped test IDs with the suite derived from the job name.
232- with contextlib .suppress (ValueError ):
233- job_id = int (log_path .stem )
234- suite = config .job_names .get (job_id , "" )
235- if suite :
236- for test_id in skipped_in_log :
237- test_suites .setdefault (test_id , suite )
221+ # First log to identify a test's suite wins (setdefault semantics).
222+ for test_id , suite in suites_in_log .items ():
223+ test_suites .setdefault (test_id , suite )
238224
239225 # For sharded matrices, a test may only appear in one log. Treat it as
240226 # config-skipped if it is skipped at least once and never non-skipped
@@ -280,7 +266,7 @@ def build_summary(results: list[ConfigResult]) -> str:
280266 "_Note: the test `tests/test_cuda.py::test_always_skip` is expected to be skipped in all configurations, but is missing._"
281267 )
282268
283- # Merge test->suite mappings across all configs (first one seen wins).
269+ # Merge test->suite mappings across all configs (first config to identify wins).
284270 test_suites : dict [str , str ] = {}
285271 for result in results :
286272 for test_id , suite in result .test_suites .items ():
0 commit comments