Skip to content

Commit 221c3b7

Browse files
committed
Try again
1 parent dbe203c commit 221c3b7

1 file changed

Lines changed: 52 additions & 66 deletions

File tree

ci/tools/report_universally_skipped_tests.py

Lines changed: 52 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,30 @@
3030
"test-windows": r"^Test (win-64|windows) / ",
3131
}
3232

33-
INDEX_FILENAME = "job_index.json"
34-
3533
ANSI_ESCAPE = re.compile(r"\x1B\[[0-9;]*[A-Za-z]")
3634
PYTEST_NODE_ID = re.compile(r"tests/\S+\.py::\S+")
3735
PYTEST_TEST_OUTCOME = re.compile(r"(tests/\S+\.py::\S+)\s+(PASSED|FAILED|ERROR|SKIPPED|XFAIL|XPASS)\b")
3836

37+
# GHA log format markers used to identify which test suite is active.
38+
# `gh api` logs: ##[group]<step-name> opens a section, ##[endgroup] closes it.
39+
GHA_GROUP = re.compile(r"##\[group\](.+)")
40+
# `gh run view --log` logs: tab-separated <job>\t<step>\t<timestamp>\t<content>
41+
GHA_LOG_LINE = re.compile(r"^[^\t]+\t([^\t]+)\t[^\t]+\t(.*)", re.DOTALL)
42+
43+
# Map step-name substrings to canonical test suite names.
44+
STEP_SUITE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
45+
(re.compile(r"run cuda\.bindings tests", re.IGNORECASE), "cuda_bindings"),
46+
(re.compile(r"run cuda\.core tests", re.IGNORECASE), "cuda_core"),
47+
(re.compile(r"run cuda\.pathfinder tests", re.IGNORECASE), "cuda_pathfinder"),
48+
]
49+
50+
51+
def step_name_to_suite(step_name: str) -> str:
52+
for pattern, suite in STEP_SUITE_PATTERNS:
53+
if pattern.search(step_name):
54+
return suite
55+
return ""
56+
3957

4058
@dataclasses.dataclass(frozen=True)
4159
class ConfigResult:
@@ -52,8 +70,6 @@ class ConfigLogs:
5270
name: str
5371
job_ids: list[int]
5472
log_paths: list[Path]
55-
# job_id -> suite name extracted from the job name
56-
job_names: dict[int, str] = dataclasses.field(default_factory=dict)
5773

5874

5975
def run_gh(*args: str, check: bool = True) -> subprocess.CompletedProcess[str]:
@@ -103,17 +119,36 @@ def download_job_log(repo: str, run_id: str, job_id: int, out_path: Path) -> boo
103119
return False
104120

105121

106-
def extract_test_status_sets(text: str) -> tuple[set[str], set[str]]:
122+
def extract_test_status_sets(text: str) -> tuple[set[str], set[str], dict[str, str]]:
123+
"""Parse pytest output and return (skipped, non_skipped, test_id->suite)."""
107124
skipped: set[str] = set()
108125
non_skipped: set[str] = set()
126+
test_suites: dict[str, str] = {}
127+
current_suite = ""
109128

110129
for raw_line in text.splitlines():
111-
line = ANSI_ESCAPE.sub("", raw_line).replace("\\", "/")
130+
# Handle `gh run view --log` tab-separated format.
131+
# Each line: <job>\t<step>\t<timestamp>\t<content>
132+
if log_match := GHA_LOG_LINE.match(raw_line):
133+
suite = step_name_to_suite(log_match.group(1))
134+
if suite:
135+
current_suite = suite
136+
line = ANSI_ESCAPE.sub("", log_match.group(2)).replace("\\", "/")
137+
else:
138+
line = ANSI_ESCAPE.sub("", raw_line).replace("\\", "/")
139+
# Handle `gh api` log format: ##[group]<step-name> opens a section.
140+
if group_match := GHA_GROUP.search(line):
141+
suite = step_name_to_suite(group_match.group(1))
142+
if suite:
143+
current_suite = suite
144+
continue
112145

113146
# Parse per-test outcomes first so PASS/FAIL lines disqualify tests.
114147
for test_id, outcome in PYTEST_TEST_OUTCOME.findall(line):
115148
if outcome == "SKIPPED":
116149
skipped.add(test_id)
150+
if current_suite:
151+
test_suites.setdefault(test_id, current_suite)
117152
else:
118153
non_skipped.add(test_id)
119154

@@ -124,31 +159,10 @@ def extract_test_status_sets(text: str) -> tuple[set[str], set[str]]:
124159
# include a node id but don't match the strict outcome pattern above.
125160
for test_id in PYTEST_NODE_ID.findall(line):
126161
skipped.add(test_id)
162+
if current_suite:
163+
test_suites.setdefault(test_id, current_suite)
127164

128-
return skipped, non_skipped
129-
130-
131-
def extract_suite_name(job_name: str, config_name: str) -> str:
132-
"""Return the test suite portion of a job name (first word after the config prefix)."""
133-
pattern = CONFIG_PATTERNS.get(config_name, "")
134-
if pattern:
135-
match = re.match(pattern, job_name)
136-
if match:
137-
remainder = job_name[match.end() :]
138-
parts = remainder.split()
139-
return parts[0] if parts else job_name
140-
return job_name
141-
142-
143-
def save_job_index(logs_root: Path, index: dict[str, dict[str, str]]) -> None:
144-
(logs_root / INDEX_FILENAME).write_text(json.dumps(index, indent=2), encoding="utf-8")
145-
146-
147-
def load_job_index(logs_root: Path) -> dict[str, dict[str, str]]:
148-
index_path = logs_root / INDEX_FILENAME
149-
if index_path.exists():
150-
return json.loads(index_path.read_text(encoding="utf-8"))
151-
return {}
165+
return skipped, non_skipped, test_suites
152166

153167

154168
def match_job_ids(jobs: Iterable[dict], pattern: str) -> list[int]:
@@ -158,58 +172,35 @@ def match_job_ids(jobs: Iterable[dict], pattern: str) -> list[int]:
158172

159173
def discover_config_logs(logs_root: Path) -> list[ConfigLogs]:
160174
configs: list[ConfigLogs] = []
161-
index = load_job_index(logs_root)
162175

163176
for config in CONFIG_PATTERNS:
164177
config_dir = logs_root / config
165178
log_paths = sorted(config_dir.glob("*.log")) if config_dir.exists() else []
166179
job_ids: list[int] = []
167-
job_names: dict[int, str] = {}
168-
config_index = index.get(config, {})
169-
170180
for log_path in log_paths:
171181
with contextlib.suppress(ValueError):
172-
job_id = int(log_path.stem)
173-
job_ids.append(job_id)
174-
suite = config_index.get(str(job_id), "")
175-
if suite:
176-
job_names[job_id] = suite
177-
178-
configs.append(ConfigLogs(name=config, job_ids=job_ids, log_paths=log_paths, job_names=job_names))
182+
job_ids.append(int(log_path.stem))
183+
configs.append(ConfigLogs(name=config, job_ids=job_ids, log_paths=log_paths))
179184

180185
return configs
181186

182187

183188
def download_config_logs(jobs: list[dict], repo: str, run_id: str, logs_root: Path) -> list[ConfigLogs]:
184189
configs: list[ConfigLogs] = []
185-
index: dict[str, dict[str, str]] = {}
186190

187191
for config, pattern in CONFIG_PATTERNS.items():
188192
config_dir = logs_root / config
189193
job_ids = match_job_ids(jobs, pattern)
190194
log_paths: list[Path] = []
191195

192-
# Build job_id -> suite_name from job metadata before downloading logs.
193-
regex = re.compile(pattern)
194-
job_names: dict[int, str] = {}
195-
for job in jobs:
196-
job_name = str(job.get("name", ""))
197-
if not regex.search(job_name):
198-
continue
199-
job_id = int(job["id"])
200-
if job_id in job_ids:
201-
job_names[job_id] = extract_suite_name(job_name, config)
202-
203196
for job_id in job_ids:
204197
log_path = config_dir / f"{job_id}.log"
205198
if not log_path.exists() and not download_job_log(repo, run_id, job_id, log_path):
206199
continue
207200
log_paths.append(log_path)
208201

209-
configs.append(ConfigLogs(name=config, job_ids=job_ids, log_paths=log_paths, job_names=job_names))
210-
index[config] = {str(jid): name for jid, name in job_names.items()}
202+
configs.append(ConfigLogs(name=config, job_ids=job_ids, log_paths=log_paths))
211203

212-
save_job_index(logs_root, index)
213204
return configs
214205

215206

@@ -224,17 +215,12 @@ def analyze_config_logs(config_logs: list[ConfigLogs]) -> list[ConfigResult]:
224215
for log_path in config.log_paths:
225216
text = log_path.read_text(encoding="utf-8", errors="replace")
226217

227-
skipped_in_log, non_skipped_in_log = extract_test_status_sets(text)
218+
skipped_in_log, non_skipped_in_log, suites_in_log = extract_test_status_sets(text)
228219
skipped_any.update(skipped_in_log)
229220
non_skipped_any.update(non_skipped_in_log)
230-
231-
# Associate skipped test IDs with the suite derived from the job name.
232-
with contextlib.suppress(ValueError):
233-
job_id = int(log_path.stem)
234-
suite = config.job_names.get(job_id, "")
235-
if suite:
236-
for test_id in skipped_in_log:
237-
test_suites.setdefault(test_id, suite)
221+
# First log to identify a test's suite wins (setdefault semantics).
222+
for test_id, suite in suites_in_log.items():
223+
test_suites.setdefault(test_id, suite)
238224

239225
# For sharded matrices, a test may only appear in one log. Treat it as
240226
# config-skipped if it is skipped at least once and never non-skipped
@@ -280,7 +266,7 @@ def build_summary(results: list[ConfigResult]) -> str:
280266
"_Note: the test `tests/test_cuda.py::test_always_skip` is expected to be skipped in all configurations, but is missing._"
281267
)
282268

283-
# Merge test->suite mappings across all configs (first one seen wins).
269+
# Merge test->suite mappings across all configs (first config to identify wins).
284270
test_suites: dict[str, str] = {}
285271
for result in results:
286272
for test_id, suite in result.test_suites.items():

0 commit comments

Comments (0)