From a2880ebe0650e65241e129398e93048d9a207148 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Wed, 10 Jun 2026 18:48:58 -0700 Subject: [PATCH 1/5] [Utils] Add ci_results script Adds a script that helps inspect and root-cause CI results. We provide two commands, `current-status` and `failure-range`. The first summarizes the results from the most recent run on all CI bots, and the second helps bisect where a particular test started failing. This all works by using the `gh` tool to look at the logs from GitHub workflow runs, with some smarts that have to do with how our runs are set up. --- utils/ci_results.py | 496 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 496 insertions(+) create mode 100644 utils/ci_results.py diff --git a/utils/ci_results.py b/utils/ci_results.py new file mode 100644 index 000000000..7177796bf --- /dev/null +++ b/utils/ci_results.py @@ -0,0 +1,496 @@ +#!/usr/bin/env python3 +"""Utilities to inspect and root cause recent CI results. +""" + +import argparse +import bisect +import json +import pathlib +import re +import shutil +import subprocess +import sys +import textwrap + + +class CIResultsError(Exception): + pass + + +def main(argv): + parser = get_argument_parser(argv[0]) + args = parser.parse_args(argv[1:]) + + if not shutil.which('gh'): + print('error: gh utility not found, cannot continue') + return 1 + try: + args.func(args) + except CIResultsError as e: + print(f'error: {e}', file=sys.stderr) + return 1 + except subprocess.CalledProcessError as e: + print(f'error: {e}', file=sys.stderr) + return 1 + + return 0 + + +class ArgumentParserWithSubcommandUsage(argparse.ArgumentParser): + """Version of argparse.ArgumentParser that prints usage of each subcommand. 
+ """ + + def format_usage(self): + lines = [] + has_subparsers = False + for action in self._actions: + if isinstance(action, argparse._SubParsersAction): + if not has_subparsers: + lines.append('usage:') + has_subparsers = True + for choice, subparser in action.choices.items(): + usage = subparser.format_usage().strip()[len('usage: '):] + lines.append(f' {usage}') + if not has_subparsers: + lines.append(super().format_usage().strip()) + lines.append('') + return '\n'.join(lines) + + +def get_argument_parser(prog_name): + parser = ArgumentParserWithSubcommandUsage( + prog_name, description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + subparsers = parser.add_subparsers(required=True) + + parser_status = subparsers.add_parser( + 'current-status', + help=summarize_docstring(analyze_current_status), + description=dedent_docstring(analyze_current_status), + formatter_class=argparse.RawDescriptionHelpFormatter) + parser_status.add_argument( + 'workflows', nargs='*', + help='Limit to particular workflow .yaml files') + parser_status.add_argument( + '--regressions', action='store_true', + help='Only show workflows that have regressed') + parser_status.set_defaults( + func=lambda args: analyze_current_status( + args.workflows, regressions_only=args.regressions)) + + parser_range = subparsers.add_parser( + 'failure-range', + help=summarize_docstring(find_failure_range), + description=dedent_docstring(find_failure_range), + formatter_class=argparse.RawDescriptionHelpFormatter) + parser_range.add_argument( + 'workflow', + help='Workflow .yaml file to inspect') + parser_range.add_argument( + 'test_path', + help='Test case to narrow down range for') + parser_range.add_argument( + '--run-limit', type=int, default=100, + help='Number of runs to consider in search (default: %(default)s)') + parser_range.add_argument( + '--old-result', default='XFAIL|PASS', + help='"old" state results, separated by `|` (default: %(default)s)') + parser_range.add_argument( + 
'--new-result', default='FAIL|XPASS', + help='"new" state results, separated by `|` (default: %(default)s)') + parser_range.add_argument( + '--old-runid', type=int, + help='Run ID to start searching from') + parser_range.add_argument( + '--new-runid', type=int, + help='Run ID to search up to') + parser_range.set_defaults( + func=lambda args: find_failure_range( + args.workflow, args.test_path, run_limit=args.run_limit, + old_result=args.old_result, new_result=args.new_result, + old_runid=args.old_runid, new_runid=args.new_runid)) + + return parser + + +def summarize_docstring(function): + return function.__doc__.split('\n')[0] + + +def dedent_docstring(function): + lines = function.__doc__.split('\n') + return '\n'.join([lines[0], textwrap.dedent('\n'.join(lines[1:]))]) + + +def analyze_current_status(workflows, regressions_only=False): + """Show the current CI status for one or more workflows. + + For each given workflow, show the status of the most recent run and + indicate any failing or unexpectedly passing tests. If no workflows are + provided, heuristically attempt to run all workflows. + + If `regressions_only` is set, runs that have never succeeded will be + omitted. + """ + if not workflows: + for path in pathlib.Path(".github/workflows").glob("*.yaml"): + if any((path.name.startswith("build-"), + path.name.startswith("pr-"), + path.name.startswith("validate-"))): + continue + workflows.append(path.name) + workflows.sort(key=workflow_status_key) + if not workflows: + raise CIResultsError( + f'No workflows found in .github/workflows/. 
' + f'Please run from the top level directory of the repository.') + + printer = ResultPrinter() + + for workflow in workflows: + project = 'llvm/llvm-project' + if '-dxc-' in workflow: + project = 'Microsoft/DirectXShaderCompiler' + + last_completed = get_last_run(workflow, status='completed') + last_success = get_last_run(workflow, status='success') + + if regressions_only and not last_success: + if not last_success: + continue + if last_completed['databaseId'] == last_success['databaseId']: + continue + + if last_success: + proc = get_log_proc(last_success['databaseId']) + success_hash = read_until_githash(proc, project) + suite_success_hash = read_until_githash(proc, + 'llvm/offload-test-suite') + proc.terminate() + else: + success_hash = None + suite_success_hash = None + + printer.print_header(workflow, last_completed['conclusion']) + printer.print_metadata(f"Name: {last_completed['name']}") + printer.print_metadata(f"Timestamp: {last_completed['createdAt']}") + + if (last_success and + last_completed['databaseId'] == last_success['databaseId']): + # The most recent run passed, we have nothing more to do. 
+ printer.print_commit(project, success_hash) + printer.print_commit('llvm/offload-test-suite', suite_success_hash) + printer.printline() + continue + + proc = get_log_proc(last_completed['databaseId']) + failed_hash = read_until_githash(proc, project) + suite_failed_hash = read_until_githash(proc, 'llvm/offload-test-suite') + + printer.print_commit(project, failed_hash) + printer.print_commit('llvm/offload-test-suite', suite_failed_hash) + if success_hash and failed_hash: + printer.print_commit_range(project, success_hash, failed_hash) + printer.print_commit_range('llvm/offload-test-suite', + suite_success_hash, suite_failed_hash) + printer.printline() + + test_re = re.compile(r'\b(?PXPASS|FAIL): .* :: (?P.*) \(') + for line in proc.stdout: + found = test_re.search(line) + if found: + printer.print_result(found.group('test'), found.group('result')) + proc.wait() + printer.printline() + + +def workflow_status_key(workflow): + parts = workflow[:-len('.yaml')].split('-') + if not parts: + return None + parts.reverse() + + host = parts.pop() + if host == 'macos': + target = 'metal' + else: + target = parts.pop() + compiler = parts.pop() + driver = parts.pop() + + # We label warp warp-d3d12 and the variant warp-preview-d3d12. + if driver == 'warp' and parts[-1] == 'd3d12': + parts.pop() + + variant = True if parts else False + + # Match the order that we print the results in the offload test suite + # README for easier correlation. Unfortunately I don't see an obvious way + # to automate this. 
+ tier_list = [ + ('d3d12', 'intel'), + ('d3d12', 'nvidia'), + ('warp', 'amd'), + ('warp', 'qc'), + ('vk', 'intel'), + ('mtl', 'metal'), + ('d3d12', 'amd'), + ('d3d12', 'qc'), + ('vk', 'amd'), + ('vk', 'nvidia'), + ('vk', 'qc'), + ] + try: + tier = tier_list.index((driver, target)) + except ValueError: + tier = len(tier_list) + + compiler_key = 0 if compiler == 'dxc' else 1 + + return (variant, tier, driver, target, host, compiler_key) + + +def find_failure_range(workflow, test_path, *, + run_limit, old_result, new_result, + old_runid=None, new_runid=None): + """Find the git range where a test started to fail. + + Given a workflow and test path, attempt to find the CI run where it first + started failing and report the git ranges from the previous success. These + ranges can then be used to further investigate or bisect locally. + + If old_runid is not provided, the search will trot backwards from new_runid + or the latest run until it finds a success to start bisecting from. This + will abort if we exceed run_limit. + + old_result and new_result may be set to regexes to search for test status + changes other than failure and unexpected passes. + """ + + # Sanitize result arguments + old_result = re.compile( + '|'.join(re.escape(x) for x in old_result.split('|'))) + new_result = re.compile( + '|'.join(re.escape(x) for x in new_result.split('|'))) + + # Lookup the runs we're going to work with. + # TODO: Use paging instead of a run_limit? 
+ runs = get_recent_runs(workflow, run_limit=run_limit) + if not runs: + raise CIResultsError(f'could not find any runs for {workflow}') + + runids = [run['databaseId'] for run in runs] + start_index = runid_index(runids, new_runid) if new_runid else 0 + end_index = runid_index(runids, old_runid) if old_runid else 0 + + # Sanity check that the starting index has the right state + new_hash, result = get_test_result(runids[start_index], test_path) + print(f'{start_index} - ' + f'Run {runids[start_index]} ({new_hash}): {result}', + file=sys.stderr) + if not new_result.match(result): + raise CIResultsError( + f'Current result is {result}, not {new_result.pattern}') + + if end_index == 0: + # We don't have a range yet. "Gallop" until we find an end index. + offset = 1 + while start_index + offset < len(runids): + end_index = start_index + offset + old_hash, result = get_test_result(runids[end_index], test_path) + print(f'{end_index} - ' + f'Run {runids[end_index]} ({old_hash}): {result}', + file=sys.stderr) + if old_result.match(result): + break + if not new_result.match(result): + raise CIResultsError( + f'Unhandled result in run {runids[end_index]}: {result}') + start_index = end_index + new_hash = old_hash + offset *= 2 + else: + raise CIResultsError( + f'Did not find run with result matching {old_result.pattern}. ' + f'Try higher --run-limit and --new-runid={runids[start_index]}') + else: + # Sanity check that the end index has the right state + old_hash, result = get_test_result(runids[end_index], test_path) + print(f'{end_index} - ' + f'Run {runids[end_index]} ({old_hash}): {result}', + file=sys.stderr) + if not old_result.match(result): + raise CIResultsError( + f'Old result is {result}, not {old_result.pattern}') + + # Finally, we can bisect the logs. 
+ while end_index > start_index + 1: + current_index = int((start_index + end_index) / 2) + git_hash, result = get_test_result(runids[current_index], test_path) + + print(f'{current_index} - ' + f'Run {runids[current_index]} ({git_hash}): {result}', + file=sys.stderr) + if old_result.match(result): + old_hash = git_hash + end_index = current_index + elif new_result.match(result): + new_hash = git_hash + start_index = current_index + else: + raise CIResultsError( + f'Unhandled result in run {runids[end_index]}: {result}') + + if (end_index != start_index + 1): + raise CIResultsError( + f'Bisection ended early? Range is {start_index} to {end_index}') + + print() + print(f'dxc range: {old_hash.dxc_hash}..{new_hash.dxc_hash}') + print(f'llvm range: {old_hash.llvm_hash}..{new_hash.llvm_hash}') + print(f'test suite range: {old_hash.offload_hash}..{new_hash.offload_hash}') + + +def runid_index(runids, runid): + try: + return runids.index(runid) + except ValueError: + raise CIResultsError( + f'Could not find runid ({runid}). 
Try raising `run_limit`.') + + +def get_last_run(workflow, status='completed'): + output = subprocess.run( + ['gh', 'run', '-R', 'llvm/offload-test-suite', 'list', + '--workflow', workflow, '--status', status, + '--json', 'name,databaseId,createdAt,conclusion', + '--jq', 'max_by(.createdAt)'], + check=True, stdout=subprocess.PIPE).stdout.strip() + if not output: + return None + return json.loads(output) + + +def get_recent_runs(workflow, *, run_limit): + output = subprocess.run( + ['gh', 'run', '-R', 'llvm/offload-test-suite', 'list', + '-L', str(run_limit), '--workflow', workflow, '--status', 'completed', + '--json', 'name,databaseId,createdAt,conclusion'], + check=True, stdout=subprocess.PIPE).stdout + if not output: + return None + return json.loads(output) + + +def get_log_proc(databaseId): + return subprocess.Popen( + ['gh', 'run', '-R', 'llvm/offload-test-suite', + 'view', '--log', str(databaseId)], + text=True, bufsize=1, stdout=subprocess.PIPE) + + +def read_until_githash(proc, repo): + checkout_action_re = re.compile(r'Run actions/checkout') + repo_re = re.compile(f'repository: {repo}') + git_log_hash_re = re.compile('log -1 --format=%H') + hash_re = re.compile('[0-9a-f]{40}') + + state = None + for line in proc.stdout: + if checkout_action_re.search(line): + state = 'checkout' + elif state == 'checkout' and repo_re.search(line): + state = 'repo' + elif state == 'repo': + if git_log_hash_re.search(line): + state = 'hash' + elif state == 'hash': + found = hash_re.search(line) + if not found: + raise CIResultsError('Hash not printed from git log command?') + return found.group(0) + return None + + +class Hashes(object): + def __init__(self, dxc_hash, llvm_hash, offload_hash): + # Note: Without looking at the repos we can't actually calculate what's + # sufficiently long for a short hash, so we just use something large. 
+ self.dxc_hash = dxc_hash[:12] + self.llvm_hash = llvm_hash[:12] + self.offload_hash = offload_hash[:12] + + def __repr__(self): + return (f'Hashes(dxc_hash={self.dxc_hash}, llvm_hash={self.llvm_hash}, ' + f'offload_hash={self.offload_hash})') + + def __str__(self): + return ', '.join([f'dxc: {self.dxc_hash or '-'}', + f'llvm: {self.llvm_hash or '-'}', + f'test-suite: {self.offload_hash or '-'}']) + + +def get_test_result(databaseId, test_path): + proc = get_log_proc(databaseId) + dxc_hash = read_until_githash(proc, 'Microsoft/DirectXShaderCompiler') + llvm_hash = read_until_githash(proc, 'llvm/llvm-project') + offload_hash = read_until_githash(proc, 'llvm/offload-test-suite') + if not dxc_hash or not llvm_hash or not offload_hash: + raise CIResultsError(f'Failed to find repo hashes for run {databaseId}') + + test_re = re.compile( + r'\b(?PPASS|XPASS|FAIL|XFAIL|UNSUPPORTED): .* :: ' + + re.escape(test_path) + r' \(') + result = None + for line in proc.stdout: + found = test_re.search(line) + if found: + result = found.group('result') + break + proc.terminate() + + return Hashes(dxc_hash, llvm_hash, offload_hash), result + + +class ResultPrinter: + def __init__(self): + self.use_color = sys.stdout.isatty() + self.bold = '\033[1m' if self.use_color else '' + self.reset = '\033[0m' if self.use_color else '' + self.red = '\033[0;31m' if self.use_color else '' + self.green = '\033[0;32m' if self.use_color else '' + self.yellow = '\033[0;33m' if self.use_color else '' + + def print_header(self, job_name, status): + status_color = self.yellow + if status == 'success': + status_color = self.green + elif status == 'failure': + status_color = self.red + + print(f'{self.bold}## {job_name}{self.reset} ' + f'({status_color}{status}{self.reset})') + + def print_metadata(self, info): + print(f' - {info}') + + def print_commit(self, project, commit): + print(f' - {project}: {commit or 'unknown'}') + + def print_commit_range(self, project, success_hash, failed_hash): + print(f' - 
{project} range: {success_hash[:12]}..{failed_hash[:12]}') + + def print_result(self, test, result): + color = self.reset + if result == 'FAIL': + color = self.red + elif result == 'XPASS': + color = self.yellow + print(f"{color}{result}{self.reset}: {test}") + + def printline(self): + print() + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) From b71a6ba1ff265e6a984b1f0ba20ca83e0de86165 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Thu, 11 Jun 2026 11:16:39 -0700 Subject: [PATCH 2/5] Limit git hashes to the relevant project in failure-range --- utils/ci_results.py | 52 +++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/utils/ci_results.py b/utils/ci_results.py index 7177796bf..01913541b 100644 --- a/utils/ci_results.py +++ b/utils/ci_results.py @@ -148,9 +148,7 @@ def analyze_current_status(workflows, regressions_only=False): printer = ResultPrinter() for workflow in workflows: - project = 'llvm/llvm-project' - if '-dxc-' in workflow: - project = 'Microsoft/DirectXShaderCompiler' + project = get_project_for_workflow(workflow) last_completed = get_last_run(workflow, status='completed') last_success = get_last_run(workflow, status='success') @@ -188,9 +186,10 @@ def analyze_current_status(workflows, regressions_only=False): suite_failed_hash = read_until_githash(proc, 'llvm/offload-test-suite') printer.print_commit(project, failed_hash) - printer.print_commit('llvm/offload-test-suite', suite_failed_hash) if success_hash and failed_hash: printer.print_commit_range(project, success_hash, failed_hash) + printer.print_commit('llvm/offload-test-suite', suite_failed_hash) + if success_hash and failed_hash: printer.print_commit_range('llvm/offload-test-suite', suite_success_hash, suite_failed_hash) printer.printline() @@ -204,6 +203,14 @@ def analyze_current_status(workflows, regressions_only=False): printer.printline() +def get_project_for_workflow(workflow): + if '-clang-' in workflow: + return 
'llvm/llvm-project' + if '-dxc-' in workflow: + return 'Microsoft/DirectXShaderCompiler' + raise CIResultsError(f'Workflow {workflow} is neither clang nor dxc') + + def workflow_status_key(workflow): parts = workflow[:-len('.yaml')].split('-') if not parts: @@ -283,8 +290,10 @@ def find_failure_range(workflow, test_path, *, start_index = runid_index(runids, new_runid) if new_runid else 0 end_index = runid_index(runids, old_runid) if old_runid else 0 + project = get_project_for_workflow(workflow) + # Sanity check that the starting index has the right state - new_hash, result = get_test_result(runids[start_index], test_path) + new_hash, result = get_test_result(runids[start_index], project, test_path) print(f'{start_index} - ' f'Run {runids[start_index]} ({new_hash}): {result}', file=sys.stderr) @@ -297,7 +306,8 @@ def find_failure_range(workflow, test_path, *, offset = 1 while start_index + offset < len(runids): end_index = start_index + offset - old_hash, result = get_test_result(runids[end_index], test_path) + old_hash, result = get_test_result( + runids[end_index], project, test_path) print(f'{end_index} - ' f'Run {runids[end_index]} ({old_hash}): {result}', file=sys.stderr) @@ -315,7 +325,8 @@ def find_failure_range(workflow, test_path, *, f'Try higher --run-limit and --new-runid={runids[start_index]}') else: # Sanity check that the end index has the right state - old_hash, result = get_test_result(runids[end_index], test_path) + old_hash, result = get_test_result( + runids[end_index], project, test_path) print(f'{end_index} - ' f'Run {runids[end_index]} ({old_hash}): {result}', file=sys.stderr) @@ -326,7 +337,8 @@ def find_failure_range(workflow, test_path, *, # Finally, we can bisect the logs. 
while end_index > start_index + 1: current_index = int((start_index + end_index) / 2) - git_hash, result = get_test_result(runids[current_index], test_path) + git_hash, result = get_test_result( + runids[current_index], project, test_path) print(f'{current_index} - ' f'Run {runids[current_index]} ({git_hash}): {result}', @@ -346,8 +358,7 @@ def find_failure_range(workflow, test_path, *, f'Bisection ended early? Range is {start_index} to {end_index}') print() - print(f'dxc range: {old_hash.dxc_hash}..{new_hash.dxc_hash}') - print(f'llvm range: {old_hash.llvm_hash}..{new_hash.llvm_hash}') + print(f'{project} range: {old_hash.project_hash}..{new_hash.project_hash}') print(f'test suite range: {old_hash.offload_hash}..{new_hash.offload_hash}') @@ -413,29 +424,28 @@ def read_until_githash(proc, repo): class Hashes(object): - def __init__(self, dxc_hash, llvm_hash, offload_hash): + def __init__(self, project, project_hash, offload_hash): + self.project = project # Note: Without looking at the repos we can't actually calculate what's # sufficiently long for a short hash, so we just use something large. 
- self.dxc_hash = dxc_hash[:12] - self.llvm_hash = llvm_hash[:12] + self.project_hash = project_hash[:12] self.offload_hash = offload_hash[:12] def __repr__(self): - return (f'Hashes(dxc_hash={self.dxc_hash}, llvm_hash={self.llvm_hash}, ' + return (f'Hashes(project={self.project}, ' + f'project_hash={self.project_hash}, ' f'offload_hash={self.offload_hash})') def __str__(self): - return ', '.join([f'dxc: {self.dxc_hash or '-'}', - f'llvm: {self.llvm_hash or '-'}', + return ', '.join([f'{self.project}: {self.project_hash or '-'}', f'test-suite: {self.offload_hash or '-'}']) -def get_test_result(databaseId, test_path): +def get_test_result(databaseId, project, test_path): proc = get_log_proc(databaseId) - dxc_hash = read_until_githash(proc, 'Microsoft/DirectXShaderCompiler') - llvm_hash = read_until_githash(proc, 'llvm/llvm-project') + project_hash = read_until_githash(proc, project) offload_hash = read_until_githash(proc, 'llvm/offload-test-suite') - if not dxc_hash or not llvm_hash or not offload_hash: + if not project_hash or not offload_hash: raise CIResultsError(f'Failed to find repo hashes for run {databaseId}') test_re = re.compile( @@ -449,7 +459,7 @@ def get_test_result(databaseId, test_path): break proc.terminate() - return Hashes(dxc_hash, llvm_hash, offload_hash), result + return Hashes(project, project_hash, offload_hash), result class ResultPrinter: From 151e32c9df2aad56e75f18d4a3c9a44a682baeb6 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 15 Jun 2026 11:53:54 -0700 Subject: [PATCH 3/5] Attempt to work around buggy formatter It seems the code formatter we have in pre-commit CI can't handle f-strings properly... 
--- utils/ci_results.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/ci_results.py b/utils/ci_results.py index 01913541b..78ba1b610 100644 --- a/utils/ci_results.py +++ b/utils/ci_results.py @@ -437,8 +437,8 @@ def __repr__(self): f'offload_hash={self.offload_hash})') def __str__(self): - return ', '.join([f'{self.project}: {self.project_hash or '-'}', - f'test-suite: {self.offload_hash or '-'}']) + return ', '.join([f"{self.project}: {self.project_hash or '-'}", + f"test-suite: {self.offload_hash or '-'}"]) def get_test_result(databaseId, project, test_path): @@ -485,7 +485,7 @@ def print_metadata(self, info): print(f' - {info}') def print_commit(self, project, commit): - print(f' - {project}: {commit or 'unknown'}') + print(f" - {project}: {commit or 'unknown'}") def print_commit_range(self, project, success_hash, failed_hash): print(f' - {project} range: {success_hash[:12]}..{failed_hash[:12]}') From 2c373c6129dd03292936b64e8825ba84cd44e1f1 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 15 Jun 2026 14:46:37 -0700 Subject: [PATCH 4/5] Appease the formatter --- utils/ci_results.py | 437 ++++++++++++++++++++++++++------------------ 1 file changed, 263 insertions(+), 174 deletions(-) diff --git a/utils/ci_results.py b/utils/ci_results.py index 78ba1b610..f4dcf37ea 100644 --- a/utils/ci_results.py +++ b/utils/ci_results.py @@ -21,24 +21,23 @@ def main(argv): parser = get_argument_parser(argv[0]) args = parser.parse_args(argv[1:]) - if not shutil.which('gh'): - print('error: gh utility not found, cannot continue') + if not shutil.which("gh"): + print("error: gh utility not found, cannot continue") return 1 try: args.func(args) except CIResultsError as e: - print(f'error: {e}', file=sys.stderr) + print(f"error: {e}", file=sys.stderr) return 1 except subprocess.CalledProcessError as e: - print(f'error: {e}', file=sys.stderr) + print(f"error: {e}", file=sys.stderr) return 1 return 0 class 
ArgumentParserWithSubcommandUsage(argparse.ArgumentParser): - """Version of argparse.ArgumentParser that prints usage of each subcommand. - """ + """Version of ArgumentParser that prints usage of each subcommand.""" def format_usage(self): lines = [] @@ -46,80 +45,99 @@ def format_usage(self): for action in self._actions: if isinstance(action, argparse._SubParsersAction): if not has_subparsers: - lines.append('usage:') + lines.append("usage:") has_subparsers = True for choice, subparser in action.choices.items(): - usage = subparser.format_usage().strip()[len('usage: '):] - lines.append(f' {usage}') + usage = subparser.format_usage().strip()[len("usage: ") :] + lines.append(f" {usage}") if not has_subparsers: lines.append(super().format_usage().strip()) - lines.append('') - return '\n'.join(lines) + lines.append("") + return "\n".join(lines) def get_argument_parser(prog_name): parser = ArgumentParserWithSubcommandUsage( - prog_name, description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) + prog_name, + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) subparsers = parser.add_subparsers(required=True) parser_status = subparsers.add_parser( - 'current-status', + "current-status", help=summarize_docstring(analyze_current_status), description=dedent_docstring(analyze_current_status), - formatter_class=argparse.RawDescriptionHelpFormatter) + formatter_class=argparse.RawDescriptionHelpFormatter, + ) parser_status.add_argument( - 'workflows', nargs='*', - help='Limit to particular workflow .yaml files') + "workflows", nargs="*", help="Limit to particular workflow .yaml files" + ) parser_status.add_argument( - '--regressions', action='store_true', - help='Only show workflows that have regressed') + "--regressions", + action="store_true", + help="Only show workflows that have regressed", + ) parser_status.set_defaults( func=lambda args: analyze_current_status( - args.workflows, regressions_only=args.regressions)) + 
args.workflows, regressions_only=args.regressions + ) + ) parser_range = subparsers.add_parser( - 'failure-range', + "failure-range", help=summarize_docstring(find_failure_range), description=dedent_docstring(find_failure_range), - formatter_class=argparse.RawDescriptionHelpFormatter) - parser_range.add_argument( - 'workflow', - help='Workflow .yaml file to inspect') + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser_range.add_argument("workflow", help="Workflow .yaml file to inspect") parser_range.add_argument( - 'test_path', - help='Test case to narrow down range for') + "test_path", help="Test case to find git commit ranges for" + ) parser_range.add_argument( - '--run-limit', type=int, default=100, - help='Number of runs to consider in search (default: %(default)s)') + "--run-limit", + type=int, + default=100, + help="Number of runs to consider in search (default: %(default)s)", + ) parser_range.add_argument( - '--old-result', default='XFAIL|PASS', - help='"old" state results, separated by `|` (default: %(default)s)') + "--old-result", + default="XFAIL|PASS", + help='"old" state results, separated by `|` (default: %(default)s)', + ) parser_range.add_argument( - '--new-result', default='FAIL|XPASS', - help='"new" state results, separated by `|` (default: %(default)s)') + "--new-result", + default="FAIL|XPASS", + help='"new" state results, separated by `|` (default: %(default)s)', + ) parser_range.add_argument( - '--old-runid', type=int, - help='Run ID to start searching from') + "--old-runid", type=int, help="Run ID to start searching from" + ) parser_range.add_argument( - '--new-runid', type=int, - help='Run ID to search up to') + "--new-runid", type=int, help="Run ID to search up to" + ) parser_range.set_defaults( func=lambda args: find_failure_range( - args.workflow, args.test_path, run_limit=args.run_limit, - old_result=args.old_result, new_result=args.new_result, - old_runid=args.old_runid, new_runid=args.new_runid)) + args.workflow, + 
args.test_path, + run_limit=args.run_limit, + old_result=args.old_result, + new_result=args.new_result, + old_runid=args.old_runid, + new_runid=args.new_runid, + ) + ) return parser def summarize_docstring(function): - return function.__doc__.split('\n')[0] + return function.__doc__.split("\n")[0] def dedent_docstring(function): - lines = function.__doc__.split('\n') - return '\n'.join([lines[0], textwrap.dedent('\n'.join(lines[1:]))]) + lines = function.__doc__.split("\n") + return "\n".join([lines[0], textwrap.dedent("\n".join(lines[1:]))]) def analyze_current_status(workflows, regressions_only=False): @@ -134,99 +152,105 @@ def analyze_current_status(workflows, regressions_only=False): """ if not workflows: for path in pathlib.Path(".github/workflows").glob("*.yaml"): - if any((path.name.startswith("build-"), + if any( + ( + path.name.startswith("build-"), path.name.startswith("pr-"), - path.name.startswith("validate-"))): + path.name.startswith("validate-"), + ) + ): continue workflows.append(path.name) workflows.sort(key=workflow_status_key) if not workflows: raise CIResultsError( - f'No workflows found in .github/workflows/. ' - f'Please run from the top level directory of the repository.') + f"No workflows found in .github/workflows/. " + f"Please run from the top level directory of the repository." 
+ ) printer = ResultPrinter() for workflow in workflows: project = get_project_for_workflow(workflow) + offload_project = "llvm/offload-test-suite" - last_completed = get_last_run(workflow, status='completed') - last_success = get_last_run(workflow, status='success') + last_completed = get_last_run(workflow, status="completed") + last_success = get_last_run(workflow, status="success") if regressions_only and not last_success: if not last_success: continue - if last_completed['databaseId'] == last_success['databaseId']: + if last_completed["databaseId"] == last_success["databaseId"]: continue if last_success: - proc = get_log_proc(last_success['databaseId']) + proc = get_log_proc(last_success["databaseId"]) success_hash = read_until_githash(proc, project) - suite_success_hash = read_until_githash(proc, - 'llvm/offload-test-suite') + suite_success_hash = read_until_githash(proc, offload_project) proc.terminate() else: success_hash = None suite_success_hash = None - printer.print_header(workflow, last_completed['conclusion']) + printer.print_header(workflow, last_completed["conclusion"]) printer.print_metadata(f"Name: {last_completed['name']}") printer.print_metadata(f"Timestamp: {last_completed['createdAt']}") - if (last_success and - last_completed['databaseId'] == last_success['databaseId']): - # The most recent run passed, we have nothing more to do. - printer.print_commit(project, success_hash) - printer.print_commit('llvm/offload-test-suite', suite_success_hash) - printer.printline() - continue + if last_success: + if last_completed["databaseId"] == last_success["databaseId"]: + # The most recent run passed, we have nothing more to do. 
+ printer.print_commit(project, success_hash) + printer.print_commit(offload_project, suite_success_hash) + printer.printline() + continue - proc = get_log_proc(last_completed['databaseId']) + proc = get_log_proc(last_completed["databaseId"]) failed_hash = read_until_githash(proc, project) - suite_failed_hash = read_until_githash(proc, 'llvm/offload-test-suite') + suite_failed_hash = read_until_githash(proc, offload_project) printer.print_commit(project, failed_hash) if success_hash and failed_hash: printer.print_commit_range(project, success_hash, failed_hash) - printer.print_commit('llvm/offload-test-suite', suite_failed_hash) + printer.print_commit(offload_project, suite_failed_hash) if success_hash and failed_hash: - printer.print_commit_range('llvm/offload-test-suite', - suite_success_hash, suite_failed_hash) + printer.print_commit_range( + offload_project, suite_success_hash, suite_failed_hash + ) printer.printline() - test_re = re.compile(r'\b(?PXPASS|FAIL): .* :: (?P.*) \(') + test_re = re.compile(r"\b(?PXPASS|FAIL): .* :: (?P.*) \(") for line in proc.stdout: found = test_re.search(line) if found: - printer.print_result(found.group('test'), found.group('result')) + printer.print_result(found.group("test"), found.group("result")) proc.wait() printer.printline() def get_project_for_workflow(workflow): - if '-clang-' in workflow: - return 'llvm/llvm-project' - if '-dxc-' in workflow: - return 'Microsoft/DirectXShaderCompiler' - raise CIResultsError(f'Workflow {workflow} is neither clang nor dxc') + if "-clang-" in workflow: + return "llvm/llvm-project" + if "-dxc-" in workflow: + return "Microsoft/DirectXShaderCompiler" + raise CIResultsError(f"Workflow {workflow} is neither clang nor dxc") def workflow_status_key(workflow): - parts = workflow[:-len('.yaml')].split('-') + parts = workflow[: -len(".yaml")].split("-") if not parts: return None parts.reverse() host = parts.pop() - if host == 'macos': - target = 'metal' + if host == "macos": + target = "metal" 
else: target = parts.pop() compiler = parts.pop() driver = parts.pop() # We label warp warp-d3d12 and the variant warp-preview-d3d12. - if driver == 'warp' and parts[-1] == 'd3d12': + if driver == "warp" and parts[-1] == "d3d12": parts.pop() variant = True if parts else False @@ -235,31 +259,38 @@ def workflow_status_key(workflow): # README for easier correlation. Unfortunately I don't see an obvious way # to automate this. tier_list = [ - ('d3d12', 'intel'), - ('d3d12', 'nvidia'), - ('warp', 'amd'), - ('warp', 'qc'), - ('vk', 'intel'), - ('mtl', 'metal'), - ('d3d12', 'amd'), - ('d3d12', 'qc'), - ('vk', 'amd'), - ('vk', 'nvidia'), - ('vk', 'qc'), + ("d3d12", "intel"), + ("d3d12", "nvidia"), + ("warp", "amd"), + ("warp", "qc"), + ("vk", "intel"), + ("mtl", "metal"), + ("d3d12", "amd"), + ("d3d12", "qc"), + ("vk", "amd"), + ("vk", "nvidia"), + ("vk", "qc"), ] try: tier = tier_list.index((driver, target)) except ValueError: tier = len(tier_list) - compiler_key = 0 if compiler == 'dxc' else 1 + compiler_key = 0 if compiler == "dxc" else 1 return (variant, tier, driver, target, host, compiler_key) -def find_failure_range(workflow, test_path, *, - run_limit, old_result, new_result, - old_runid=None, new_runid=None): +def find_failure_range( + workflow, + test_path, + *, + run_limit, + old_result, + new_result, + old_runid=None, + new_runid=None, +): """Find the git range where a test started to fail. Given a workflow and test path, attempt to find the CI run where it first @@ -276,17 +307,19 @@ def find_failure_range(workflow, test_path, *, # Sanitize result arguments old_result = re.compile( - '|'.join(re.escape(x) for x in old_result.split('|'))) + "|".join(re.escape(result) for result in old_result.split("|")) + ) new_result = re.compile( - '|'.join(re.escape(x) for x in new_result.split('|'))) + "|".join(re.escape(result) for result in new_result.split("|")) + ) # Lookup the runs we're going to work with. # TODO: Use paging instead of a run_limit? 
runs = get_recent_runs(workflow, run_limit=run_limit) if not runs: - raise CIResultsError(f'could not find any runs for {workflow}') + raise CIResultsError(f"could not find any runs for {workflow}") - runids = [run['databaseId'] for run in runs] + runids = [run["databaseId"] for run in runs] start_index = runid_index(runids, new_runid) if new_runid else 0 end_index = runid_index(runids, old_runid) if old_runid else 0 @@ -294,55 +327,61 @@ def find_failure_range(workflow, test_path, *, # Sanity check that the starting index has the right state new_hash, result = get_test_result(runids[start_index], project, test_path) - print(f'{start_index} - ' - f'Run {runids[start_index]} ({new_hash}): {result}', - file=sys.stderr) + print( + f"{start_index} - Run {runids[start_index]} ({new_hash}): {result}", + file=sys.stderr, + ) if not new_result.match(result): raise CIResultsError( - f'Current result is {result}, not {new_result.pattern}') + f"Current result is {result}, expected {new_result.pattern}" + ) if end_index == 0: # We don't have a range yet. "Gallop" until we find an end index. offset = 1 while start_index + offset < len(runids): end_index = start_index + offset - old_hash, result = get_test_result( - runids[end_index], project, test_path) - print(f'{end_index} - ' - f'Run {runids[end_index]} ({old_hash}): {result}', - file=sys.stderr) + run = runids[end_index] + old_hash, result = get_test_result(run, project, test_path) + print( + f"{end_index} - Run {run} ({old_hash}): {result}", + file=sys.stderr, + ) if old_result.match(result): break if not new_result.match(result): - raise CIResultsError( - f'Unhandled result in run {runids[end_index]}: {result}') + raise CIResultsError(f"Unhandled result in run {run}: {result}") start_index = end_index new_hash = old_hash offset *= 2 else: raise CIResultsError( - f'Did not find run with result matching {old_result.pattern}. 
' - f'Try higher --run-limit and --new-runid={runids[start_index]}') + f"Did not find run with result matching {old_result.pattern}. " + f"Try higher --run-limit and --new-runid={runids[start_index]}" + ) else: # Sanity check that the end index has the right state - old_hash, result = get_test_result( - runids[end_index], project, test_path) - print(f'{end_index} - ' - f'Run {runids[end_index]} ({old_hash}): {result}', - file=sys.stderr) + run = runids[end_index] + old_hash, result = get_test_result(run, project, test_path) + print( + f"{end_index} - Run {run} ({old_hash}): {result}", + file=sys.stderr, + ) if not old_result.match(result): raise CIResultsError( - f'Old result is {result}, not {old_result.pattern}') + f"Old result is {result}, expected {old_result.pattern}" + ) # Finally, we can bisect the logs. while end_index > start_index + 1: current_index = int((start_index + end_index) / 2) - git_hash, result = get_test_result( - runids[current_index], project, test_path) + run = runids[current_index] + git_hash, result = get_test_result(run, project, test_path) - print(f'{current_index} - ' - f'Run {runids[current_index]} ({git_hash}): {result}', - file=sys.stderr) + print( + f"{current_index} - Run {run} ({git_hash}): {result}", + file=sys.stderr, + ) if old_result.match(result): old_hash = git_hash end_index = current_index @@ -351,15 +390,17 @@ def find_failure_range(workflow, test_path, *, start_index = current_index else: raise CIResultsError( - f'Unhandled result in run {runids[end_index]}: {result}') + f"Unhandled result in run {run}: {result}" + ) - if (end_index != start_index + 1): + if end_index != start_index + 1: raise CIResultsError( - f'Bisection ended early? Range is {start_index} to {end_index}') + f"Bisection ended early? 
Range is {start_index} to {end_index}" + ) print() - print(f'{project} range: {old_hash.project_hash}..{new_hash.project_hash}') - print(f'test suite range: {old_hash.offload_hash}..{new_hash.offload_hash}') + print(f"{project} range: {old_hash.project_hash}..{new_hash.project_hash}") + print(f"test suite range: {old_hash.offload_hash}..{new_hash.offload_hash}") def runid_index(runids, runid): @@ -367,16 +408,30 @@ def runid_index(runids, runid): return runids.index(runid) except ValueError: raise CIResultsError( - f'Could not find runid ({runid}). Try raising `run_limit`.') + f"Could not find runid ({runid}). Try raising `run_limit`." + ) -def get_last_run(workflow, status='completed'): +def get_last_run(workflow, status="completed"): output = subprocess.run( - ['gh', 'run', '-R', 'llvm/offload-test-suite', 'list', - '--workflow', workflow, '--status', status, - '--json', 'name,databaseId,createdAt,conclusion', - '--jq', 'max_by(.createdAt)'], - check=True, stdout=subprocess.PIPE).stdout.strip() + [ + "gh", + "run", + "-R", + "llvm/offload-test-suite", + "list", + "--workflow", + workflow, + "--status", + status, + "--json", + "name,databaseId,createdAt,conclusion", + "--jq", + "max_by(.createdAt)", + ], + check=True, + stdout=subprocess.PIPE, + ).stdout.strip() if not output: return None return json.loads(output) @@ -384,10 +439,24 @@ def get_last_run(workflow, status='completed'): def get_recent_runs(workflow, *, run_limit): output = subprocess.run( - ['gh', 'run', '-R', 'llvm/offload-test-suite', 'list', - '-L', str(run_limit), '--workflow', workflow, '--status', 'completed', - '--json', 'name,databaseId,createdAt,conclusion'], - check=True, stdout=subprocess.PIPE).stdout + [ + "gh", + "run", + "-R", + "llvm/offload-test-suite", + "list", + "-L", + str(run_limit), + "--workflow", + workflow, + "--status", + "completed", + "--json", + "name,databaseId,createdAt,conclusion", + ], + check=True, + stdout=subprocess.PIPE, + ).stdout if not output: return None return 
json.loads(output) @@ -395,30 +464,40 @@ def get_recent_runs(workflow, *, run_limit): def get_log_proc(databaseId): return subprocess.Popen( - ['gh', 'run', '-R', 'llvm/offload-test-suite', - 'view', '--log', str(databaseId)], - text=True, bufsize=1, stdout=subprocess.PIPE) + [ + "gh", + "run", + "-R", + "llvm/offload-test-suite", + "view", + "--log", + str(databaseId), + ], + text=True, + bufsize=1, + stdout=subprocess.PIPE, + ) def read_until_githash(proc, repo): - checkout_action_re = re.compile(r'Run actions/checkout') - repo_re = re.compile(f'repository: {repo}') - git_log_hash_re = re.compile('log -1 --format=%H') - hash_re = re.compile('[0-9a-f]{40}') + checkout_action_re = re.compile(r"Run actions/checkout") + repo_re = re.compile(f"repository: {repo}") + git_log_hash_re = re.compile("log -1 --format=%H") + hash_re = re.compile("[0-9a-f]{40}") state = None for line in proc.stdout: if checkout_action_re.search(line): - state = 'checkout' - elif state == 'checkout' and repo_re.search(line): - state = 'repo' - elif state == 'repo': + state = "checkout" + elif state == "checkout" and repo_re.search(line): + state = "repo" + elif state == "repo": if git_log_hash_re.search(line): - state = 'hash' - elif state == 'hash': + state = "hash" + elif state == "hash": found = hash_re.search(line) if not found: - raise CIResultsError('Hash not printed from git log command?') + raise CIResultsError("Hash not printed from git log command?") return found.group(0) return None @@ -432,30 +511,38 @@ def __init__(self, project, project_hash, offload_hash): self.offload_hash = offload_hash[:12] def __repr__(self): - return (f'Hashes(project={self.project}, ' - f'project_hash={self.project_hash}, ' - f'offload_hash={self.offload_hash})') + return ( + f"Hashes(project={self.project}, " + f"project_hash={self.project_hash}, " + f"offload_hash={self.offload_hash})" + ) def __str__(self): - return ', '.join([f"{self.project}: {self.project_hash or '-'}", - f"test-suite: 
{self.offload_hash or '-'}"]) + return ", ".join( + [ + f"{self.project}: {self.project_hash or '-'}", + f"test-suite: {self.offload_hash or '-'}", + ] + ) def get_test_result(databaseId, project, test_path): proc = get_log_proc(databaseId) project_hash = read_until_githash(proc, project) - offload_hash = read_until_githash(proc, 'llvm/offload-test-suite') + offload_hash = read_until_githash(proc, "llvm/offload-test-suite") if not project_hash or not offload_hash: - raise CIResultsError(f'Failed to find repo hashes for run {databaseId}') + raise CIResultsError(f"Failed to find repo hashes for run {databaseId}") test_re = re.compile( - r'\b(?P<result>PASS|XPASS|FAIL|XFAIL|UNSUPPORTED): .* :: ' + - re.escape(test_path) + r' \(') + r"\b(?P<result>PASS|XPASS|FAIL|XFAIL|UNSUPPORTED): .* :: " + + re.escape(test_path) + + r" \(" + ) result = None for line in proc.stdout: found = test_re.search(line) if found: - result = found.group('result') + result = found.group("result") break proc.terminate() @@ -465,36 +552,38 @@ def get_test_result(databaseId, project, test_path): class ResultPrinter: def __init__(self): self.use_color = sys.stdout.isatty() - self.bold = '\033[1m' if self.use_color else '' - self.reset = '\033[0m' if self.use_color else '' - self.red = '\033[0;31m' if self.use_color else '' - self.green = '\033[0;32m' if self.use_color else '' - self.yellow = '\033[0;33m' if self.use_color else '' + self.bold = "\033[1m" if self.use_color else "" + self.reset = "\033[0m" if self.use_color else "" + self.red = "\033[0;31m" if self.use_color else "" + self.green = "\033[0;32m" if self.use_color else "" + self.yellow = "\033[0;33m" if self.use_color else "" def print_header(self, job_name, status): status_color = self.yellow - if status == 'success': + if status == "success": status_color = self.green - elif status == 'failure': + elif status == "failure": status_color = self.red - print(f'{self.bold}## {job_name}{self.reset} ' - f'({status_color}{status}{self.reset})') + print( + 
f"{self.bold}## {job_name}{self.reset} " + f"({status_color}{status}{self.reset})" + ) def print_metadata(self, info): - print(f' - {info}') + print(f" - {info}") def print_commit(self, project, commit): print(f" - {project}: {commit or 'unknown'}") def print_commit_range(self, project, success_hash, failed_hash): - print(f' - {project} range: {success_hash[:12]}..{failed_hash[:12]}') + print(f" - {project} range: {success_hash[:12]}..{failed_hash[:12]}") def print_result(self, test, result): color = self.reset - if result == 'FAIL': + if result == "FAIL": color = self.red - elif result == 'XPASS': + elif result == "XPASS": color = self.yellow print(f"{color}{result}{self.reset}: {test}") @@ -502,5 +591,5 @@ def printline(self): print() -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main(sys.argv)) From 99c6f372e4f0cf20d01a7818cac3f8e1106db374 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Mon, 15 Jun 2026 14:53:05 -0700 Subject: [PATCH 5/5] More formatting --- utils/ci_results.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/utils/ci_results.py b/utils/ci_results.py index f4dcf37ea..5bce7bae3 100644 --- a/utils/ci_results.py +++ b/utils/ci_results.py @@ -111,10 +111,10 @@ def get_argument_parser(prog_name): help='"new" state results, separated by `|` (default: %(default)s)', ) parser_range.add_argument( - "--old-runid", type=int, help="Run ID to start searching from" + "--old-runid", type=int, help="Run ID to start the bisection range" ) parser_range.add_argument( - "--new-runid", type=int, help="Run ID to search up to" + "--new-runid", type=int, help="Run ID to end the bisection range" ) parser_range.set_defaults( func=lambda args: find_failure_range( @@ -389,9 +389,7 @@ def find_failure_range( new_hash = git_hash start_index = current_index else: - raise CIResultsError( - f"Unhandled result in run {run}: {result}" - ) + raise CIResultsError(f"Unhandled result in run {run}: {result}") if end_index != start_index + 
1: raise CIResultsError(