diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..1ec4fb0f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,14 @@ +## Unreleased + +## v2.0.0 + +Most of the work has moved from GitHub Actions `.yml` files to Python code in `workflow.py`. +In the future, this will make it possible to support more workflow engines beyond just GitHub Actions. + +**Migration note**: After running `python -m bench_runner install` to update your local files, be sure to add the new `workflow_bootstrap.py` file to your git repository. + +### New configuration + +Runners have a new configuration option, `use_cores`, to control the number of +CPU cores used to build CPython. By default, all available cores are used, but +some cloud VMs require using fewer. diff --git a/README.md b/README.md index 416fc158..816f3f27 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,12 @@ If you don't want a machine to be included when the user selects "machine == 'al include_in_all = false ```
+You may limit the number of cores used to build Python with the `use_cores` option. This may be necessary, for example, on cloud VMs. + +``` +use_cores = 2 +``` + ### Try a benchmarking run There are instructions for running a benchmarking action already in the `README.md` of your repo. Look there and give it a try! diff --git a/bench_runner/__main__.py b/bench_runner/__main__.py index 8465fbb5..b843b9d3 100644 --- a/bench_runner/__main__.py +++ b/bench_runner/__main__.py @@ -14,13 +14,13 @@ "Get the merge base of the selected commit, and determine if it should run" ), "install": "Install the workflow files into a results repository", + "notify": "Send a notification about the completion of the workflow", "profiling_plot": "Generate the profiling plots from raw data", "purge": "Purge old results from a results repository", "remove_benchmark": "Remove specific benchmarks from the data set", "run_benchmarks": "Run benchmarks (in timing, pyperf or perf modes)", - "should_run": "Determine whether we need to rerun results for the current commit", "synthesize_loops_file": "Create a loops file from multiple benchmark results", - "notify": "Send a notification about the completion of the workflow", + "workflow": "Run the full compile/benchmark workflow", } if __name__ == "__main__": diff --git a/bench_runner/benchmark_definitions.py b/bench_runner/benchmark_definitions.py new file mode 100644 index 00000000..5ea214b4 --- /dev/null +++ b/bench_runner/benchmark_definitions.py @@ -0,0 +1,41 @@ +from __future__ import annotations + + +import dataclasses +import hashlib +from pathlib import Path + + +from . 
import git + + +@dataclasses.dataclass +class BenchmarkRepo: + hash: str + url: str + dirname: str + + +BENCHMARK_REPOS = [ + BenchmarkRepo( + "56d12a8fd7cc1432835965d374929bfa7f6f7a07", + "https://github.com/python/pyperformance.git", + "pyperformance", + ), + BenchmarkRepo( + "265655e7f03ace13ec1e00e1ba299179e69f8a00", + "https://github.com/pyston/python-macrobenchmarks.git", + "pyston-benchmarks", + ), +] + + +def get_benchmark_hash() -> str: + hash = hashlib.sha256() + for repo in BENCHMARK_REPOS: + if Path(repo.dirname).is_dir(): + current_hash = git.get_git_hash(Path(repo.dirname)) + else: + current_hash = repo.hash + hash.update(current_hash.encode("ascii")[:7]) + return hash.hexdigest()[:6] diff --git a/bench_runner/config.py b/bench_runner/config.py index 1ebd6b2d..2fffc7ae 100644 --- a/bench_runner/config.py +++ b/bench_runner/config.py @@ -4,6 +4,7 @@ import functools from pathlib import Path +from typing import Any try: import tomllib @@ -11,9 +12,21 @@ import tomli as tomllib # type: ignore +from . import runners + + @functools.cache def get_bench_runner_config( filepath: Path | str = Path("bench_runner.toml"), ): with Path(filepath).open("rb") as fd: return tomllib.load(fd) + + +def get_config_for_current_runner() -> dict[str, Any]: + config = get_bench_runner_config() + runner = runners.get_runner_for_hostname() + all_runners = config.get("runners", []) + if len(all_runners) >= 1: + return all_runners[0].get(runner.nickname, {}) + return {} diff --git a/bench_runner/git.py b/bench_runner/git.py index e6209e59..7f8120ed 100644 --- a/bench_runner/git.py +++ b/bench_runner/git.py @@ -2,9 +2,12 @@ from __future__ import annotations +import contextlib import datetime from pathlib import Path +import shutil import subprocess +import re import rich @@ -128,3 +131,40 @@ def get_commits_between(dirname: PathLike, ref1: str, ref2: str) -> list[str]: def bisect_commits(dirname: PathLike, ref1: str, ref2: str) -> str: commits = get_commits_between(dirname, ref1, ref2) return commits[len(commits) // 2] + + +def clone( + dirname: PathLike, + url: str, + *, + branch: str | None = None, + depth: int = 1, +) -> None: + is_hash = re.match(r"^[0-9a-f]{40}$", branch) if branch else False + + dirname = Path(dirname) + if dirname.is_dir(): + if is_hash and (dirname / ".git").is_dir() and get_git_hash(dirname) == branch: + # This is a git repo, and the hash matches + return + shutil.rmtree(dirname) + + # Fetching a hash and fetching a branch require different approaches + + if is_hash: + assert branch is not None + dirname.mkdir() + with contextlib.chdir(dirname): + subprocess.check_call(["git", "init"]) + subprocess.check_call(["git", "remote", "add", "origin", url]) + subprocess.check_call( + ["git", "fetch", "--depth", str(depth), "origin", branch] + ) + subprocess.check_call(["git", "checkout", branch]) + else: + args = ["git", "clone", url, str(dirname)] + if branch is not None: + args += ["--branch", branch] + if depth is not None: + args += ["--depth", str(depth)] + subprocess.check_call(args) diff --git a/bench_runner/result.py b/bench_runner/result.py index 2a47b759..9b3274a0 100644 --- a/bench_runner/result.py +++ b/bench_runner/result.py @@ -9,7 +9,6 @@ from operator import itemgetter from pathlib import Path import re -import socket import subprocess import sys from typing import Any, Callable, Iterable, Sequence @@ -524,7 +523,7 @@ def from_scratch( flags: Iterable[str] | None = None, ) -> "Result": result = cls( - _clean(runners.get_nickname_for_hostname(socket.gethostname())), + 
_clean(runners.get_nickname_for_hostname()), _clean(_get_architecture(python)), _clean_for_url(fork), _clean(ref[:20]), diff --git a/bench_runner/runners.py b/bench_runner/runners.py index dc96b4b3..f8b4e5eb 100644 --- a/bench_runner/runners.py +++ b/bench_runner/runners.py @@ -3,6 +3,7 @@ import functools import os +import socket from . import config @@ -80,13 +81,19 @@ def get_runners_by_nickname() -> dict[str, Runner]: return {x.nickname: x for x in get_runners()} -def get_nickname_for_hostname(hostname: str) -> str: +def get_nickname_for_hostname(hostname: str | None = None) -> str: # The envvar BENCHMARK_MACHINE_NICKNAME is used to override the machine that # results are reported for. if "BENCHMARK_MACHINE_NICKNAME" in os.environ: return os.environ["BENCHMARK_MACHINE_NICKNAME"] - return get_runners_by_hostname().get(hostname, unknown_runner).nickname + return get_runner_for_hostname(hostname).nickname def get_runner_by_nickname(nickname: str) -> Runner: return get_runners_by_nickname().get(nickname, unknown_runner) + + +def get_runner_for_hostname(hostname: str | None = None) -> Runner: + if hostname is None: + hostname = socket.gethostname() + return get_runners_by_hostname().get(hostname, unknown_runner) diff --git a/bench_runner/scripts/generate_results.py b/bench_runner/scripts/generate_results.py index f8f55361..cfb101b3 100644 --- a/bench_runner/scripts/generate_results.py +++ b/bench_runner/scripts/generate_results.py @@ -126,7 +126,11 @@ def sort_runner_names(runner_names: Iterable[str]) -> list[str]: def sorter(val): if val is None: return () - return order.index(val.split()[0]), val + try: + idx = order.index(val.split()[0]) + except ValueError: + idx = -1 + return idx, val return sorted(runner_names, key=sorter) diff --git a/bench_runner/scripts/get_merge_base.py b/bench_runner/scripts/get_merge_base.py index 2ea7cfd6..4209d4bd 100644 --- a/bench_runner/scripts/get_merge_base.py +++ b/bench_runner/scripts/get_merge_base.py @@ -6,10 +6,10 @@ import rich_argparse +from bench_runner import benchmark_definitions from bench_runner import flags as mflags from bench_runner import git from bench_runner.result import has_result -from bench_runner import util from bench_runner.util import PathLike @@ -55,7 +55,7 @@ def _main( machine, pystats, flags, - util.get_benchmark_hash(), + benchmark_definitions.get_benchmark_hash(), progress=False, ) is None diff --git a/bench_runner/scripts/install.py b/bench_runner/scripts/install.py index ec80b305..9275338e 100644 --- a/bench_runner/scripts/install.py +++ b/bench_runner/scripts/install.py @@ -241,13 +241,11 @@ def generate_generic(dst: Any) -> Any: def _main(check: bool) -> None: WORKFLOW_PATH.mkdir(parents=True, exist_ok=True) - env = load_yaml(TEMPLATE_PATH / "env.yml") - for path in TEMPLATE_PATH.glob("*"): if path.name.endswith(".src.yml") or path.name == "env.yml": continue - if not (ROOT_PATH / path.name).is_file(): + if not (ROOT_PATH / path.name).is_file() or path.suffix == ".py": if check: fail_check(ROOT_PATH / path.name) else: @@ -258,7 +256,6 @@ def _main(check: bool) -> None: generator = GENERATORS.get(src_path.name, generate_generic) src = load_yaml(src_path) dst = generator(src) - dst = {"env": env, **dst} write_yaml(dst_path, dst, check) diff --git a/bench_runner/scripts/run_benchmarks.py b/bench_runner/scripts/run_benchmarks.py index 01a3ad66..fa1249dc 100644 --- a/bench_runner/scripts/run_benchmarks.py +++ b/bench_runner/scripts/run_benchmarks.py @@ -18,6 +18,7 @@ import rich_argparse +from bench_runner import 
benchmark_definitions from bench_runner import flags from bench_runner import git from bench_runner.result import Result @@ -278,7 +279,7 @@ def update_metadata( merge_base = git.get_git_merge_base(cpython) if merge_base is not None: metadata["commit_merge_base"] = merge_base - metadata["benchmark_hash"] = util.get_benchmark_hash() + metadata["benchmark_hash"] = benchmark_definitions.get_benchmark_hash() if run_id is not None: metadata["github_action_url"] = f"{GITHUB_URL}/actions/runs/{run_id}" actor = os.environ.get("GITHUB_ACTOR") diff --git a/bench_runner/scripts/should_run.py b/bench_runner/scripts/should_run.py deleted file mode 100644 index 3d8f42f4..00000000 --- a/bench_runner/scripts/should_run.py +++ /dev/null @@ -1,109 +0,0 @@ -# Determines if this should run. -# If force is `true`, we always run, otherwise, we only run if we don't have -# results. - -import argparse -from pathlib import Path -import subprocess -import sys - - -import rich_argparse - - -# NOTE: This file should import in Python 3.9 or later so it can at least print -# the error message that the version of Python is too old. - - -def _main( - force: bool, - fork: str, - ref: str, - machine: str, - pystats: bool, - flag_str: str, - cpython: Path = Path("cpython"), - results_dir: Path = Path("results"), -) -> None: - if sys.version_info[:2] < (3, 10): - print( - "The benchmarking infrastructure requires Python 3.10 or later.", - file=sys.stderr, - ) - sys.exit(1) - - # Now that we've assert we are Python 3.11 or later, we can import - # parts of our library. - from bench_runner import flags as mflags - from bench_runner import git - from bench_runner.result import has_result - from bench_runner import util - - flags = mflags.parse_flags(flag_str) - - if "PYTHON_UOPS" in flags and "JIT" in flags: - print("Tier 2 interpreter and JIT may not be selected at the same time") - sys.exit(1) - - try: - commit_hash = git.get_git_hash(cpython) - except subprocess.CalledProcessError: - # This will fail if the cpython checkout failed for some reason. Print - # a nice error message since the one the checkout itself gives is - # totally inscrutable. 
- print("The checkout of cpython failed.", file=sys.stderr) - print(f"You specified fork {fork!r} and ref {ref!r}.", file=sys.stderr) - print("Are you sure you entered the fork and ref correctly?", file=sys.stderr) - # Fail the rest of the workflow - sys.exit(1) - - found_result = has_result( - results_dir, - commit_hash, - machine, - pystats, - flags, - util.get_benchmark_hash(), - progress=False, - ) - - if force: - if found_result is not None: - for filepath in found_result.filename.parent.iterdir(): - if filepath.suffix != ".json": - git.remove(results_dir.parent, filepath) - should_run = True - else: - should_run = (machine in ("__really_all", "all")) or found_result is None - - print(f"should_run={str(should_run).lower()}") - - -def main(): - parser = argparse.ArgumentParser( - description="Do we need to run this commit?", - formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter, - ) - parser.add_argument( - "force", - help="If true, force a re-run", - ) - parser.add_argument("fork") - parser.add_argument("ref") - parser.add_argument("machine") - parser.add_argument("pystats") - parser.add_argument("flags") - args = parser.parse_args() - - _main( - args.force != "false", - args.fork, - args.ref, - args.machine, - args.pystats != "false", - args.flags, - ) - - -if __name__ == "__main__": - main() diff --git a/bench_runner/scripts/workflow.py b/bench_runner/scripts/workflow.py new file mode 100644 index 00000000..1364696a --- /dev/null +++ b/bench_runner/scripts/workflow.py @@ -0,0 +1,394 @@ +from __future__ import annotations + + +import argparse +import contextlib +import os +from pathlib import Path +import shlex +import shutil +import subprocess +import sys + + +import rich_argparse + + +from bench_runner import benchmark_definitions +from bench_runner import config +from bench_runner import flags as mflags +from bench_runner import git +from bench_runner.result import has_result +from bench_runner import util +from bench_runner.util import PathLike + + +from bench_runner.scripts import run_benchmarks as mrun_benchmarks + + +def get_windows_build_dir(force_32bit: bool) -> Path: + if force_32bit: + return Path("PCbuild") / "win32" + return Path("PCbuild") / "amd64" + + +def get_exe_path(cpython: Path, flags: list[str], force_32bit: bool) -> Path: + match util.get_simple_platform(): + case "linux": + return cpython / "python" + case "macos": + return cpython / "python.exe" + case "windows": + build_dir = cpython / get_windows_build_dir(force_32bit) + if "NOGIL" in flags: + exe = next(build_dir.glob("python3.*.exe")) + else: + exe = build_dir / "python.exe" + return exe + + +def run_in_venv( + venv: PathLike, module: str, cmd: list[str], sudo: bool = False +) -> None: + venv = Path(venv) + + if util.get_simple_platform() == "windows": + exe = venv / "Scripts" / "python.exe" + else: + exe = venv / "bin" / "python" + + args = [ + str(exe), + "-m", + module, + *cmd, + ] + + if sudo: + ld_library_path = os.environ.get("LD_LIBRARY_PATH", "") + args = ["sudo", f"LD_LIBRARY_PATH={ld_library_path}"] + args + + print("Running command:", " ".join(args)) + subprocess.check_call(args) + + +def should_run( + force: bool, + fork: str, + ref: str, + machine: str, + pystats: bool, + flags: list[str], + cpython: Path = Path("cpython"), + results_dir: Path = Path("results"), +) -> bool: + try: + commit_hash = git.get_git_hash(cpython) + except subprocess.CalledProcessError: + # This will fail if the cpython checkout failed for some reason. 
Print + # a nice error message since the one the checkout itself gives is + # totally inscrutable. + print("The checkout of cpython failed.", file=sys.stderr) + print(f"You specified fork {fork!r} and ref {ref!r}.", file=sys.stderr) + print("Are you sure you entered the fork and ref correctly?", file=sys.stderr) + # Fail the rest of the workflow + sys.exit(1) + + found_result = has_result( + results_dir, + commit_hash, + machine, + pystats, + flags, + benchmark_definitions.get_benchmark_hash(), + progress=False, + ) + + if force: + if found_result is not None: + for filepath in found_result.filename.parent.iterdir(): + if filepath.suffix != ".json": + git.remove(results_dir.parent, filepath) + should_run = True + else: + should_run = (machine in ("__really_all", "all")) or found_result is None + + return should_run + + +def checkout_cpython(fork: str, ref: str, cpython: PathLike = Path("cpython")): + git.clone(cpython, f"https://github.com/{fork}/cpython.git", branch=ref, depth=50) + + +def checkout_benchmarks(): + for repo in benchmark_definitions.BENCHMARK_REPOS: + git.clone( + Path(repo.dirname), + repo.url, + branch=repo.hash, + depth=1, + ) + + +def compile_unix(cpython: PathLike, flags: list[str], pgo: bool, pystats: bool) -> None: + cpython = Path(cpython) + cfg = config.get_config_for_current_runner() + + env = os.environ.copy() + if "CLANG" in flags: + match util.get_simple_platform(): + case "linux": + env["CC"] = util.safe_which("clang-19") + env["LLVM_AR"] = util.safe_which("llvm-ar-19") + env["LLVM_PROFDATA"] = util.safe_which("llvm-profdata-19") + case "macos": + llvm_prefix = util.get_brew_prefix("llvm") + env["PATH"] = f"{llvm_prefix}/bin:{env['PATH']}" + env["CC"] = f"{llvm_prefix}/bin/clang" + env["LDFLAGS"] = f"-L{llvm_prefix}/lib" + env["CFLAGS"] = f"-I{llvm_prefix}/include" + + if util.get_simple_platform() == "macos": + openssl_prefix = util.get_brew_prefix("openssl@1.1") + env["PKG_CONFIG_PATH"] = f"{openssl_prefix}/lib/pkgconfig" + + args = [] + if pystats: + args.append("--enable-pystats") + if pgo: + args.extend(["--enable-optimizations", "--with-lto=full"]) + if "PYTHON_UOPS" in flags: + assert "JIT" not in flags + args.append("--enable-experimental-jit=interpreter") + if "JIT" in flags: + assert "PYTHON_UOPS" not in flags + args.append("--enable-experimental-jit=yes") + if "NOGIL" in flags: + args.append("--disable-gil") + if "CLANG" in flags: + args.append("--with-tail-call-interp") + args.append("--enable-option-checking=fatal") + if configure_flags := os.environ.get("PYTHON_CONFIGURE_FLAGS"): + args.extend(shlex.split(configure_flags)) + + make_args = [] + if cores := cfg.get("use_cores", None): + make_args.extend(["-j", str(cores)]) + else: + make_args.extend(["-j"]) + + with contextlib.chdir(cpython): + subprocess.check_call(["./configure", *args], env=env) + subprocess.check_call(["make", *make_args], env=env) + + +def compile_windows( + cpython: PathLike, flags: list[str], pgo: bool, force_32bit: bool +) -> None: + cpython = Path(cpython) + + args = ["--%"] # This is the PowerShell "stop parsing" flag + if force_32bit: + args.extend(["-p", "win32"]) + args.extend(["-c", "Release"]) + if pgo: + args.append("--pgo") + if "JIT" in flags: + args.append("--experimental-jit") + if "PYTHON_UOPS" in flags: + args.append("--experimental-jit-interpreter") + if "NOGIL" in flags: + args.append("--disable-gil") + if "CLANG" in flags: + args.extend( + [ + "--tail-call-interp", + '"/p:PlatformToolset=clangcl"', + '"/p:LLVMInstallDir=C:\\Program Files\\LLVM"', + 
'"/p:LLVMToolsVersion=19.1.6"', + ] + ) + + with contextlib.chdir(cpython): + subprocess.check_call( + [ + "powershell.exe", + Path("PCbuild") / "build.bat", + *args, + ], + ) + shutil.copytree(get_windows_build_dir(force_32bit), "libs", dirs_exist_ok=True) + + +def install_pyperformance(venv: PathLike) -> None: + run_in_venv(venv, "pip", ["install", "./pyperformance"]) + + +def tune_system(venv: PathLike, perf: bool) -> None: + # System tuning is Linux only + if util.get_simple_platform() != "linux": + return + + args = ["system", perf and "reset" or "tune"] + if cpu_affinity := os.environ.get("CPU_AFFINITY"): + args.append(f'--affinity="{cpu_affinity}"') + + run_in_venv(venv, "pyperf", args, sudo=True) + + if perf: + subprocess.check_call( + [ + "sudo", + "bash", + "-c", + "echo 100000 > /proc/sys/kernel/perf_event_max_sample_rate", + ] + ) + + +def reset_system(venv: PathLike) -> None: + # System tuning is Linux only + if util.get_simple_platform() != "linux": + return + + run_in_venv( + venv, + "pyperf", + ["system", "reset"], + sudo=True, + ) + + +def _main( + fork: str, + ref: str, + machine: str, + benchmarks: str, + flags: list[str], + force: bool, + pgo: bool, + perf: bool, + pystats: bool, + force_32bit: bool, + run_id: str | None = None, +): + venv = Path("venv") + cpython = Path("cpython") + platform = util.get_simple_platform() + + if force_32bit and platform != "windows": + raise RuntimeError("32-bit builds are only supported on Windows") + if perf and platform != "linux": + raise RuntimeError("perf profiling is only supported on Linux") + if pystats and platform != "linux": + raise RuntimeError("Pystats is only supported on Linux") + + checkout_cpython(fork, ref, cpython) + + if not should_run(force, fork, ref, machine, False, flags, cpython=cpython): + print("No need to run benchmarks. Skipping...") + return + + checkout_benchmarks() + + match platform: + case "linux" | "macos": + compile_unix(cpython, flags, pgo, pystats) + case "windows": + compile_windows(cpython, flags, pgo, force_32bit) + + # Print out the version of Python we built just so we can confirm it's the + # right thing in the logs + subprocess.check_call([get_exe_path(cpython, flags, force_32bit), "-VV"]) + + install_pyperformance(venv) + tune_system(venv, perf) + + try: + if Path(".debug").exists(): + shutil.rmtree(".debug") + + pystats_dir = Path("/tmp") / "py_stats" + if pystats: + shutil.rmtree(pystats_dir, ignore_errors=True) + pystats_dir.mkdir(parents=True) + + if perf: + mode = "perf" + elif pystats: + mode = "pystats" + else: + mode = "benchmark" + + mrun_benchmarks._main( + mode, + get_exe_path(cpython, flags, force_32bit), + fork, + ref, + benchmarks, + flags=flags, + run_id=run_id, + test_mode=False, + individual=pystats, + ) + finally: + reset_system(venv) + + +def main(): + parser = argparse.ArgumentParser( + description=""" + Run the full compile/benchmark workflow. 
+ """,
 + formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter, + ) + parser.add_argument("fork", help="The fork of CPython") + parser.add_argument("ref", help="The git ref in the fork") + parser.add_argument( + "machine", + help="The machine to run the benchmarks on.", + ) + parser.add_argument("benchmarks", help="The benchmarks to run") + parser.add_argument("flags", help="Configuration flags") + parser.add_argument("--force", action="store_true", help="Force a re-run") + parser.add_argument( + "--pgo", + action="store_true", + help="Build with profile-guided optimization", + ) + parser.add_argument( + "--perf", + action="store_true", + help="Collect Linux perf profiling data (Linux only)", + ) + parser.add_argument( + "--pystats", + action="store_true", + help="Enable Pystats (Linux only)", + ) + parser.add_argument( + "--32bit", + action="store_true", + dest="force_32bit", + help="Do a 32-bit build (Windows only)", + ) + parser.add_argument("--run_id", default=None, type=str, help="The GitHub run id") + args = parser.parse_args() + + _main( + args.fork, + args.ref, + args.machine, + args.benchmarks, + mflags.parse_flags(args.flags), + args.force, + args.pgo, + args.perf, + args.pystats, + args.force_32bit, + args.run_id, + ) + + +if __name__ == "__main__": + main() diff --git a/bench_runner/templates/_benchmark.src.yml b/bench_runner/templates/_benchmark.src.yml index 5b80eba8..f7178a27 100644 --- a/bench_runner/templates/_benchmark.src.yml +++ b/bench_runner/templates/_benchmark.src.yml @@ -71,65 +71,11 @@ jobs: - name: git gc run: | git gc - - name: Checkout CPython - uses: actions/checkout@v4 - with: - persist-credentials: false - repository: ${{ inputs.fork }}/cpython - path: cpython - ref: ${{ inputs.ref }} - fetch-depth: 50 - - name: Install dependencies from PyPI - run: | - Remove-Item venv -Recurse -ErrorAction SilentlyContinue - py -m venv venv - venv\Scripts\python.exe -m pip install --upgrade pip - venv\Scripts\python.exe -m pip install -r requirements.txt - - name: Should we run? 
- if: ${{ always() }} - id: should_run - run: | - venv\Scripts\python.exe -m bench_runner should_run ${{ inputs.force }} ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.machine }} false "${{ env.flags }}" >> $GITHUB_OUTPUT - - name: Checkout python-macrobenchmarks - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: pyston/python-macrobenchmarks - path: pyston-benchmarks - ref: ${{ env.PYSTON_BENCHMARKS_HASH }} - - name: Checkout pyperformance - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: mdboom/pyperformance - path: pyperformance - ref: ${{ env.PYPERFORMANCE_HASH }} - - name: Build Python - if: ${{ steps.should_run.outputs.should_run != 'false' }} - # The build.bat script is much easier to use from cmd - shell: cmd - run: | - cd cpython - PCbuild\build.bat %BUILD_FLAGS% ${{ (inputs.pgo == true) && '--pgo' || '' }} ${{ inputs.clang == true && '--tail-call-interp' || '' }} ${{ inputs.jit == true && '--experimental-jit' || '' }} ${{ inputs.tier2 == true && '--experimental-jit-interpreter' || '' }} ${{ inputs.nogil == true && '--disable-gil' || '' }} -c Release ${{ inputs.clang == true && '"/p:PlatformToolset=clangcl"' || '' }} ${{ inputs.clang == true && '"/p:LLVMInstallDir=C:\Program Files\LLVM"' || '' }} ${{ inputs.clang == true && '"/p:LLVMToolsVersion=19.1.6"' || '' }} - - name: Copy Python to different location - if: ${{ steps.should_run.outputs.should_run != 'false' }} + - name: Building Python and running pyperformance run: | - # Copy the build products to a place that libraries can find them. - cd cpython - Copy-Item -Path $env:BUILD_DEST -Destination "libs" -Recurse - - name: Install pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - venv\Scripts\python.exe -m pip install .\pyperformance - - name: Running pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - venv\Scripts\python.exe -m bench_runner run_benchmarks benchmark ${{ (inputs.nogil == true && '(get-item cpython/$env:BUILD_DEST/python3.*.exe).FullName' || 'cpython/$env:BUILD_DEST/python.exe') }} ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.benchmarks || 'all' }} "${{ env.flags }}" --run_id ${{ github.run_id }} + python workflow_bootstrap.py ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.machine }} ${{ inputs.benchmarks || 'all' }} "${{ env.flags }}" ${{ inputs.force && '--force' || '' }} ${{ inputs.pgo && '--pgo' || '' }} --run_id ${{ github.run_id }} # Pull again, since another job may have committed results in the meantime - name: Pull benchmarking - if: ${{ steps.should_run.outputs.should_run != 'false' }} run: | # Another benchmarking task may have created results for the same # commit while the above was running. This "magic" incantation means @@ -137,12 +83,10 @@ jobs: # just pulled in in that case. 
git pull -s recursive -X ours --autostash --rebase - name: Add data to repo - if: ${{ steps.should_run.outputs.should_run != 'false' }} uses: EndBug/add-and-commit@v9 with: add: results - name: Upload artifacts - if: ${{ steps.should_run.outputs.should_run != 'false' }} uses: actions/upload-artifact@v4 with: name: benchmark @@ -161,79 +105,17 @@ jobs: run: | git gc - uses: fregante/setup-git-user@v2 - - name: Checkout CPython - uses: actions/checkout@v4 - with: - persist-credentials: false - repository: ${{ inputs.fork }}/cpython - path: cpython - ref: ${{ inputs.ref }} - fetch-depth: 50 - - name: Install dependencies from PyPI - run: | - rm -rf venv - python -m venv venv - venv/bin/python -m pip install --upgrade pip - venv/bin/python -m pip install -r requirements.txt - - name: Should we run? - if: ${{ always() }} - id: should_run - run: | - venv/bin/python -m bench_runner should_run ${{ inputs.force }} ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.machine }} false ${{ env.flags }} >> $GITHUB_OUTPUT - - name: Checkout python-macrobenchmarks - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: pyston/python-macrobenchmarks - path: pyston-benchmarks - ref: ${{ env.PYSTON_BENCHMARKS_HASH }} - - name: Checkout pyperformance - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} + - name: Setup system Python + if: ${{ runner.arch == 'X64' }} + uses: actions/setup-python@v5 with: - persist-credentials: false - repository: mdboom/pyperformance - path: pyperformance - ref: ${{ env.PYPERFORMANCE_HASH }} - - name: Build with clang - if: ${{ inputs.clang }} + python-version: "3.11" + - name: Building Python and running pyperformance run: | - echo "CC=`which clang-19`" >> $GITHUB_ENV - echo "LLVM_AR=`which llvm-ar-19`" >> $GITHUB_ENV - echo "LLVM_PROFDATA=`which llvm-profdata-19`" >> $GITHUB_ENV - - name: Build Python - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - cd cpython - ./configure --enable-option-checking=fatal ${{ inputs.pgo == true && '--enable-optimizations --with-lto=full' || '' }} ${{ inputs.tier2 == true && '--enable-experimental-jit=interpreter' || '' }} ${{ inputs.jit == true && '--enable-experimental-jit=yes' || '' }} ${{ inputs.nogil == true && '--disable-gil' || '' }} ${{ inputs.clang == true && '--with-tail-call-interp' || '' }} ${PYTHON_CONFIGURE_FLAGS:-} - make ${{ runner.arch == 'ARM64' && '-j' || '-j4' }} - ./python -VV - - name: Install pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - venv/bin/python -m pip install ./pyperformance - - name: Tune system - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - sudo LD_LIBRARY_PATH=$LD_LIBRARY_PATH venv/bin/python -m pyperf system ${{ inputs.perf && 'reset' || 'tune ${CPU_AFFINITY:+--affinity="$CPU_AFFINITY"}' }} - - name: Tune for (Linux) perf - if: ${{ steps.should_run.outputs.should_run != 'false' && inputs.perf }} - run: | - # Must match the PERF_PERIOD value in profiling_plot.py - sudo bash -c "echo 100000 > /proc/sys/kernel/perf_event_max_sample_rate" - - name: Running pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - rm -rf ~/.debug/* - venv/bin/python -m bench_runner run_benchmarks ${{ inputs.perf && 'perf' || 'benchmark' }} cpython/python ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} --run_id ${{ github.run_id }} - - name: Untune system - if: ${{ 
steps.should_run.outputs.should_run != 'false' }} - run: | - sudo LD_LIBRARY_PATH=$LD_LIBRARY_PATH venv/bin/python -m pyperf system reset + python workflow_bootstrap.py ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.machine }} ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} ${{ inputs.force && '--force' || '' }} ${{ inputs.pgo && '--pgo' || '' }} ${{ inputs.perf && '--perf' || '' }} --run_id ${{ github.run_id }} # Pull again, since another job may have committed results in the meantime - name: Pull benchmarking - if: ${{ steps.should_run.outputs.should_run != 'false' && !inputs.perf }} + if: ${{ !inputs.perf }} run: | # Another benchmarking task may have created results for the same # commit while the above was running. This "magic" incantation means @@ -241,12 +123,12 @@ jobs: # just pulled in in that case. git pull -s recursive -X ours --autostash --rebase - name: Adding data to repo - if: ${{ steps.should_run.outputs.should_run != 'false' && !inputs.perf }} + if: ${{ !inputs.perf }} uses: EndBug/add-and-commit@v9 with: add: results - name: Upload benchmark artifacts - if: ${{ steps.should_run.outputs.should_run != 'false' && !inputs.perf }} + if: ${{ !inputs.perf }} uses: actions/upload-artifact@v4 with: name: benchmark @@ -254,7 +136,7 @@ jobs: benchmark.json overwrite: true - name: Upload perf artifacts - if: ${{ steps.should_run.outputs.should_run != 'false' && inputs.perf }} + if: ${{ inputs.perf }} uses: actions/upload-artifact@v4 with: name: perf @@ -270,70 +152,11 @@ jobs: - name: git gc run: | git gc - - name: Checkout CPython - uses: actions/checkout@v4 - with: - persist-credentials: false - repository: ${{ inputs.fork }}/cpython - path: cpython - ref: ${{ inputs.ref }} - fetch-depth: 50 - - name: Install dependencies from PyPI - run: | - rm -rf venv - python3 -m venv venv - venv/bin/python -m pip install --upgrade pip - venv/bin/python -m pip install -r requirements.txt - - name: Should we run? 
- if: ${{ always() }} - id: should_run - run: | - venv/bin/python -m bench_runner should_run ${{ inputs.force }} ${{ inputs.force }} ${{ inputs.ref }} ${{ inputs.machine }} false ${{ env.flags }} >> $GITHUB_OUTPUT - - name: Checkout python-macrobenchmarks - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: pyston/python-macrobenchmarks - path: pyston-benchmarks - ref: ${{ env.PYSTON_BENCHMARKS_HASH }} - - name: Checkout pyperformance - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: mdboom/pyperformance - path: pyperformance - ref: ${{ env.PYPERFORMANCE_HASH }} - - name: Setup environment - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - echo "PKG_CONFIG_PATH=$(brew --prefix openssl@1.1)/lib/pkgconfig" >> $GITHUB_ENV - - name: Build with clang - if: ${{ inputs.clang }} - run: | - echo "PATH=$(brew --prefix llvm)/bin:$PATH" >> $GITHUB_ENV - echo "CC=$(brew --prefix llvm)/bin/clang" >> $GITHUB_ENV - echo "LDFLAGS=-L$(brew --prefix llvm)/lib" >> $GITHUB_ENV - echo "CFLAGS=-I$(brew --prefix llvm)/include" >> $GITHUB_ENV - - name: Build Python - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - cd cpython - ./configure --enable-option-checking=fatal ${{ inputs.pgo == true && '--enable-optimizations --with-lto=full' || '' }} ${{ inputs.tier2 == true && '--enable-experimental-jit=interpreter' || '' }} ${{ inputs.jit == true && '--enable-experimental-jit=yes' || '' }} ${{ inputs.nogil == true && '--disable-gil' || '' }} ${{ inputs.clang == true && '--with-tail-call-interp' || '' }} ${PYTHON_CONFIGURE_FLAGS:-} - make -j4 - ./python.exe -VV - - name: Install pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - venv/bin/python -m pip install ./pyperformance - - name: Running pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} + - name: Building Python and running pyperformance run: | - venv/bin/python -m bench_runner run_benchmarks benchmark cpython/python.exe ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} --run_id ${{ github.run_id }} + python3 workflow_bootstrap.py ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.machine }} ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} ${{ inputs.force && '--force' || '' }} ${{ inputs.pgo && '--pgo' || '' }} --run_id ${{ github.run_id }} # Pull again, since another job may have committed results in the meantime - name: Pull benchmarking - if: ${{ steps.should_run.outputs.should_run != 'false' }} run: | # Another benchmarking task may have created results for the same # commit while the above was running. This "magic" incantation means @@ -341,12 +164,10 @@ jobs: # just pulled in in that case. 
git pull -s recursive -X ours --autostash --rebase - name: Add data to repo - if: ${{ steps.should_run.outputs.should_run != 'false' }} uses: EndBug/add-and-commit@v9 with: add: results - name: Upload artifacts - if: ${{ steps.should_run.outputs.should_run != 'false' }} uses: actions/upload-artifact@v4 with: name: benchmark diff --git a/bench_runner/templates/_pystats.src.yml b/bench_runner/templates/_pystats.src.yml index b5272edb..8ef1b143 100644 --- a/bench_runner/templates/_pystats.src.yml +++ b/bench_runner/templates/_pystats.src.yml @@ -18,9 +18,6 @@ name: _pystats force: description: "Rerun and replace results if commit already exists" type: boolean - individual: - description: "Collect pystats for each individual benchmark" - type: boolean workflow_call: inputs: @@ -39,9 +36,6 @@ name: _pystats force: description: "Rerun and replace results if commit already exists" type: boolean - individual: - description: "Collect pystats for each individual benchmark" - type: boolean jobs: collect-stats: @@ -56,63 +50,10 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.11" - - name: Checkout CPython - uses: actions/checkout@v4 - with: - persist-credentials: false - repository: ${{ inputs.fork }}/cpython - ref: ${{ inputs.ref }} - path: cpython - fetch-depth: 50 - - name: Install dependencies from PyPI - run: | - rm -rf venv - python -m venv venv - venv/bin/python -m pip install -r requirements.txt - - name: Should we run? - if: ${{ always() }} - id: should_run - run: | - venv/bin/python -m bench_runner should_run ${{ inputs.force }} ${{ inputs.fork }} ${{ inputs.ref }} all true ${{ env.flags }} >> $GITHUB_OUTPUT - - name: Checkout python-macrobenchmarks - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: pyston/python-macrobenchmarks - path: pyston-benchmarks - ref: ${{ env.PYSTON_BENCHMARKS_HASH }} - - name: Checkout pyperformance - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: mdboom/pyperformance - path: pyperformance - ref: ${{ env.PYPERFORMANCE_HASH }} - - name: Create pystats directory - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - # If we don't do this, stats are printed to the console - rm -rf /tmp/py_stats - mkdir /tmp/py_stats - - name: Build Python - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - cd cpython - ./configure --enable-option-checking=fatal --enable-pystats --prefix=$PWD/install ${{ inputs.tier2 == true && '--enable-experimental-jit=interpreter' || '' }} ${{ inputs.jit == true && '--enable-experimental-jit=yes' || '' }} ${{ inputs.nogil == true && '--disable-gil' || '' }} - make -j4 - make install - - name: Install pyperformance into the system python - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - venv/bin/python -m pip install --no-binary :all: ./pyperformance - - name: Running pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} + - name: Build CPython and run pyperformance benchmarks run: | - venv/bin/python -m bench_runner run_benchmarks pystats cpython/python ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} --run_id ${{ github.run_id }} ${{ inputs.individual == true && '--individual' || '' }} + python workflow_bootstrap.py --pystats ${{ inputs.fork }} ${{ inputs.ref }} all ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} --run_id ${{ github.run_id 
}} - name: Pull benchmarking - if: ${{ steps.should_run.outputs.should_run != 'false' }} run: | # Another benchmarking task may have created results for the same # commit while the above was running. This "magic" incantation means @@ -120,7 +61,6 @@ jobs: # just pulled in in that case. git pull -s recursive -X ours --autostash --rebase - name: Add data to repo - if: ${{ steps.should_run.outputs.should_run != 'false' }} uses: EndBug/add-and-commit@v9 with: add: results diff --git a/bench_runner/templates/benchmark.src.yml b/bench_runner/templates/benchmark.src.yml index f5e90c62..d569da9f 100644 --- a/bench_runner/templates/benchmark.src.yml +++ b/bench_runner/templates/benchmark.src.yml @@ -104,7 +104,6 @@ jobs: fork: ${{ inputs.fork }} ref: ${{ inputs.ref }} benchmarks: ${{ inputs.benchmarks }} - individual: true force: true secrets: inherit @@ -116,7 +115,6 @@ jobs: fork: python ref: ${{ needs.determine_base.outputs.ref }} benchmarks: ${{ inputs.benchmarks }} - individual: true force: false secrets: inherit diff --git a/bench_runner/templates/env.yml b/bench_runner/templates/env.yml deleted file mode 100644 index e7268417..00000000 --- a/bench_runner/templates/env.yml +++ /dev/null @@ -1,2 +0,0 @@ -PYPERFORMANCE_HASH: 56d12a8fd7cc1432835965d374929bfa7f6f7a07 -PYSTON_BENCHMARKS_HASH: 265655e7f03ace13ec1e00e1ba299179e69f8a00 diff --git a/bench_runner/templates/workflow_bootstrap.py b/bench_runner/templates/workflow_bootstrap.py new file mode 100644 index 00000000..f919c233 --- /dev/null +++ b/bench_runner/templates/workflow_bootstrap.py @@ -0,0 +1,79 @@ +# This script may only use the standard library, since it bootstraps setting up +# the virtual environment to run the full bench_runner. + + +# NOTE: This file should import in Python 3.9 or later so it can at least print +# the error message that the version of Python is too old. + + +from pathlib import Path +import shutil +import subprocess +import sys + + +def create_venv(venv: Path) -> None: + if venv.exists(): + shutil.rmtree(venv) + + subprocess.check_call( + [ + sys.executable, + "-m", + "venv", + str(venv), + ] + ) + + +def run_in_venv( + venv: Path, module: str, cmd: list[str], prefix: list[str] = [] +) -> None: + venv = Path(venv) + + if sys.platform.startswith("win"): + exe = Path("Scripts") / "python.exe" + else: + exe = Path("bin") / "python" + + args = [ + *prefix, + str(venv / exe), + "-m", + module, + *cmd, + ] + + print("Running command:", " ".join(args)) + subprocess.check_call(args) + + +def install_requirements(venv: Path) -> None: + run_in_venv(venv, "pip", ["install", "--upgrade", "pip"]) + run_in_venv(venv, "pip", ["install", "-r", "requirements.txt"]) + + +def main(): + venv = Path("venv") + create_venv(venv) + install_requirements(venv) + + # Now that we've installed the full bench_runner library, + # continue on in a new process... 
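+ # Everything on the command line after the literal "workflow_bootstrap.py"
+ # is forwarded unchanged to "python -m bench_runner workflow" in the venv.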
+
+ if "workflow_bootstrap.py" not in sys.argv:
+ raise ValueError("Couldn't parse command line")
+ last_arg = sys.argv.index("workflow_bootstrap.py")
+
+ run_in_venv(venv, "bench_runner", ["workflow", *sys.argv[last_arg + 1 :]]) + + +if __name__ == "__main__": + if sys.version_info[:2] < (3, 11): + print( + "The benchmarking infrastructure requires Python 3.11 or later.", + file=sys.stderr, + ) + sys.exit(1) + + main() diff --git a/bench_runner/util.py b/bench_runner/util.py index 59cbc867..63845c10 100644 --- a/bench_runner/util.py +++ b/bench_runner/util.py @@ -1,9 +1,11 @@ import functools -import hashlib import itertools import os from pathlib import Path -from typing import TypeAlias, Union +import shutil +import subprocess +import sys +from typing import Literal, TypeAlias, Union from . import config @@ -12,13 +14,6 @@ PathLike: TypeAlias = Union[str, os.PathLike] -def get_benchmark_hash() -> str: - hash = hashlib.sha256() - hash.update(os.environ["PYPERFORMANCE_HASH"].encode("ascii")[:7]) - hash.update(os.environ["PYSTON_BENCHMARKS_HASH"].encode("ascii")[:7]) - return hash.hexdigest()[:6] - - TYPE_TO_ICON = { "table": "📄", "time plot": "📈", @@ -55,3 +50,38 @@ def has_any_element(iterable): return True # If successful, the generator is not empty except StopIteration: return False # If StopIteration is raised, the generator is empty + + +def safe_which(cmd: str) -> str: + """ + Like shutil.which, but raises a RuntimeError if the command is not found. + """ + path = shutil.which(cmd) + if path is None: + raise RuntimeError(f"Command {cmd} not found in PATH") + return path + + +def get_brew_prefix(command: str) -> str: + """ + Get the installation prefix of a Homebrew package. + """ + try: + prefix = subprocess.check_output(["brew", "--prefix", command]) + except subprocess.CalledProcessError: + raise RuntimeError(f"Unable to find brew installation prefix for {command}") + return prefix.decode("utf-8").strip() + + +@functools.cache +def get_simple_platform() -> Literal["linux", "macos", "windows"]: + """
+ Return a basic platform name: linux, macos or windows. 
+ """ + if sys.platform.startswith("linux"): + return "linux" + elif sys.platform == "darwin": + return "macos" + elif sys.platform.startswith("win"): + return "windows" + raise RuntimeError(f"Unsupported platform {sys.platform}.") diff --git a/pyproject.toml b/pyproject.toml index cbe9ff70..fe033f2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ ] description = "Faster CPython's benchmarking runner utilities" readme = "README.md" -requires-python = ">=3.10" +requires-python = ">=3.11" license = {text = "BSD-3-Clause"} classifiers = [ "Programming Language :: Python :: 3", @@ -21,7 +21,7 @@ dependencies = [ "rich-argparse==1.7.0", "ruamel.yaml==0.18.10", "scour==0.38.2", - "tomli==2.0.1; python_version < '3.11'", + "tomli==2.0.1", "wheel", ] dynamic = ["version"] diff --git a/tests/test_run_benchmarks.py b/tests/test_run_benchmarks.py index 9d24ede3..99a14106 100644 --- a/tests/test_run_benchmarks.py +++ b/tests/test_run_benchmarks.py @@ -9,11 +9,11 @@ import pytest +from bench_runner import benchmark_definitions from bench_runner import git from bench_runner.scripts import generate_results from bench_runner.scripts import run_benchmarks -from bench_runner.scripts import should_run -from bench_runner import util +from bench_runner.scripts import workflow DATA_PATH = Path(__file__).parent / "data" @@ -32,8 +32,16 @@ def dummy(*args, **kwargs): monkeypatch.setattr(git, "get_git_merge_base", dummy) +def hardcode_benchmark_hash(monkeypatch): + def dummy(*args, **kwargs): + return "215d35" + + monkeypatch.setattr(benchmark_definitions, "get_benchmark_hash", dummy) + + def test_update_metadata(benchmarks_checkout, monkeypatch): dont_get_git_merge_base(monkeypatch) + hardcode_benchmark_hash(monkeypatch) shutil.copy( DATA_PATH @@ -67,7 +75,9 @@ def test_update_metadata(benchmarks_checkout, monkeypatch): ) -def test_run_benchmarks(benchmarks_checkout): +def test_run_benchmarks(benchmarks_checkout, monkeypatch): + hardcode_benchmark_hash(monkeypatch) + shutil.copyfile( DATA_PATH / "bench_runner.toml", benchmarks_checkout / "bench_runner.toml" ) @@ -148,88 +158,88 @@ def test_run_benchmarks(benchmarks_checkout): assert returncode == 1 -def test_should_run_exists_noforce(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_exists_noforce(benchmarks_checkout, monkeypatch): + hardcode_benchmark_hash(monkeypatch) repo = _copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) - should_run._main( + result = workflow.should_run( False, "python", "main", "linux-x86_64-linux", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.strip() == "should_run=false" + assert result is False assert (repo / "results" / "bm-20220323-3.10.4-9d38120").is_dir() -def test_should_run_diff_machine_noforce(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_diff_machine_noforce(benchmarks_checkout, monkeypatch): repo = _copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) - should_run._main( + result = workflow.should_run( False, "python", "main", "darwin-x86_64-darwin", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.strip() == "should_run=true" + assert result is True assert len(list((repo / "results" / "bm-20220323-3.10.4-9d38120").iterdir())) == 1 -def test_should_run_all_noforce(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_all_noforce(benchmarks_checkout, monkeypatch): repo = 
_copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) - should_run._main( + result = workflow.should_run( False, "python", "main", "all", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.strip() == "should_run=true" + assert result is True assert len(list((repo / "results" / "bm-20220323-3.10.4-9d38120").iterdir())) == 1 -def test_should_run_noexists_noforce(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_noexists_noforce(benchmarks_checkout, monkeypatch): + hardcode_benchmark_hash(monkeypatch) repo = _copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) shutil.rmtree(repo / "results" / "bm-20220323-3.10.4-9d38120") - should_run._main( + result = workflow.should_run( False, "python", "main", "linux-x86_64-linux", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.strip() == "should_run=true" + assert result is True assert not (repo / "results" / "bm-20220323-3.10.4-9d38120").is_dir() -def test_should_run_exists_force(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_exists_force(benchmarks_checkout, monkeypatch): + hardcode_benchmark_hash(monkeypatch) + repo = _copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) @@ -242,19 +252,18 @@ def remove(repo, path): monkeypatch.setattr(git, "remove", remove) generate_results._main(repo, force=False, bases=["3.11.0b3"]) - should_run._main( + result = workflow.should_run( True, "python", "main", "linux-x86_64-linux", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.splitlines()[-1].strip() == "should_run=true" + assert result is True assert (repo / "results" / "bm-20220323-3.10.4-9d38120").is_dir() assert set(x.name for x in removed_paths) == { "bm-20220323-linux-x86_64-python-main-3.10.4-9d38120-vs-3.11.0b3.svg", @@ -263,24 +272,24 @@ def remove(repo, path): } -def test_should_run_noexists_force(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_noexists_force(benchmarks_checkout, monkeypatch): + hardcode_benchmark_hash(monkeypatch) repo = _copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) shutil.rmtree(repo / "results" / "bm-20220323-3.10.4-9d38120") - should_run._main( + result = workflow.should_run( True, "python", "main", "linux-x86_64-linux", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.strip() == "should_run=true" + assert result is True assert not (repo / "results" / "bm-20220323-3.10.4-9d38120").is_dir() @@ -292,13 +301,13 @@ def test_should_run_checkout_failed(tmp_path, capsys, monkeypatch): subprocess.check_call(["git", "init"], cwd=cpython_path) with pytest.raises(SystemExit): - should_run._main( + workflow.should_run( True, "python", "main", "linux-x86_64-linux", False, - ",,", + [], cpython_path, repo / "results", ) @@ -350,4 +359,4 @@ def test_run_benchmarks_flags(benchmarks_checkout): def test_get_benchmark_hash(): - assert util.get_benchmark_hash() == "215d35" + assert benchmark_definitions.get_benchmark_hash() == "dcfded"
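The pinned-hash scheme introduced above can be sanity-checked in isolation. This is a minimal sketch of what `get_benchmark_hash()` in `bench_runner/benchmark_definitions.py` computes when no local benchmark checkouts exist (so the pinned hashes in `BENCHMARK_REPOS` are used); it reproduces the `"dcfded"` value asserted by `test_get_benchmark_hash`:

```
import hashlib

# Pinned commits from BENCHMARK_REPOS in bench_runner/benchmark_definitions.py
pinned = [
    "56d12a8fd7cc1432835965d374929bfa7f6f7a07",  # pyperformance
    "265655e7f03ace13ec1e00e1ba299179e69f8a00",  # pyston-benchmarks
]

digest = hashlib.sha256()
for commit in pinned:
    # Only the first 7 ASCII characters of each hash contribute, matching
    # the short-hash convention used by get_benchmark_hash().
    digest.update(commit.encode("ascii")[:7])

print(digest.hexdigest()[:6])  # "dcfded", per test_get_benchmark_hash
```

Because a locally checked-out benchmark repo contributes its current git hash instead of the pinned one, results produced against a modified checkout get a different benchmark hash and cannot be confused with results from the pinned revisions.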