diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..1ec4fb0f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,14 @@ +## Unreleased + +## v2.0.0 + +Most of the work has moved from GitHub Actions `.yml` files to Python code in `workflow.py`. +In the future, this will make it possible to support more workflow engines beyond just GitHub Actions. + +**Migration note**: After running `python -m bench_runner install` to update your local files, be sure to add the new `workflow_bootstrap.py` file to your git repository. + +### New configuration + +Runners have a new configuration option, `use_cores`, to control the number of +CPU cores used to build CPython. By default, all available cores are used, but +some cloud VMs require using fewer. diff --git a/README.md b/README.md index 416fc158..816f3f27 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,12 @@ If you don't want a machine to be included when the user selects "machine == 'al include_in_all = false ```
+You may limit the number of cores used to build Python with the `use_cores` option. This may be necessary, for example, on cloud VMs. + +``` +use_cores = 2 +``` + ### Try a benchmarking run There are instructions for running a benchmarking action already in the `README.md` of your repo. Look there and give it a try! diff --git a/bench_runner/__main__.py b/bench_runner/__main__.py index 8465fbb5..b843b9d3 100644 --- a/bench_runner/__main__.py +++ b/bench_runner/__main__.py @@ -14,13 +14,13 @@ "Get the merge base of the selected commit, and determine if it should run" ), "install": "Install the workflow files into a results repository", + "notify": "Send a notification about the completion of the workflow", "profiling_plot": "Generate the profiling plots from raw data", "purge": "Purge old results from a results repository", "remove_benchmark": "Remove specific benchmarks from the data set", "run_benchmarks": "Run benchmarks (in timing, pyperf or perf modes)", - "should_run": "Determine whether we need to rerun results for the current commit", "synthesize_loops_file": "Create a loops file from multiple benchmark results", - "notify": "Send a notification about the completion of the workflow", + "workflow": "Run the full compile/benchmark workflow", } if __name__ == "__main__": diff --git a/bench_runner/benchmark_definitions.py b/bench_runner/benchmark_definitions.py new file mode 100644 index 00000000..5ea214b4 --- /dev/null +++ b/bench_runner/benchmark_definitions.py @@ -0,0 +1,41 @@ +from __future__ import annotations + + +import dataclasses +import hashlib +from pathlib import Path + + +from . 
import git + + +@dataclasses.dataclass +class BenchmarkRepo: + hash: str + url: str + dirname: str + + +BENCHMARK_REPOS = [ + BenchmarkRepo( + "56d12a8fd7cc1432835965d374929bfa7f6f7a07", + "https://github.com/python/pyperformance.git", + "pyperformance", + ), + BenchmarkRepo( + "265655e7f03ace13ec1e00e1ba299179e69f8a00", + "https://github.com/pyston/python-macrobenchmarks.git", + "pyston-benchmarks", + ), +] + + +def get_benchmark_hash() -> str: + hash = hashlib.sha256() + for repo in BENCHMARK_REPOS: + if Path(repo.dirname).is_dir(): + current_hash = git.get_git_hash(Path(repo.dirname)) + else: + current_hash = repo.hash + hash.update(current_hash.encode("ascii")[:7]) + return hash.hexdigest()[:6] diff --git a/bench_runner/config.py b/bench_runner/config.py index 1ebd6b2d..2fffc7ae 100644 --- a/bench_runner/config.py +++ b/bench_runner/config.py @@ -4,6 +4,7 @@ import functools from pathlib import Path +from typing import Any try: import tomllib @@ -11,9 +12,21 @@ import tomli as tomllib # type: ignore +from . import runners + + @functools.cache def get_bench_runner_config( filepath: Path | str = Path("bench_runner.toml"), ): with Path(filepath).open("rb") as fd: return tomllib.load(fd) + + +def get_config_for_current_runner() -> dict[str, Any]: + config = get_bench_runner_config() + runner = runners.get_runner_for_hostname() + all_runners = config.get("runners", []) + if len(all_runners) >= 1: + return all_runners[0].get(runner.nickname, {}) + return {} diff --git a/bench_runner/git.py b/bench_runner/git.py index e6209e59..7f8120ed 100644 --- a/bench_runner/git.py +++ b/bench_runner/git.py @@ -2,9 +2,12 @@ from __future__ import annotations +import contextlib import datetime from pathlib import Path +import shutil import subprocess +import re import rich @@ -128,3 +131,40 @@ def get_commits_between(dirname: PathLike, ref1: str, ref2: str) -> list[str]: def bisect_commits(dirname: PathLike, ref1: str, ref2: str) -> str: commits = get_commits_between(dirname, ref1, ref2) return commits[len(commits) // 2] + + +def clone( + dirname: PathLike, + url: str, + *, + branch: str | None = None, + depth: int = 1, +) -> None: + is_hash = re.match(r"^[0-9a-f]{40}$", branch) if branch else False + + dirname = Path(dirname) + if dirname.is_dir(): + if is_hash and (dirname / ".git").is_dir() and get_git_hash(dirname) == branch: + # This is a git repo, and the hash matches + return + shutil.rmtree(dirname) + + # Fetching a hash and fetching a branch require different approaches + + if is_hash: + assert branch is not None + dirname.mkdir() + with contextlib.chdir(dirname): + subprocess.check_call(["git", "init"]) + subprocess.check_call(["git", "remote", "add", "origin", url]) + subprocess.check_call( + ["git", "fetch", "--depth", str(depth), "origin", branch] + ) + subprocess.check_call(["git", "checkout", branch]) + else: + args = ["git", "clone", url, str(dirname)] + if branch is not None: + args += ["--branch", branch] + if depth is not None: + args += ["--depth", str(depth)] + subprocess.check_call(args) diff --git a/bench_runner/result.py b/bench_runner/result.py index 2a47b759..9b3274a0 100644 --- a/bench_runner/result.py +++ b/bench_runner/result.py @@ -9,7 +9,6 @@ from operator import itemgetter from pathlib import Path import re -import socket import subprocess import sys from typing import Any, Callable, Iterable, Sequence @@ -524,7 +523,7 @@ def from_scratch( flags: Iterable[str] | None = None, ) -> "Result": result = cls( - _clean(runners.get_nickname_for_hostname(socket.gethostname())), + 
_clean(runners.get_nickname_for_hostname()), _clean(_get_architecture(python)), _clean_for_url(fork), _clean(ref[:20]), diff --git a/bench_runner/runners.py b/bench_runner/runners.py index dc96b4b3..f8b4e5eb 100644 --- a/bench_runner/runners.py +++ b/bench_runner/runners.py @@ -3,6 +3,7 @@ import functools import os +import socket from . import config @@ -80,13 +81,19 @@ def get_runners_by_nickname() -> dict[str, Runner]: return {x.nickname: x for x in get_runners()} -def get_nickname_for_hostname(hostname: str) -> str: +def get_nickname_for_hostname(hostname: str | None = None) -> str: # The envvar BENCHMARK_MACHINE_NICKNAME is used to override the machine that # results are reported for. if "BENCHMARK_MACHINE_NICKNAME" in os.environ: return os.environ["BENCHMARK_MACHINE_NICKNAME"] - return get_runners_by_hostname().get(hostname, unknown_runner).nickname + return get_runner_for_hostname(hostname).nickname def get_runner_by_nickname(nickname: str) -> Runner: return get_runners_by_nickname().get(nickname, unknown_runner) + + +def get_runner_for_hostname(hostname: str | None = None) -> Runner: + if hostname is None: + hostname = socket.gethostname() + return get_runners_by_hostname().get(hostname, unknown_runner) diff --git a/bench_runner/scripts/generate_results.py b/bench_runner/scripts/generate_results.py index f8f55361..cfb101b3 100644 --- a/bench_runner/scripts/generate_results.py +++ b/bench_runner/scripts/generate_results.py @@ -126,7 +126,11 @@ def sort_runner_names(runner_names: Iterable[str]) -> list[str]: def sorter(val): if val is None: return () - return order.index(val.split()[0]), val + try: + idx = order.index(val.split()[0]) + except ValueError: + idx = -1 + return idx, val return sorted(runner_names, key=sorter) diff --git a/bench_runner/scripts/get_merge_base.py b/bench_runner/scripts/get_merge_base.py index 2ea7cfd6..4209d4bd 100644 --- a/bench_runner/scripts/get_merge_base.py +++ b/bench_runner/scripts/get_merge_base.py @@ -6,10 +6,10 @@ import rich_argparse +from bench_runner import benchmark_definitions from bench_runner import flags as mflags from bench_runner import git from bench_runner.result import has_result -from bench_runner import util from bench_runner.util import PathLike @@ -55,7 +55,7 @@ def _main( machine, pystats, flags, - util.get_benchmark_hash(), + benchmark_definitions.get_benchmark_hash(), progress=False, ) is None diff --git a/bench_runner/scripts/install.py b/bench_runner/scripts/install.py index ec80b305..9275338e 100644 --- a/bench_runner/scripts/install.py +++ b/bench_runner/scripts/install.py @@ -241,13 +241,11 @@ def generate_generic(dst: Any) -> Any: def _main(check: bool) -> None: WORKFLOW_PATH.mkdir(parents=True, exist_ok=True) - env = load_yaml(TEMPLATE_PATH / "env.yml") - for path in TEMPLATE_PATH.glob("*"): if path.name.endswith(".src.yml") or path.name == "env.yml": continue - if not (ROOT_PATH / path.name).is_file(): + if not (ROOT_PATH / path.name).is_file() or path.suffix == ".py": if check: fail_check(ROOT_PATH / path.name) else: @@ -258,7 +256,6 @@ def _main(check: bool) -> None: generator = GENERATORS.get(src_path.name, generate_generic) src = load_yaml(src_path) dst = generator(src) - dst = {"env": env, **dst} write_yaml(dst_path, dst, check) diff --git a/bench_runner/scripts/run_benchmarks.py b/bench_runner/scripts/run_benchmarks.py index 01a3ad66..fa1249dc 100644 --- a/bench_runner/scripts/run_benchmarks.py +++ b/bench_runner/scripts/run_benchmarks.py @@ -18,6 +18,7 @@ import rich_argparse +from bench_runner import 
benchmark_definitions from bench_runner import flags from bench_runner import git from bench_runner.result import Result @@ -278,7 +279,7 @@ def update_metadata( merge_base = git.get_git_merge_base(cpython) if merge_base is not None: metadata["commit_merge_base"] = merge_base - metadata["benchmark_hash"] = util.get_benchmark_hash() + metadata["benchmark_hash"] = benchmark_definitions.get_benchmark_hash() if run_id is not None: metadata["github_action_url"] = f"{GITHUB_URL}/actions/runs/{run_id}" actor = os.environ.get("GITHUB_ACTOR") diff --git a/bench_runner/scripts/should_run.py b/bench_runner/scripts/should_run.py deleted file mode 100644 index 3d8f42f4..00000000 --- a/bench_runner/scripts/should_run.py +++ /dev/null @@ -1,109 +0,0 @@ -# Determines if this should run. -# If force is `true`, we always run, otherwise, we only run if we don't have -# results. - -import argparse -from pathlib import Path -import subprocess -import sys - - -import rich_argparse - - -# NOTE: This file should import in Python 3.9 or later so it can at least print -# the error message that the version of Python is too old. - - -def _main( - force: bool, - fork: str, - ref: str, - machine: str, - pystats: bool, - flag_str: str, - cpython: Path = Path("cpython"), - results_dir: Path = Path("results"), -) -> None: - if sys.version_info[:2] < (3, 10): - print( - "The benchmarking infrastructure requires Python 3.10 or later.", - file=sys.stderr, - ) - sys.exit(1) - - # Now that we've assert we are Python 3.11 or later, we can import - # parts of our library. - from bench_runner import flags as mflags - from bench_runner import git - from bench_runner.result import has_result - from bench_runner import util - - flags = mflags.parse_flags(flag_str) - - if "PYTHON_UOPS" in flags and "JIT" in flags: - print("Tier 2 interpreter and JIT may not be selected at the same time") - sys.exit(1) - - try: - commit_hash = git.get_git_hash(cpython) - except subprocess.CalledProcessError: - # This will fail if the cpython checkout failed for some reason. Print - # a nice error message since the one the checkout itself gives is - # totally inscrutable. 
- print("The checkout of cpython failed.", file=sys.stderr) - print(f"You specified fork {fork!r} and ref {ref!r}.", file=sys.stderr) - print("Are you sure you entered the fork and ref correctly?", file=sys.stderr) - # Fail the rest of the workflow - sys.exit(1) - - found_result = has_result( - results_dir, - commit_hash, - machine, - pystats, - flags, - util.get_benchmark_hash(), - progress=False, - ) - - if force: - if found_result is not None: - for filepath in found_result.filename.parent.iterdir(): - if filepath.suffix != ".json": - git.remove(results_dir.parent, filepath) - should_run = True - else: - should_run = (machine in ("__really_all", "all")) or found_result is None - - print(f"should_run={str(should_run).lower()}") - - -def main(): - parser = argparse.ArgumentParser( - description="Do we need to run this commit?", - formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter, - ) - parser.add_argument( - "force", - help="If true, force a re-run", - ) - parser.add_argument("fork") - parser.add_argument("ref") - parser.add_argument("machine") - parser.add_argument("pystats") - parser.add_argument("flags") - args = parser.parse_args() - - _main( - args.force != "false", - args.fork, - args.ref, - args.machine, - args.pystats != "false", - args.flags, - ) - - -if __name__ == "__main__": - main() diff --git a/bench_runner/scripts/workflow.py b/bench_runner/scripts/workflow.py new file mode 100644 index 00000000..1364696a --- /dev/null +++ b/bench_runner/scripts/workflow.py @@ -0,0 +1,394 @@ +from __future__ import annotations + + +import argparse +import contextlib +import os +from pathlib import Path +import shlex +import shutil +import subprocess +import sys + + +import rich_argparse + + +from bench_runner import benchmark_definitions +from bench_runner import config +from bench_runner import flags as mflags +from bench_runner import git +from bench_runner.result import has_result +from bench_runner import util +from bench_runner.util import PathLike + + +from bench_runner.scripts import run_benchmarks as mrun_benchmarks + + +def get_windows_build_dir(force_32bit: bool) -> Path: + if force_32bit: + return Path("PCbuild") / "win32" + return Path("PCbuild") / "amd64" + + +def get_exe_path(cpython: Path, flags: list[str], force_32bit: bool) -> Path: + match util.get_simple_platform(): + case "linux": + return cpython / "python" + case "macos": + return cpython / "python.exe" + case "windows": + build_dir = cpython / get_windows_build_dir(force_32bit) + if "NOGIL" in flags: + exe = next(build_dir.glob("python3.*.exe")) + else: + exe = build_dir / "python.exe" + return exe + + +def run_in_venv( + venv: PathLike, module: str, cmd: list[str], sudo: bool = False +) -> None: + venv = Path(venv) + + if util.get_simple_platform() == "windows": + exe = venv / "Scripts" / "python.exe" + else: + exe = venv / "bin" / "python" + + args = [ + str(exe), + "-m", + module, + *cmd, + ] + + if sudo: + ld_library_path = os.environ.get("LD_LIBRARY_PATH", "") + args = ["sudo", f"LD_LIBRARY_PATH={ld_library_path}"] + args + + print("Running command:", " ".join(args)) + subprocess.check_call(args) + + +def should_run( + force: bool, + fork: str, + ref: str, + machine: str, + pystats: bool, + flags: list[str], + cpython: Path = Path("cpython"), + results_dir: Path = Path("results"), +) -> bool: + try: + commit_hash = git.get_git_hash(cpython) + except subprocess.CalledProcessError: + # This will fail if the cpython checkout failed for some reason. 
Print + # a nice error message since the one the checkout itself gives is + # totally inscrutable. + print("The checkout of cpython failed.", file=sys.stderr) + print(f"You specified fork {fork!r} and ref {ref!r}.", file=sys.stderr) + print("Are you sure you entered the fork and ref correctly?", file=sys.stderr) + # Fail the rest of the workflow + sys.exit(1) + + found_result = has_result( + results_dir, + commit_hash, + machine, + pystats, + flags, + benchmark_definitions.get_benchmark_hash(), + progress=False, + ) + + if force: + if found_result is not None: + for filepath in found_result.filename.parent.iterdir(): + if filepath.suffix != ".json": + git.remove(results_dir.parent, filepath) + should_run = True + else: + should_run = (machine in ("__really_all", "all")) or found_result is None + + return should_run + + +def checkout_cpython(fork: str, ref: str, cpython: PathLike = Path("cpython")): + git.clone(cpython, f"https://github.com/{fork}/cpython.git", branch=ref, depth=50) + + +def checkout_benchmarks(): + for repo in benchmark_definitions.BENCHMARK_REPOS: + git.clone( + Path(repo.dirname), + repo.url, + branch=repo.hash, + depth=1, + ) + + +def compile_unix(cpython: PathLike, flags: list[str], pgo: bool, pystats: bool) -> None: + cpython = Path(cpython) + cfg = config.get_config_for_current_runner() + + env = os.environ.copy() + if "CLANG" in flags: + match util.get_simple_platform(): + case "linux": + env["CC"] = util.safe_which("clang-19") + env["LLVM_AR"] = util.safe_which("llvm-ar-19") + env["LLVM_PROFDATA"] = util.safe_which("llvm-profdata-19") + case "macos": + llvm_prefix = util.get_brew_prefix("llvm") + env["PATH"] = f"{llvm_prefix}/bin:{env['PATH']}" + env["CC"] = f"{llvm_prefix}/bin/clang" + env["LDFLAGS"] = f"-L{llvm_prefix}/lib" + env["CFLAGS"] = f"-I{llvm_prefix}/include" + + if util.get_simple_platform() == "macos": + openssl_prefix = util.get_brew_prefix("openssl@1.1") + env["PKG_CONFIG_PATH"] = f"{openssl_prefix}/lib/pkgconfig" + + args = [] + if pystats: + args.append("--enable-pystats") + if pgo: + args.extend(["--enable-optimizations", "--with-lto=full"]) + if "PYTHON_UOPS" in flags: + assert "JIT" not in flags + args.append("--enable-experimental-jit=interpreter") + if "JIT" in flags: + assert "PYTHON_UOPS" not in flags + args.append("--enable-experimental-jit=yes") + if "NOGIL" in flags: + args.append("--disable-gil") + if "CLANG" in flags: + args.append("--with-tail-call-interp") + args.append("--enable-option-checking=fatal") + if configure_flags := os.environ.get("PYTHON_CONFIGURE_FLAGS"): + args.extend(shlex.split(configure_flags)) + + make_args = [] + if cores := cfg.get("use_cores", None): + make_args.extend(["-j", str(cores)]) + else: + make_args.extend(["-j"]) + + with contextlib.chdir(cpython): + subprocess.check_call(["./configure", *args], env=env) + subprocess.check_call(["make", *make_args], env=env) + + +def compile_windows( + cpython: PathLike, flags: list[str], pgo: bool, force_32bit: bool +) -> None: + cpython = Path(cpython) + + args = ["--%"] # This is the PowerShell "stop parsing" flag + if force_32bit: + args.extend(["-p", "win32"]) + args.extend(["-c", "Release"]) + if pgo: + args.append("--pgo") + if "JIT" in flags: + args.append("--experimental-jit") + if "PYTHON_UOPS" in flags: + args.append("--experimental-jit-interpreter") + if "NOGIL" in flags: + args.append("--disable-gil") + if "CLANG" in flags: + args.extend( + [ + "--tail-call-interp", + '"/p:PlatformToolset=clangcl"', + '"/p:LLVMInstallDir=C:\\Program Files\\LLVM"', + 
'"/p:LLVMToolsVersion=19.1.6"', + ] + ) + + with contextlib.chdir(cpython): + subprocess.check_call( + [ + "powershell.exe", + Path("PCbuild") / "build.bat", + *args, + ], + ) + shutil.copytree(get_windows_build_dir(force_32bit), "libs", dirs_exist_ok=True) + + +def install_pyperformance(venv: PathLike) -> None: + run_in_venv(venv, "pip", ["install", "./pyperformance"]) + + +def tune_system(venv: PathLike, perf: bool) -> None: + # System tuning is Linux only + if util.get_simple_platform() != "linux": + return + + args = ["system", perf and "reset" or "tune"] + if cpu_affinity := os.environ.get("CPU_AFFINITY"): + args.append(f'--affinity="{cpu_affinity}"') + + run_in_venv(venv, "pyperf", args, sudo=True) + + if perf: + subprocess.check_call( + [ + "sudo", + "bash", + "-c", + "echo 100000 > /proc/sys/kernel/perf_event_max_sample_rate", + ] + ) + + +def reset_system(venv: PathLike) -> None: + # System tuning is Linux only + if util.get_simple_platform() != "linux": + return + + run_in_venv( + venv, + "pyperf", + ["system", "reset"], + sudo=True, + ) + + +def _main( + fork: str, + ref: str, + machine: str, + benchmarks: str, + flags: list[str], + force: bool, + pgo: bool, + perf: bool, + pystats: bool, + force_32bit: bool, + run_id: str | None = None, +): + venv = Path("venv") + cpython = Path("cpython") + platform = util.get_simple_platform() + + if force_32bit and platform != "windows": + raise RuntimeError("32-bit builds are only supported on Windows") + if perf and platform != "linux": + raise RuntimeError("perf profiling is only supported on Linux") + if pystats and platform != "linux": + raise RuntimeError("Pystats is only supported on Linux") + + checkout_cpython(fork, ref, cpython) + + if not should_run(force, fork, ref, machine, False, flags, cpython=cpython): + print("No need to run benchmarks. Skipping...") + return + + checkout_benchmarks() + + match platform: + case "linux" | "macos": + compile_unix(cpython, flags, pgo, pystats) + case "windows": + compile_windows(cpython, flags, pgo, force_32bit) + + # Print out the version of Python we built just so we can confirm it's the + # right thing in the logs + subprocess.check_call([get_exe_path(cpython, flags, force_32bit), "-VV"]) + + install_pyperformance(venv) + tune_system(venv, perf) + + try: + if Path(".debug").exists(): + shutil.rmtree(".debug") + + pystats_dir = Path("/tmp") / "py_stats" + if pystats: + shutil.rmtree(pystats_dir, ignore_errors=True) + pystats_dir.mkdir(parents=True) + + if perf: + mode = "perf" + elif pystats: + mode = "pystats" + else: + mode = "benchmark" + + mrun_benchmarks._main( + mode, + get_exe_path(cpython, flags, force_32bit), + fork, + ref, + benchmarks, + flags=flags, + run_id=run_id, + test_mode=False, + individual=pystats, + ) + finally: + reset_system(venv) + + +def main(): + parser = argparse.ArgumentParser( + description=""" + Run the full compile/benchmark workflow. 
+ """,
 + formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter, + ) + parser.add_argument("fork", help="The fork of CPython") + parser.add_argument("ref", help="The git ref in the fork") + parser.add_argument( + "machine", + help="The machine to run the benchmarks on.", + ) + parser.add_argument("benchmarks", help="The benchmarks to run") + parser.add_argument("flags", help="Configuration flags") + parser.add_argument("--force", action="store_true", help="Force a re-run") + parser.add_argument( + "--pgo", + action="store_true", + help="Build with profile-guided optimization", + ) + parser.add_argument( + "--perf", + action="store_true", + help="Collect Linux perf profiling data (Linux only)", + ) + parser.add_argument( + "--pystats", + action="store_true", + help="Enable Pystats (Linux only)", + ) + parser.add_argument( + "--32bit", + action="store_true", + dest="force_32bit", + help="Do a 32-bit build (Windows only)", + ) + parser.add_argument("--run_id", default=None, type=str, help="The GitHub run id") + args = parser.parse_args() + + _main( + args.fork, + args.ref, + args.machine, + args.benchmarks, + mflags.parse_flags(args.flags), + args.force, + args.pgo, + args.perf, + args.pystats, + args.force_32bit, + args.run_id, + ) + + +if __name__ == "__main__": + main() diff --git a/bench_runner/templates/_benchmark.src.yml b/bench_runner/templates/_benchmark.src.yml index 5b80eba8..f7178a27 100644 --- a/bench_runner/templates/_benchmark.src.yml +++ b/bench_runner/templates/_benchmark.src.yml @@ -71,65 +71,11 @@ jobs: - name: git gc run: | git gc - - name: Checkout CPython - uses: actions/checkout@v4 - with: - persist-credentials: false - repository: ${{ inputs.fork }}/cpython - path: cpython - ref: ${{ inputs.ref }} - fetch-depth: 50 - - name: Install dependencies from PyPI - run: | - Remove-Item venv -Recurse -ErrorAction SilentlyContinue - py -m venv venv - venv\Scripts\python.exe -m pip install --upgrade pip - venv\Scripts\python.exe -m pip install -r requirements.txt - - name: Should we run? 
- if: ${{ always() }} - id: should_run - run: | - venv\Scripts\python.exe -m bench_runner should_run ${{ inputs.force }} ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.machine }} false "${{ env.flags }}" >> $GITHUB_OUTPUT - - name: Checkout python-macrobenchmarks - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: pyston/python-macrobenchmarks - path: pyston-benchmarks - ref: ${{ env.PYSTON_BENCHMARKS_HASH }} - - name: Checkout pyperformance - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: mdboom/pyperformance - path: pyperformance - ref: ${{ env.PYPERFORMANCE_HASH }} - - name: Build Python - if: ${{ steps.should_run.outputs.should_run != 'false' }} - # The build.bat script is much easier to use from cmd - shell: cmd - run: | - cd cpython - PCbuild\build.bat %BUILD_FLAGS% ${{ (inputs.pgo == true) && '--pgo' || '' }} ${{ inputs.clang == true && '--tail-call-interp' || '' }} ${{ inputs.jit == true && '--experimental-jit' || '' }} ${{ inputs.tier2 == true && '--experimental-jit-interpreter' || '' }} ${{ inputs.nogil == true && '--disable-gil' || '' }} -c Release ${{ inputs.clang == true && '"/p:PlatformToolset=clangcl"' || '' }} ${{ inputs.clang == true && '"/p:LLVMInstallDir=C:\Program Files\LLVM"' || '' }} ${{ inputs.clang == true && '"/p:LLVMToolsVersion=19.1.6"' || '' }} - - name: Copy Python to different location - if: ${{ steps.should_run.outputs.should_run != 'false' }} + - name: Building Python and running pyperformance run: | - # Copy the build products to a place that libraries can find them. - cd cpython - Copy-Item -Path $env:BUILD_DEST -Destination "libs" -Recurse - - name: Install pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - venv\Scripts\python.exe -m pip install .\pyperformance - - name: Running pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - venv\Scripts\python.exe -m bench_runner run_benchmarks benchmark ${{ (inputs.nogil == true && '(get-item cpython/$env:BUILD_DEST/python3.*.exe).FullName' || 'cpython/$env:BUILD_DEST/python.exe') }} ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.benchmarks || 'all' }} "${{ env.flags }}" --run_id ${{ github.run_id }} + python workflow_bootstrap.py ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.machine }} ${{ inputs.benchmarks || 'all' }} "${{ env.flags }}" ${{ inputs.force && '--force' || '' }} ${{ inputs.pgo && '--pgo' || '' }} --run_id ${{ github.run_id }} # Pull again, since another job may have committed results in the meantime - name: Pull benchmarking - if: ${{ steps.should_run.outputs.should_run != 'false' }} run: | # Another benchmarking task may have created results for the same # commit while the above was running. This "magic" incantation means @@ -137,12 +83,10 @@ jobs: # just pulled in in that case. 
git pull -s recursive -X ours --autostash --rebase - name: Add data to repo - if: ${{ steps.should_run.outputs.should_run != 'false' }} uses: EndBug/add-and-commit@v9 with: add: results - name: Upload artifacts - if: ${{ steps.should_run.outputs.should_run != 'false' }} uses: actions/upload-artifact@v4 with: name: benchmark @@ -161,79 +105,17 @@ jobs: run: | git gc - uses: fregante/setup-git-user@v2 - - name: Checkout CPython - uses: actions/checkout@v4 - with: - persist-credentials: false - repository: ${{ inputs.fork }}/cpython - path: cpython - ref: ${{ inputs.ref }} - fetch-depth: 50 - - name: Install dependencies from PyPI - run: | - rm -rf venv - python -m venv venv - venv/bin/python -m pip install --upgrade pip - venv/bin/python -m pip install -r requirements.txt - - name: Should we run? - if: ${{ always() }} - id: should_run - run: | - venv/bin/python -m bench_runner should_run ${{ inputs.force }} ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.machine }} false ${{ env.flags }} >> $GITHUB_OUTPUT - - name: Checkout python-macrobenchmarks - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: pyston/python-macrobenchmarks - path: pyston-benchmarks - ref: ${{ env.PYSTON_BENCHMARKS_HASH }} - - name: Checkout pyperformance - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} + - name: Setup system Python + if: ${{ runner.arch == 'X64' }} + uses: actions/setup-python@v5 with: - persist-credentials: false - repository: mdboom/pyperformance - path: pyperformance - ref: ${{ env.PYPERFORMANCE_HASH }} - - name: Build with clang - if: ${{ inputs.clang }} + python-version: "3.11" + - name: Building Python and running pyperformance run: | - echo "CC=`which clang-19`" >> $GITHUB_ENV - echo "LLVM_AR=`which llvm-ar-19`" >> $GITHUB_ENV - echo "LLVM_PROFDATA=`which llvm-profdata-19`" >> $GITHUB_ENV - - name: Build Python - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - cd cpython - ./configure --enable-option-checking=fatal ${{ inputs.pgo == true && '--enable-optimizations --with-lto=full' || '' }} ${{ inputs.tier2 == true && '--enable-experimental-jit=interpreter' || '' }} ${{ inputs.jit == true && '--enable-experimental-jit=yes' || '' }} ${{ inputs.nogil == true && '--disable-gil' || '' }} ${{ inputs.clang == true && '--with-tail-call-interp' || '' }} ${PYTHON_CONFIGURE_FLAGS:-} - make ${{ runner.arch == 'ARM64' && '-j' || '-j4' }} - ./python -VV - - name: Install pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - venv/bin/python -m pip install ./pyperformance - - name: Tune system - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - sudo LD_LIBRARY_PATH=$LD_LIBRARY_PATH venv/bin/python -m pyperf system ${{ inputs.perf && 'reset' || 'tune ${CPU_AFFINITY:+--affinity="$CPU_AFFINITY"}' }} - - name: Tune for (Linux) perf - if: ${{ steps.should_run.outputs.should_run != 'false' && inputs.perf }} - run: | - # Must match the PERF_PERIOD value in profiling_plot.py - sudo bash -c "echo 100000 > /proc/sys/kernel/perf_event_max_sample_rate" - - name: Running pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - rm -rf ~/.debug/* - venv/bin/python -m bench_runner run_benchmarks ${{ inputs.perf && 'perf' || 'benchmark' }} cpython/python ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} --run_id ${{ github.run_id }} - - name: Untune system - if: ${{ 
steps.should_run.outputs.should_run != 'false' }} - run: | - sudo LD_LIBRARY_PATH=$LD_LIBRARY_PATH venv/bin/python -m pyperf system reset + python workflow_bootstrap.py ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.machine }} ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} ${{ inputs.force && '--force' || '' }} ${{ inputs.pgo && '--pgo' || '' }} ${{ inputs.perf && '--perf' || '' }} --run_id ${{ github.run_id }} # Pull again, since another job may have committed results in the meantime - name: Pull benchmarking - if: ${{ steps.should_run.outputs.should_run != 'false' && !inputs.perf }} + if: ${{ !inputs.perf }} run: | # Another benchmarking task may have created results for the same # commit while the above was running. This "magic" incantation means @@ -241,12 +123,12 @@ jobs: # just pulled in in that case. git pull -s recursive -X ours --autostash --rebase - name: Adding data to repo - if: ${{ steps.should_run.outputs.should_run != 'false' && !inputs.perf }} + if: ${{ !inputs.perf }} uses: EndBug/add-and-commit@v9 with: add: results - name: Upload benchmark artifacts - if: ${{ steps.should_run.outputs.should_run != 'false' && !inputs.perf }} + if: ${{ !inputs.perf }} uses: actions/upload-artifact@v4 with: name: benchmark @@ -254,7 +136,7 @@ jobs: benchmark.json overwrite: true - name: Upload perf artifacts - if: ${{ steps.should_run.outputs.should_run != 'false' && inputs.perf }} + if: ${{ inputs.perf }} uses: actions/upload-artifact@v4 with: name: perf @@ -270,70 +152,11 @@ jobs: - name: git gc run: | git gc - - name: Checkout CPython - uses: actions/checkout@v4 - with: - persist-credentials: false - repository: ${{ inputs.fork }}/cpython - path: cpython - ref: ${{ inputs.ref }} - fetch-depth: 50 - - name: Install dependencies from PyPI - run: | - rm -rf venv - python3 -m venv venv - venv/bin/python -m pip install --upgrade pip - venv/bin/python -m pip install -r requirements.txt - - name: Should we run? 
- if: ${{ always() }} - id: should_run - run: | - venv/bin/python -m bench_runner should_run ${{ inputs.force }} ${{ inputs.force }} ${{ inputs.ref }} ${{ inputs.machine }} false ${{ env.flags }} >> $GITHUB_OUTPUT - - name: Checkout python-macrobenchmarks - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: pyston/python-macrobenchmarks - path: pyston-benchmarks - ref: ${{ env.PYSTON_BENCHMARKS_HASH }} - - name: Checkout pyperformance - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: mdboom/pyperformance - path: pyperformance - ref: ${{ env.PYPERFORMANCE_HASH }} - - name: Setup environment - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - echo "PKG_CONFIG_PATH=$(brew --prefix openssl@1.1)/lib/pkgconfig" >> $GITHUB_ENV - - name: Build with clang - if: ${{ inputs.clang }} - run: | - echo "PATH=$(brew --prefix llvm)/bin:$PATH" >> $GITHUB_ENV - echo "CC=$(brew --prefix llvm)/bin/clang" >> $GITHUB_ENV - echo "LDFLAGS=-L$(brew --prefix llvm)/lib" >> $GITHUB_ENV - echo "CFLAGS=-I$(brew --prefix llvm)/include" >> $GITHUB_ENV - - name: Build Python - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - cd cpython - ./configure --enable-option-checking=fatal ${{ inputs.pgo == true && '--enable-optimizations --with-lto=full' || '' }} ${{ inputs.tier2 == true && '--enable-experimental-jit=interpreter' || '' }} ${{ inputs.jit == true && '--enable-experimental-jit=yes' || '' }} ${{ inputs.nogil == true && '--disable-gil' || '' }} ${{ inputs.clang == true && '--with-tail-call-interp' || '' }} ${PYTHON_CONFIGURE_FLAGS:-} - make -j4 - ./python.exe -VV - - name: Install pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - venv/bin/python -m pip install ./pyperformance - - name: Running pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} + - name: Building Python and running pyperformance run: | - venv/bin/python -m bench_runner run_benchmarks benchmark cpython/python.exe ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} --run_id ${{ github.run_id }} + python3 workflow_bootstrap.py ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.machine }} ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} ${{ inputs.force && '--force' || '' }} ${{ inputs.pgo && '--pgo' || '' }} --run_id ${{ github.run_id }} # Pull again, since another job may have committed results in the meantime - name: Pull benchmarking - if: ${{ steps.should_run.outputs.should_run != 'false' }} run: | # Another benchmarking task may have created results for the same # commit while the above was running. This "magic" incantation means @@ -341,12 +164,10 @@ jobs: # just pulled in in that case. 
git pull -s recursive -X ours --autostash --rebase - name: Add data to repo - if: ${{ steps.should_run.outputs.should_run != 'false' }} uses: EndBug/add-and-commit@v9 with: add: results - name: Upload artifacts - if: ${{ steps.should_run.outputs.should_run != 'false' }} uses: actions/upload-artifact@v4 with: name: benchmark diff --git a/bench_runner/templates/_pystats.src.yml b/bench_runner/templates/_pystats.src.yml index b5272edb..8ef1b143 100644 --- a/bench_runner/templates/_pystats.src.yml +++ b/bench_runner/templates/_pystats.src.yml @@ -18,9 +18,6 @@ name: _pystats force: description: "Rerun and replace results if commit already exists" type: boolean - individual: - description: "Collect pystats for each individual benchmark" - type: boolean workflow_call: inputs: @@ -39,9 +36,6 @@ name: _pystats force: description: "Rerun and replace results if commit already exists" type: boolean - individual: - description: "Collect pystats for each individual benchmark" - type: boolean jobs: collect-stats: @@ -56,63 +50,10 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.11" - - name: Checkout CPython - uses: actions/checkout@v4 - with: - persist-credentials: false - repository: ${{ inputs.fork }}/cpython - ref: ${{ inputs.ref }} - path: cpython - fetch-depth: 50 - - name: Install dependencies from PyPI - run: | - rm -rf venv - python -m venv venv - venv/bin/python -m pip install -r requirements.txt - - name: Should we run? - if: ${{ always() }} - id: should_run - run: | - venv/bin/python -m bench_runner should_run ${{ inputs.force }} ${{ inputs.fork }} ${{ inputs.ref }} all true ${{ env.flags }} >> $GITHUB_OUTPUT - - name: Checkout python-macrobenchmarks - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: pyston/python-macrobenchmarks - path: pyston-benchmarks - ref: ${{ env.PYSTON_BENCHMARKS_HASH }} - - name: Checkout pyperformance - uses: actions/checkout@v4 - if: ${{ steps.should_run.outputs.should_run != 'false' }} - with: - persist-credentials: false - repository: mdboom/pyperformance - path: pyperformance - ref: ${{ env.PYPERFORMANCE_HASH }} - - name: Create pystats directory - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - # If we don't do this, stats are printed to the console - rm -rf /tmp/py_stats - mkdir /tmp/py_stats - - name: Build Python - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - cd cpython - ./configure --enable-option-checking=fatal --enable-pystats --prefix=$PWD/install ${{ inputs.tier2 == true && '--enable-experimental-jit=interpreter' || '' }} ${{ inputs.jit == true && '--enable-experimental-jit=yes' || '' }} ${{ inputs.nogil == true && '--disable-gil' || '' }} - make -j4 - make install - - name: Install pyperformance into the system python - if: ${{ steps.should_run.outputs.should_run != 'false' }} - run: | - venv/bin/python -m pip install --no-binary :all: ./pyperformance - - name: Running pyperformance - if: ${{ steps.should_run.outputs.should_run != 'false' }} + - name: Build CPython and run pyperformance benchmarks run: | - venv/bin/python -m bench_runner run_benchmarks pystats cpython/python ${{ inputs.fork }} ${{ inputs.ref }} ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} --run_id ${{ github.run_id }} ${{ inputs.individual == true && '--individual' || '' }} + python workflow_bootstrap.py --pystats ${{ inputs.fork }} ${{ inputs.ref }} all ${{ inputs.benchmarks || 'all' }} ${{ env.flags }} --run_id ${{ github.run_id 
}} - name: Pull benchmarking - if: ${{ steps.should_run.outputs.should_run != 'false' }} run: | # Another benchmarking task may have created results for the same # commit while the above was running. This "magic" incantation means @@ -120,7 +61,6 @@ jobs: # just pulled in in that case. git pull -s recursive -X ours --autostash --rebase - name: Add data to repo - if: ${{ steps.should_run.outputs.should_run != 'false' }} uses: EndBug/add-and-commit@v9 with: add: results diff --git a/bench_runner/templates/benchmark.src.yml b/bench_runner/templates/benchmark.src.yml index f5e90c62..d569da9f 100644 --- a/bench_runner/templates/benchmark.src.yml +++ b/bench_runner/templates/benchmark.src.yml @@ -104,7 +104,6 @@ jobs: fork: ${{ inputs.fork }} ref: ${{ inputs.ref }} benchmarks: ${{ inputs.benchmarks }} - individual: true force: true secrets: inherit @@ -116,7 +115,6 @@ jobs: fork: python ref: ${{ needs.determine_base.outputs.ref }} benchmarks: ${{ inputs.benchmarks }} - individual: true force: false secrets: inherit diff --git a/bench_runner/templates/env.yml b/bench_runner/templates/env.yml deleted file mode 100644 index e7268417..00000000 --- a/bench_runner/templates/env.yml +++ /dev/null @@ -1,2 +0,0 @@ -PYPERFORMANCE_HASH: 56d12a8fd7cc1432835965d374929bfa7f6f7a07 -PYSTON_BENCHMARKS_HASH: 265655e7f03ace13ec1e00e1ba299179e69f8a00 diff --git a/bench_runner/templates/workflow_bootstrap.py b/bench_runner/templates/workflow_bootstrap.py new file mode 100644 index 00000000..f919c233 --- /dev/null +++ b/bench_runner/templates/workflow_bootstrap.py @@ -0,0 +1,79 @@ +# This script may only use the standard library, since it bootstraps setting up +# the virtual environment to run the full bench_runner. + + +# NOTE: This file should import in Python 3.9 or later so it can at least print +# the error message that the version of Python is too old. + + +from pathlib import Path +import shutil +import subprocess +import sys + + +def create_venv(venv: Path) -> None: + if venv.exists(): + shutil.rmtree(venv) + + subprocess.check_call( + [ + sys.executable, + "-m", + "venv", + str(venv), + ] + ) + + +def run_in_venv( + venv: Path, module: str, cmd: list[str], prefix: list[str] = [] +) -> None: + venv = Path(venv) + + if sys.platform.startswith("win"): + exe = Path("Scripts") / "python.exe" + else: + exe = Path("bin") / "python" + + args = [ + *prefix, + str(venv / exe), + "-m", + module, + *cmd, + ] + + print("Running command:", " ".join(args)) + subprocess.check_call(args) + + +def install_requirements(venv: Path) -> None: + run_in_venv(venv, "pip", ["install", "--upgrade", "pip"]) + run_in_venv(venv, "pip", ["install", "-r", "requirements.txt"]) + + +def main(): + venv = Path("venv") + create_venv(venv) + install_requirements(venv) + + # Now that we've installed the full bench_runner library, + # continue on in a new process... 
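+ # Everything on the command line after the literal "workflow_bootstrap.py"
+ # is forwarded unchanged to "python -m bench_runner workflow" in the venv.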
+
+ if "workflow_bootstrap.py" not in sys.argv:
+ raise ValueError("Couldn't parse command line")
+ last_arg = sys.argv.index("workflow_bootstrap.py")
+
+ run_in_venv(venv, "bench_runner", ["workflow", *sys.argv[last_arg + 1 :]]) + + +if __name__ == "__main__": + if sys.version_info[:2] < (3, 11): + print( + "The benchmarking infrastructure requires Python 3.11 or later.", + file=sys.stderr, + ) + sys.exit(1) + + main() diff --git a/bench_runner/util.py b/bench_runner/util.py index 59cbc867..63845c10 100644 --- a/bench_runner/util.py +++ b/bench_runner/util.py @@ -1,9 +1,11 @@ import functools -import hashlib import itertools import os from pathlib import Path -from typing import TypeAlias, Union +import shutil +import subprocess +import sys +from typing import Literal, TypeAlias, Union from . import config @@ -12,13 +14,6 @@ PathLike: TypeAlias = Union[str, os.PathLike] -def get_benchmark_hash() -> str: - hash = hashlib.sha256() - hash.update(os.environ["PYPERFORMANCE_HASH"].encode("ascii")[:7]) - hash.update(os.environ["PYSTON_BENCHMARKS_HASH"].encode("ascii")[:7]) - return hash.hexdigest()[:6] - - TYPE_TO_ICON = { "table": "📄", "time plot": "📈", @@ -55,3 +50,38 @@ def has_any_element(iterable): return True # If successful, the generator is not empty except StopIteration: return False # If StopIteration is raised, the generator is empty + + +def safe_which(cmd: str) -> str: + """ + Like shutil.which, but raises a RuntimeError if the command is not found. + """ + path = shutil.which(cmd) + if path is None: + raise RuntimeError(f"Command {cmd} not found in PATH") + return path + + +def get_brew_prefix(command: str) -> str: + """ + Get the installation prefix of a Homebrew package. + """ + try: + prefix = subprocess.check_output(["brew", "--prefix", command]) + except subprocess.CalledProcessError: + raise RuntimeError(f"Unable to find brew installation prefix for {command}") + return prefix.decode("utf-8").strip() + + +@functools.cache +def get_simple_platform() -> Literal["linux", "macos", "windows"]: + """
+ Return a basic platform name: linux, macos or windows. 
+ """ + if sys.platform.startswith("linux"): + return "linux" + elif sys.platform == "darwin": + return "macos" + elif sys.platform.startswith("win"): + return "windows" + raise RuntimeError(f"Unsupported platform {sys.platform}.") diff --git a/pyproject.toml b/pyproject.toml index cbe9ff70..fe033f2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ ] description = "Faster CPython's benchmarking runner utilities" readme = "README.md" -requires-python = ">=3.10" +requires-python = ">=3.11" license = {text = "BSD-3-Clause"} classifiers = [ "Programming Language :: Python :: 3", @@ -21,7 +21,7 @@ dependencies = [ "rich-argparse==1.7.0", "ruamel.yaml==0.18.10", "scour==0.38.2", - "tomli==2.0.1; python_version < '3.11'", + "tomli==2.0.1", "wheel", ] dynamic = ["version"] diff --git a/tests/test_run_benchmarks.py b/tests/test_run_benchmarks.py index 9d24ede3..99a14106 100644 --- a/tests/test_run_benchmarks.py +++ b/tests/test_run_benchmarks.py @@ -9,11 +9,11 @@ import pytest +from bench_runner import benchmark_definitions from bench_runner import git from bench_runner.scripts import generate_results from bench_runner.scripts import run_benchmarks -from bench_runner.scripts import should_run -from bench_runner import util +from bench_runner.scripts import workflow DATA_PATH = Path(__file__).parent / "data" @@ -32,8 +32,16 @@ def dummy(*args, **kwargs): monkeypatch.setattr(git, "get_git_merge_base", dummy) +def hardcode_benchmark_hash(monkeypatch): + def dummy(*args, **kwargs): + return "215d35" + + monkeypatch.setattr(benchmark_definitions, "get_benchmark_hash", dummy) + + def test_update_metadata(benchmarks_checkout, monkeypatch): dont_get_git_merge_base(monkeypatch) + hardcode_benchmark_hash(monkeypatch) shutil.copy( DATA_PATH @@ -67,7 +75,9 @@ def test_update_metadata(benchmarks_checkout, monkeypatch): ) -def test_run_benchmarks(benchmarks_checkout): +def test_run_benchmarks(benchmarks_checkout, monkeypatch): + hardcode_benchmark_hash(monkeypatch) + shutil.copyfile( DATA_PATH / "bench_runner.toml", benchmarks_checkout / "bench_runner.toml" ) @@ -148,88 +158,88 @@ def test_run_benchmarks(benchmarks_checkout): assert returncode == 1 -def test_should_run_exists_noforce(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_exists_noforce(benchmarks_checkout, monkeypatch): + hardcode_benchmark_hash(monkeypatch) repo = _copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) - should_run._main( + result = workflow.should_run( False, "python", "main", "linux-x86_64-linux", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.strip() == "should_run=false" + assert result is False assert (repo / "results" / "bm-20220323-3.10.4-9d38120").is_dir() -def test_should_run_diff_machine_noforce(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_diff_machine_noforce(benchmarks_checkout, monkeypatch): repo = _copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) - should_run._main( + result = workflow.should_run( False, "python", "main", "darwin-x86_64-darwin", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.strip() == "should_run=true" + assert result is True assert len(list((repo / "results" / "bm-20220323-3.10.4-9d38120").iterdir())) == 1 -def test_should_run_all_noforce(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_all_noforce(benchmarks_checkout, monkeypatch): repo = 
_copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) - should_run._main( + result = workflow.should_run( False, "python", "main", "all", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.strip() == "should_run=true" + assert result is True assert len(list((repo / "results" / "bm-20220323-3.10.4-9d38120").iterdir())) == 1 -def test_should_run_noexists_noforce(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_noexists_noforce(benchmarks_checkout, monkeypatch): + hardcode_benchmark_hash(monkeypatch) repo = _copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) shutil.rmtree(repo / "results" / "bm-20220323-3.10.4-9d38120") - should_run._main( + result = workflow.should_run( False, "python", "main", "linux-x86_64-linux", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.strip() == "should_run=true" + assert result is True assert not (repo / "results" / "bm-20220323-3.10.4-9d38120").is_dir() -def test_should_run_exists_force(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_exists_force(benchmarks_checkout, monkeypatch): + hardcode_benchmark_hash(monkeypatch) + repo = _copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) @@ -242,19 +252,18 @@ def remove(repo, path): monkeypatch.setattr(git, "remove", remove) generate_results._main(repo, force=False, bases=["3.11.0b3"]) - should_run._main( + result = workflow.should_run( True, "python", "main", "linux-x86_64-linux", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.splitlines()[-1].strip() == "should_run=true" + assert result is True assert (repo / "results" / "bm-20220323-3.10.4-9d38120").is_dir() assert set(x.name for x in removed_paths) == { "bm-20220323-linux-x86_64-python-main-3.10.4-9d38120-vs-3.11.0b3.svg", @@ -263,24 +272,24 @@ def remove(repo, path): } -def test_should_run_noexists_force(benchmarks_checkout, capsys, monkeypatch): +def test_should_run_noexists_force(benchmarks_checkout, monkeypatch): + hardcode_benchmark_hash(monkeypatch) repo = _copy_repo(benchmarks_checkout) monkeypatch.chdir(repo) shutil.rmtree(repo / "results" / "bm-20220323-3.10.4-9d38120") - should_run._main( + result = workflow.should_run( True, "python", "main", "linux-x86_64-linux", False, - ",,", + [], benchmarks_checkout / "cpython", repo / "results", ) - captured = capsys.readouterr() - assert captured.out.strip() == "should_run=true" + assert result is True assert not (repo / "results" / "bm-20220323-3.10.4-9d38120").is_dir() @@ -292,13 +301,13 @@ def test_should_run_checkout_failed(tmp_path, capsys, monkeypatch): subprocess.check_call(["git", "init"], cwd=cpython_path) with pytest.raises(SystemExit): - should_run._main( + workflow.should_run( True, "python", "main", "linux-x86_64-linux", False, - ",,", + [], cpython_path, repo / "results", ) @@ -350,4 +359,4 @@ def test_run_benchmarks_flags(benchmarks_checkout): def test_get_benchmark_hash(): - assert util.get_benchmark_hash() == "215d35" + assert benchmark_definitions.get_benchmark_hash() == "dcfded"
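The pinned-hash scheme introduced above can be sanity-checked in isolation. This is a minimal sketch of what `get_benchmark_hash()` in `bench_runner/benchmark_definitions.py` computes when no local benchmark checkouts exist (so the pinned hashes in `BENCHMARK_REPOS` are used); it reproduces the `"dcfded"` value asserted by `test_get_benchmark_hash`:

```
import hashlib

# Pinned commits from BENCHMARK_REPOS in bench_runner/benchmark_definitions.py
pinned = [
    "56d12a8fd7cc1432835965d374929bfa7f6f7a07",  # pyperformance
    "265655e7f03ace13ec1e00e1ba299179e69f8a00",  # pyston-benchmarks
]

digest = hashlib.sha256()
for commit in pinned:
    # Only the first 7 ASCII characters of each hash contribute, matching
    # the short-hash convention used by get_benchmark_hash().
    digest.update(commit.encode("ascii")[:7])

print(digest.hexdigest()[:6])  # "dcfded", per test_get_benchmark_hash
```

Because a locally checked-out benchmark repo contributes its current git hash instead of the pinned one, results produced against a modified checkout get a different benchmark hash and cannot be confused with results from the pinned revisions.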