From 68b22b287bc278446a842164001dc3b5ee8231b0 Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Wed, 25 Mar 2026 10:03:42 +1100 Subject: [PATCH 1/6] Separate changes to firedrake-configure and CI from dsroberts/offload-pc --- .github/actionlint.yaml | 4 + .github/workflows/core.yml | 137 +++++++++++++++++++++ .github/workflows/pr.yml | 2 + scripts/firedrake-configure | 234 ++++++++++++++++++++++++++++-------- tests/firedrake/conftest.py | 10 +- 5 files changed, 336 insertions(+), 51 deletions(-) create mode 100644 .github/actionlint.yaml diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml new file mode 100644 index 0000000000..7196b121ca --- /dev/null +++ b/.github/actionlint.yaml @@ -0,0 +1,4 @@ +self-hosted-runner: + labels: + # Custom label for GPU-enabled self-hosted runners + - gpu \ No newline at end of file diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index e738561b9c..85102478fc 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -23,6 +23,10 @@ on: description: Whether to test using macOS type: boolean default: false + test_gpu: + description: Whether to test using CUDA-enabled PETSc + type: boolean + default: false deploy_website: description: Whether to deploy the website type: boolean @@ -54,6 +58,10 @@ on: description: Whether to test using macOS type: boolean default: false + test_gpu: + description: Whether to test using CUDA-enabled PETSc + type: boolean + default: false deploy_website: description: Whether to deploy the website type: boolean @@ -465,6 +473,135 @@ jobs: run: | find . 
-delete + test_gpu: + name: Build and test Firedrake (Linux CUDA) + runs-on: [self-hosted, Linux, gpu] + container: + image: ubuntu:latest + options: --gpus all + if: inputs.test_gpu + env: + OMPI_ALLOW_RUN_AS_ROOT: 1 + OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1 + OMP_NUM_THREADS: 1 + OPENBLAS_NUM_THREADS: 1 + FIREDRAKE_CI: 1 + PYOP2_SPMD_STRICT: 1 + # Disable fast math as it exposes compiler bugs + PYOP2_CFLAGS: -fno-fast-math + # NOTE: One should occasionally update test_durations.json by running + # 'make test_durations' inside a 'firedrake:latest' Docker image. + EXTRA_PYTEST_ARGS: --splitting-algorithm least_duration --timeout=600 --timeout-method=thread -o faulthandler_timeout=660 --durations-path=./firedrake-repo/tests/test_durations.json --durations=50 + PYTEST_MPI_MAX_NPROCS: 8 + PETSC_OPTIONS: -use_gpu_aware_mpi 0 + EXTRA_OPTIONS: -use_gpu_aware_mpi 0 + steps: + - name: Confirm Nvidia GPUs are enabled + # The presence of the nvidia-smi command indicates that the Nvidia drivers have + # successfully been imported into the container, there is no point continuing + # if nvidia-smi is not present + run: nvidia-smi + + - name: Fix HOME + # For unknown reasons GitHub actions overwrite HOME to /github/home + # which will break everything unless fixed + # (https://github.com/actions/runner/issues/863) + run: echo "HOME=/root" >> "$GITHUB_ENV" + + + # Git is needed for actions/checkout and Python for firedrake-configure + # curl needed for adding new deb repositories to ubuntu + - name: Install system dependencies (1) + run: | + apt-get update + apt-get -y install git python3 curl + + + - name: Pre-run cleanup + # Make sure the current directory is empty + run: find . 
-delete + + - uses: actions/checkout@v5 + with: + path: firedrake-repo + ref: ${{ inputs.source_ref }} + + - name: Add Nvidia CUDA deb repositories + run: | + deburl=$( python3 ./firedrake-repo/scripts/firedrake-configure --show-extra-repo-pkg-url --gpu-arch cuda ) + debfile=$( basename "${deburl}" ) + curl -fsSLO "${deburl}" + dpkg -i "${debfile}" + apt-get update + + - name: Install system dependencies (2) + run: | + apt-get -y install \ + $(python3 ./firedrake-repo/scripts/firedrake-configure --arch default --gpu-arch cuda --show-system-packages) + apt-get -y install python3-venv + : # Dependencies needed to run the test suite + apt-get -y install fonts-dejavu graphviz graphviz-dev parallel poppler-utils + + - name: Install PETSc + run: | + if [ ${{ inputs.target_branch }} = 'release' ]; then + git clone --depth 1 \ + --branch $(python3 ./firedrake-repo/scripts/firedrake-configure --gpu-arch cuda --show-petsc-version) \ + https://gitlab.com/petsc/petsc.git + else + git clone --depth 1 https://gitlab.com/petsc/petsc.git + fi + cd petsc + python3 ../firedrake-repo/scripts/firedrake-configure \ + --arch default --gpu-arch cuda --show-petsc-configure-options | \ + xargs -L1 ./configure --with-make-np=4 + make + make check + { + echo "PETSC_DIR=/__w/firedrake/firedrake/petsc" + echo "PETSC_ARCH=arch-firedrake-default-cuda" + echo "SLEPC_DIR=/__w/firedrake/firedrake/petsc/arch-firedrake-default-cuda" + } >> "$GITHUB_ENV" + + - name: Install Firedrake + id: install + run: | + export $(python3 ./firedrake-repo/scripts/firedrake-configure --arch default --gpu-arch cuda --show-env) + python3 -m venv venv + . 
venv/bin/activate + + : # Empty the pip cache to ensure that everything is compiled from scratch + pip cache purge + + if [ ${{ inputs.target_branch }} = 'release' ]; then + EXTRA_PIP_FLAGS='' + else + : # Install build dependencies + pip install "$PETSC_DIR"/src/binding/petsc4py + pip install -r ./firedrake-repo/requirements-build.txt + + : # We have to pass '--no-build-isolation' to use a custom petsc4py + EXTRA_PIP_FLAGS='--no-build-isolation' + fi + + pip install --verbose $EXTRA_PIP_FLAGS \ + --no-binary h5py \ + './firedrake-repo[check]' + + firedrake-clean + pip list + + - name: Run smoke tests + run: | + . venv/bin/activate + firedrake-check + timeout-minutes: 10 + + - name: Post-run cleanup + if: always() + run: | + find . -delete + lint: name: Lint codebase runs-on: ubuntu-latest diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index a1430b57e7..6b63f97cae 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -12,4 +12,6 @@ jobs: target_branch: ${{ github.base_ref }} # Only run macOS tests if the PR is labelled 'macOS' test_macos: ${{ contains(github.event.pull_request.labels.*.name, 'macOS') }} + # Only run GPU tests if the PR is labelled 'gpu' + test_gpu: ${{ contains(github.event.pull_request.labels.*.name, 'gpu') }} secrets: inherit diff --git a/scripts/firedrake-configure b/scripts/firedrake-configure index 0c1030808d..53d0e2f307 100755 --- a/scripts/firedrake-configure +++ b/scripts/firedrake-configure @@ -30,6 +30,15 @@ LINUX_APT_AARCH64 = PackageManager.LINUX_APT_AARCH64 MACOS_HOMEBREW_ARM64 = PackageManager.MACOS_HOMEBREW_ARM64 +class GPUArch(enum.Enum): + NO_GPU = "none" + CUDA = "cuda" + + +NO_GPU = GPUArch.NO_GPU +CUDA = GPUArch.CUDA + + class FiredrakeArch(enum.Enum): DEFAULT = "default" COMPLEX = "complex" @@ -40,6 +49,20 @@ ARCH_COMPLEX = FiredrakeArch.COMPLEX SUPPORTED_PETSC_VERSION = "v3.24.5" +# SuperLU_DIST built via PETSc does not support CUDA 13 +SUPPORTED_CUDA_VERSION = "12.9" + + +CUDA_ARCH_MAP = { 
+ "aarch64": "sbsa" +} +# Structure is ( deb_repo_filename, file_contents, GPG_key_URL ) +# A file named /etc/apt/sources.list.d/deb_repo_filename containing file_contents will be created +# The output of curl -fsSL GPG_Key_URL will be passed to 'apt-key add' +EXTRA_LINUX_APT_PKG_URL = { + NO_GPU: "", + CUDA: f"https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/{CUDA_ARCH_MAP.get(platform.machine(), platform.machine())}/cuda-keyring_1.1-1_all.deb", +} def main(): @@ -77,6 +100,12 @@ Please see https://firedrakeproject.org/install for more information.""" default=ARCH_DEFAULT, help="The target configuration to install.", ) + parser.add_argument( + "--gpu-arch", + choices=[arch.value for arch in GPUArch], + default="none", + help="Target GPU architecture" + ) cmd_group = parser.add_mutually_exclusive_group(required=True) cmd_group.add_argument( "--show-system-packages", @@ -113,6 +142,12 @@ Please see https://firedrakeproject.org/install for more information.""" action="store_true", help="Print out the environment variables that need to be exported to install Firedrake.", ) + cmd_group.add_argument( + "--show-extra-repo-pkg-url", + "--repopkgurl", + action="store_true", + help="Print out the URL of any package required to enable non-OS repo access for this build", + ) args = parser.parse_args() if args.package_manager is not None: @@ -124,13 +159,20 @@ Please see https://firedrakeproject.org/install for more information.""" package_manager = sniff_package_manager() arch = FiredrakeArch(args.arch) + gpu_arch = GPUArch(args.gpu_arch) + if gpu_arch != NO_GPU and package_manager == MACOS_HOMEBREW_ARM64: + raise RuntimeError( + "GPU-compatible PETSc builds are currently only supported " + "on Linux" + ) + if args.show_system_packages: if package_manager is None: raise RuntimeError( "Cannot install Firedrake dependencies without a package manager, " "please install them manually" ) - print(" ".join(SYSTEM_PACKAGES[package_manager, arch]), end="") + print(" 
".join(SYSTEM_PACKAGES[package_manager, arch, gpu_arch]), end="") elif args.show_minimal_system_packages: if package_manager is None: raise RuntimeError( @@ -139,12 +181,14 @@ Please see https://firedrakeproject.org/install for more information.""" ) print(" ".join(MINIMAL_SYSTEM_PACKAGES[package_manager]), end="") elif args.show_petsc_configure_options: - print(" ".join(PETSC_CONFIGURE_OPTIONS[package_manager, arch]), end="") + print(" ".join(PETSC_CONFIGURE_OPTIONS[package_manager, arch, gpu_arch]), end="") elif args.show_petsc_version: print(SUPPORTED_PETSC_VERSION, end="") + elif args.show_extra_repo_pkg_url: + print(EXTRA_LINUX_APT_PKG_URL[gpu_arch], end="") else: assert args.show_env - print(" ".join(ENVIRONMENT_VARS[package_manager, arch]), end="") + print(" ".join(ENVIRONMENT_VARS[package_manager, arch, gpu_arch]), end="") def sniff_package_manager() -> Optional[PackageManager]: @@ -199,7 +243,7 @@ BASE_LINUX_APT_PACKAGES = ( MINIMAL_LINUX_APT_PACKAGES + ("bison", "cmake", "libopenblas-dev", "libopenmpi-dev") ) -PETSC_EXTRAS_LINUX_APT_PACKAGES = ( +PETSC_EXTRAS_COMMON_APT_PACKAGES = ( "libfftw3-dev", "libfftw3-mpi-dev", "libhwloc-dev", @@ -210,13 +254,33 @@ PETSC_EXTRAS_LINUX_APT_PACKAGES = ( "libpnetcdf-dev", "libptscotch-dev", "libscalapack-openmpi-dev", +) + +PETSC_EXTRAS_LINUX_APT_PACKAGES = PETSC_EXTRAS_COMMON_APT_PACKAGES + ( "libsuitesparse-dev", "libsuperlu-dev", "libsuperlu-dist-dev", ) +cuda_ver_str = SUPPORTED_CUDA_VERSION.replace(".", "-") + +PETSC_EXTRAS_LINUX_APT_CUDA_PACKAGES = PETSC_EXTRAS_COMMON_APT_PACKAGES + ( + f"cuda-compat-{cuda_ver_str}", + f"cuda-nvtx-{cuda_ver_str}", + f"cuda-cudart-dev-{cuda_ver_str}", + f"cuda-command-line-tools-{cuda_ver_str}", + f"cuda-minimal-build-{cuda_ver_str}", + f"cuda-libraries-dev-{cuda_ver_str}", + f"cuda-nvml-dev-{cuda_ver_str}", + f"libnpp-dev-{cuda_ver_str}", + f"libcusparse-dev-{cuda_ver_str}", + f"libcublas-dev-{cuda_ver_str}", +) + LINUX_APT_PACKAGES = BASE_LINUX_APT_PACKAGES + 
PETSC_EXTRAS_LINUX_APT_PACKAGES +LINUX_APT_PACKAGES_CUDA = BASE_LINUX_APT_PACKAGES + PETSC_EXTRAS_LINUX_APT_CUDA_PACKAGES + MINIMAL_MACOS_HOMEBREW_PACKAGES = ( "autoconf", "automake", @@ -255,12 +319,14 @@ MINIMAL_SYSTEM_PACKAGES = { } SYSTEM_PACKAGES = { - (LINUX_APT_X86_64, ARCH_DEFAULT): LINUX_APT_PACKAGES, - (LINUX_APT_X86_64, ARCH_COMPLEX): LINUX_APT_PACKAGES, - (LINUX_APT_AARCH64, ARCH_DEFAULT): LINUX_APT_PACKAGES, - (LINUX_APT_AARCH64, ARCH_COMPLEX): LINUX_APT_PACKAGES, - (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT): MACOS_HOMEBREW_PACKAGES, - (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX): MACOS_HOMEBREW_PACKAGES, + (LINUX_APT_X86_64, ARCH_DEFAULT, NO_GPU): LINUX_APT_PACKAGES, + (LINUX_APT_X86_64, ARCH_COMPLEX, NO_GPU): LINUX_APT_PACKAGES, + (LINUX_APT_AARCH64, ARCH_DEFAULT, NO_GPU): LINUX_APT_PACKAGES, + (LINUX_APT_AARCH64, ARCH_COMPLEX, NO_GPU): LINUX_APT_PACKAGES, + (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT, NO_GPU): MACOS_HOMEBREW_PACKAGES, + (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX, NO_GPU): MACOS_HOMEBREW_PACKAGES, + (LINUX_APT_X86_64, ARCH_DEFAULT, CUDA): LINUX_APT_PACKAGES_CUDA, + (LINUX_APT_AARCH64, ARCH_DEFAULT, CUDA): LINUX_APT_PACKAGES_CUDA, } COMMON_PETSC_CONFIGURE_OPTIONS = ( @@ -271,11 +337,18 @@ COMMON_PETSC_CONFIGURE_OPTIONS = ( "--with-strict-petscerrorcode", ) + +class PetscPackageAction(enum.IntEnum): + PETSC_AUTODETECT = enum.auto() + PETSC_DOWNLOAD = enum.auto() + + # Placeholder value to use when we want PETSc to autodetect the package -PETSC_AUTODETECT = 333 +PETSC_AUTODETECT = PetscPackageAction.PETSC_AUTODETECT # Placeholder value to use when we want PETSc to download the package -PETSC_DOWNLOAD = 666 +PETSC_DOWNLOAD = PetscPackageAction.PETSC_DOWNLOAD + # For each package and architecture there are a number of different types of input: # 1. PETSC_AUTODETECT - PETSc will be able to find the package itself @@ -285,7 +358,10 @@ PETSC_DOWNLOAD = 666 # 'lib' subdirectories) # 4. 
tuple[str, tuple[str, ...]] - a 2-tuple consisting of the includes directory # (location of the header files) and a collection of library files that PETSc needs. -PETSC_EXTERNAL_PACKAGE_SPECS = { +PetscSpecValueType = PetscPackageAction | str | tuple[str | None, tuple[str, ...]] +PetscSpecsDictType = dict[str, dict[PackageManager, PetscSpecValueType]] + +PETSC_EXTERNAL_PACKAGE_SPECS_COMMON: PetscSpecsDictType = { "bison": { LINUX_APT_X86_64: PETSC_AUTODETECT, LINUX_APT_AARCH64: PETSC_AUTODETECT, @@ -341,16 +417,6 @@ PETSC_EXTERNAL_PACKAGE_SPECS = { LINUX_APT_AARCH64: (None, ("-lscalapack-openmpi",)), MACOS_HOMEBREW_ARM64: "/opt/homebrew", }, - "suitesparse": { - LINUX_APT_X86_64: PETSC_AUTODETECT, - LINUX_APT_AARCH64: PETSC_AUTODETECT, - MACOS_HOMEBREW_ARM64: "/opt/homebrew", - }, - "superlu_dist": { - LINUX_APT_X86_64: PETSC_AUTODETECT, - LINUX_APT_AARCH64: PETSC_AUTODETECT, - MACOS_HOMEBREW_ARM64: PETSC_DOWNLOAD, - }, "zlib": { LINUX_APT_X86_64: PETSC_AUTODETECT, LINUX_APT_AARCH64: PETSC_AUTODETECT, @@ -358,6 +424,43 @@ PETSC_EXTERNAL_PACKAGE_SPECS = { }, } +PETSC_EXTERNAL_PACKAGE_SPECS: PetscSpecsDictType = ( + PETSC_EXTERNAL_PACKAGE_SPECS_COMMON + | { + "suitesparse": { + LINUX_APT_X86_64: PETSC_AUTODETECT, + LINUX_APT_AARCH64: PETSC_AUTODETECT, + MACOS_HOMEBREW_ARM64: "/opt/homebrew", + }, + "superlu_dist": { + LINUX_APT_X86_64: PETSC_AUTODETECT, + LINUX_APT_AARCH64: PETSC_AUTODETECT, + MACOS_HOMEBREW_ARM64: PETSC_DOWNLOAD, + }, + } +) + +PETSC_EXTERNAL_PACKAGE_SPECS_CUDA: PetscSpecsDictType = ( + PETSC_EXTERNAL_PACKAGE_SPECS_COMMON + | { + "suitesparse": { + LINUX_APT_X86_64: PETSC_DOWNLOAD, + LINUX_APT_AARCH64: PETSC_DOWNLOAD, + MACOS_HOMEBREW_ARM64: "/opt/homebrew", + }, + "superlu_dist": { + LINUX_APT_X86_64: PETSC_DOWNLOAD, + LINUX_APT_AARCH64: PETSC_DOWNLOAD, + MACOS_HOMEBREW_ARM64: PETSC_DOWNLOAD, + }, + "umpire": { + LINUX_APT_X86_64: PETSC_DOWNLOAD, + LINUX_APT_AARCH64: PETSC_DOWNLOAD, + MACOS_HOMEBREW_ARM64: PETSC_DOWNLOAD, + }, + } +) + 
COMMON_PETSC_EXTERNAL_PACKAGES = ( "bison", "fftw", @@ -374,10 +477,13 @@ COMMON_PETSC_EXTERNAL_PACKAGES = ( "zlib", ) +PETSC_EXTRA_EXTERNAL_PACKAGES_CUDA = ("umpire",) + def prepare_external_package_configure_options( external_packages: Sequence[str], - package_manager: Optional[PackageManager], + package_manager: PackageManager | None = None, + gpu_arch: GPUArch = NO_GPU, ) -> tuple[str, ...]: configure_options = [] for external_package in external_packages: @@ -385,7 +491,10 @@ def prepare_external_package_configure_options( # Don't know anything about the system, download everything package_spec = PETSC_DOWNLOAD else: - package_spec = PETSC_EXTERNAL_PACKAGE_SPECS[external_package][package_manager] + if gpu_arch == NO_GPU: + package_spec = PETSC_EXTERNAL_PACKAGE_SPECS[external_package][package_manager] + elif gpu_arch == CUDA: + package_spec = PETSC_EXTERNAL_PACKAGE_SPECS_CUDA[external_package][package_manager] if package_spec == PETSC_AUTODETECT: # PETSc will find the package for us @@ -408,12 +517,20 @@ def prepare_external_package_configure_options( return tuple(configure_options) +def get_petsc_arch(arch: FiredrakeArch, gpu_arch: GPUArch) -> str: + arr = ["arch", "firedrake", arch.value] + if gpu_arch != NO_GPU: + arr.append(gpu_arch.value) + return "-".join(arr) + + def prepare_configure_options( package_manager: Optional[PackageManager], arch: FiredrakeArch, + gpu_arch: GPUArch, ) -> tuple[str, ...]: configure_options = list(COMMON_PETSC_CONFIGURE_OPTIONS) - configure_options.append(f"PETSC_ARCH=arch-firedrake-{arch.value}") + configure_options.append(f"PETSC_ARCH={get_petsc_arch(arch, gpu_arch)}") # include/link flags if package_manager in (LINUX_APT_X86_64, LINUX_APT_AARCH64): @@ -426,10 +543,14 @@ def prepare_configure_options( includes = ( f"{incdir}/hdf5/openmpi", f"{incdir}/scotch", - f"{incdir}/superlu", - f"{incdir}/superlu-dist", ) + if gpu_arch == NO_GPU: + includes = includes + ( + f"{incdir}/superlu", + f"{incdir}/superlu-dist", + ) + libraries 
= ( f"{libdir}/hdf5/openmpi", ) @@ -458,39 +579,59 @@ def prepare_configure_options( if arch == ARCH_COMPLEX: configure_options.append("--with-scalar-type=complex") + if gpu_arch == CUDA: + configure_options.extend( + ["--with-cuda=1", "--with-openmp=1", "--with-cxx-dialect=c++17"] + ) + external_packages = list(COMMON_PETSC_EXTERNAL_PACKAGES) if arch != ARCH_COMPLEX: external_packages.append("hypre") + if gpu_arch == CUDA: + external_packages.extend(PETSC_EXTRA_EXTERNAL_PACKAGES_CUDA) configure_options.extend( - prepare_external_package_configure_options(external_packages, package_manager) + prepare_external_package_configure_options( + external_packages, package_manager, gpu_arch + ) ) return tuple(configure_options) +PETSC_VALID_BUILD_COMBINATIONS = ( + (LINUX_APT_X86_64, ARCH_DEFAULT, NO_GPU), + (LINUX_APT_X86_64, ARCH_COMPLEX, NO_GPU), + (LINUX_APT_AARCH64, ARCH_DEFAULT, NO_GPU), + (LINUX_APT_AARCH64, ARCH_COMPLEX, NO_GPU), + (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT, NO_GPU), + (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX, NO_GPU), + (None, ARCH_DEFAULT, NO_GPU), + (None, ARCH_COMPLEX, NO_GPU), + (LINUX_APT_X86_64, ARCH_DEFAULT, CUDA), + (LINUX_APT_AARCH64, ARCH_DEFAULT, CUDA), + (None, ARCH_DEFAULT, CUDA), +) + + PETSC_CONFIGURE_OPTIONS = { - (package_manager, arch): prepare_configure_options(package_manager, arch) - for (package_manager, arch) in ( - (LINUX_APT_X86_64, ARCH_DEFAULT), - (LINUX_APT_X86_64, ARCH_COMPLEX), - (LINUX_APT_AARCH64, ARCH_DEFAULT), - (LINUX_APT_AARCH64, ARCH_COMPLEX), - (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT), - (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX), - (None, ARCH_DEFAULT), - (None, ARCH_COMPLEX), + (package_manager, arch, gpu_arch): prepare_configure_options( + package_manager, arch, gpu_arch ) + for (package_manager, arch, gpu_arch) in PETSC_VALID_BUILD_COMBINATIONS } def prepare_environment_vars( package_manager: Optional[PackageManager], arch: FiredrakeArch, + gpu_arch: GPUArch, ) -> tuple[str, ...]: vars = { "PETSC_DIR": f"{os.getcwd()}/petsc", - 
"PETSC_ARCH": f"arch-firedrake-{arch.value}", + "PETSC_ARCH": get_petsc_arch(arch, gpu_arch), "HDF5_MPI": "ON", } + if gpu_arch == CUDA: + vars["PATH"] = f"/usr/local/cuda/bin:{os.environ.get('PATH', '')}" if package_manager == MACOS_HOMEBREW_ARM64: # On macOS h5py cannot find the HDF5 library without help @@ -503,17 +644,10 @@ def prepare_environment_vars( ENVIRONMENT_VARS = { - (package_manager, arch): prepare_environment_vars(package_manager, arch) - for (package_manager, arch) in ( - (LINUX_APT_X86_64, ARCH_DEFAULT), - (LINUX_APT_X86_64, ARCH_COMPLEX), - (LINUX_APT_AARCH64, ARCH_DEFAULT), - (LINUX_APT_AARCH64, ARCH_COMPLEX), - (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT), - (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX), - (None, ARCH_DEFAULT), - (None, ARCH_COMPLEX), + (package_manager, arch, gpu_arch): prepare_environment_vars( + package_manager, arch, gpu_arch ) + for (package_manager, arch, gpu_arch) in PETSC_VALID_BUILD_COMBINATIONS } diff --git a/tests/firedrake/conftest.py b/tests/firedrake/conftest.py index 1fd9344dd6..9c82dfc884 100644 --- a/tests/firedrake/conftest.py +++ b/tests/firedrake/conftest.py @@ -166,10 +166,14 @@ def pytest_configure(config): "markers", "skipnetgen: mark as skipped if netgen and ngsPETSc is not installed" ) + config.addinivalue_line( + "markers", + "skipnogpu: mark as skipped when GPU hardware is unavailable" + ) def pytest_collection_modifyitems(session, config, items): - from firedrake.utils import complex_mode, SLATE_SUPPORTS_COMPLEX + from firedrake.utils import complex_mode, device_matrix_type, SLATE_SUPPORTS_COMPLEX for item in items: if complex_mode: @@ -181,6 +185,10 @@ def pytest_collection_modifyitems(session, config, items): if item.get_closest_marker("skipreal") is not None: item.add_marker(pytest.mark.skip(reason="Test makes no sense unless in complex mode")) + if device_matrix_type() is None: + if item.get_closest_marker("skipnogpu") is not None: + item.add_marker(pytest.mark.skip(reason="Test requires GPU hardware to run.")) + 
for dep, marker, reason in dependency_skip_markers_and_reasons: if item.get_closest_marker(marker) is not None and _skip_test_dependency(dep): item.add_marker(pytest.mark.skip(reason)) From f3a4bb69a4bb7765a1ed539158b04de27832112d Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Wed, 25 Mar 2026 11:20:24 +1100 Subject: [PATCH 2/6] Rework device_matrix_type --- firedrake/utils.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/firedrake/utils.py b/firedrake/utils.py index f39f84d478..d8b055284c 100644 --- a/firedrake/utils.py +++ b/firedrake/utils.py @@ -7,6 +7,8 @@ from pyop2.datatypes import IntType # noqa: F401 from pyop2.datatypes import as_ctypes # noqa: F401 from pyop2.mpi import MPI +from petsc4py import PETSc +from functools import cache import petsctools @@ -23,6 +25,55 @@ SLATE_SUPPORTS_COMPLEX = False +@cache +def device_matrix_type(warn: bool = False) -> str | None: + """Get device matrix type + + Attempt to initialise a GPU device and return the PETSc mat_type + compatible with that device, or None if no device is detected + + Args: + warn: Emit a warning containing the reason a device mat_type + has not been returned. Defaults to False. + + Raises: + RuntimeError: Raised when PETSc initialises a GPU device that + Firedrake does not understand + + Returns: + The PETSc mat_type compatible with the GPU device detected on + this system or None + + Typical Usage Example: + mat_type = device_matrix_type(pc.comm.rank == 0) + + """ + _device_mat_type_map = {"HOST": None, "CUDA": "aijcusparse"} + try: + dev = PETSc.Device.create() + except PETSc.Error: + # Could not initialise device - not a failure condition as this could + # be a GPU-enabled PETSc installation running on a CPU-only host. 
+ if warn: + warnings.warn( + "This installation of Firedrake is GPU-enabled, but no GPU device has been detected" + ) + return None + dev_type = dev.getDeviceType() + dev.destroy() + if dev_type not in _device_mat_type_map: + raise RuntimeError(f"Unknown device type: {dev_type} initialised by PETSc") + + if warn: + if dev_type == "HOST": + warnings.warn( + "This installation of Firedrake is not GPU-enabled, to enable GPU functionality " + "PETSc will need to be rebuilt with some GPU capability appropriate for this system " + "(e.g. '--with-cuda=1')." + ) + return _device_mat_type_map[dev_type] + + def _new_uid(comm): uid = comm.Get_attr(FIREDRAKE_UID) if uid is None: From 625acc68031f17f3be0559b95e33ae1dd915b0b3 Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Wed, 25 Mar 2026 12:40:11 +1100 Subject: [PATCH 3/6] Fix docstring in device_matrix_type --- firedrake/utils.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/firedrake/utils.py b/firedrake/utils.py index d8b055284c..dde3bc4a07 100644 --- a/firedrake/utils.py +++ b/firedrake/utils.py @@ -27,26 +27,22 @@ @cache def device_matrix_type(warn: bool = False) -> str | None: - """Get device matrix type + r"""Get device matrix type Attempt to initialise a GPU device and return the PETSc mat_type - compatible with that device, or None if no device is detected + compatible with that device, or None if no device is detected. + Typical Usage Example: + mat_type = device_matrix_type(pc.comm.rank == 0) - Args: - warn: Emit a warning containing the reason a device mat_type + :arg warn: Emit a warning containing the reason a device mat_type has not been returned. Defaults to False. 
- Raises: - RuntimeError: Raised when PETSc initialises a GPU device that + :raises RuntimeError: Raised when PETSc initialises a GPU device that Firedrake does not understand - Returns: - The PETSc mat_type compatible with the GPU device detected on + :returns: The PETSc mat_type compatible with the GPU device detected on this system or None - Typical Usage Example: - mat_type = device_matrix_type(pc.comm.rank == 0) - """ _device_mat_type_map = {"HOST": None, "CUDA": "aijcusparse"} try: From 578f5ed13512d1df6782414c4ab0acc1c4e0aa3f Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Thu, 26 Mar 2026 11:36:56 +1100 Subject: [PATCH 4/6] Address review comments --- .github/workflows/core.yml | 4 ++-- .github/workflows/push.yml | 1 + firedrake/exceptions.py | 6 ++++++ firedrake/utils.py | 22 ++++++++++++++++++---- scripts/firedrake-configure | 27 ++++++++++++++++----------- 5 files changed, 43 insertions(+), 17 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index 85102478fc..856a9fe68f 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -493,8 +493,6 @@ jobs: # 'make test_durations' inside a 'firedrake:latest' Docker image. 
EXTRA_PYTEST_ARGS: --splitting-algorithm least_duration --timeout=600 --timeout-method=thread -o faulthandler_timeout=660 --durations-path=./firedrake-repo/tests/test_durations.json --durations=50 PYTEST_MPI_MAX_NPROCS: 8 - PETSC_OPTIONS: -use_gpu_aware_mpi 0 - EXTRA_OPTIONS: -use_gpu_aware_mpi 0 steps: - name: Confirm Nvidia GPUs are enabled # The presence of the nvidia-smi command indicates that the Nvidia drivers have @@ -543,6 +541,8 @@ jobs: apt-get -y install fonts-dejavu graphviz graphviz-dev parallel poppler-utils - name: Install PETSc + env: + EXTRA_OPTIONS: -use_gpu_aware_mpi 0 run: | if [ ${{ inputs.target_branch }} = 'release' ]; then git clone --depth 1 \ diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 87ce86f32c..6e17222d27 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -13,6 +13,7 @@ jobs: source_ref: ${{ github.ref_name }} target_branch: ${{ github.ref_name }} test_macos: true + test_gpu: true deploy_website: true secrets: inherit diff --git a/firedrake/exceptions.py b/firedrake/exceptions.py index 1a74c6a35e..726de214e3 100644 --- a/firedrake/exceptions.py +++ b/firedrake/exceptions.py @@ -47,3 +47,9 @@ class NonUniqueMeshSequenceError(FiredrakeException): """Raised when calling `.unique()` on a MeshSequence which contains non-unique meshes. """ + + +class UnrecognisedDeviceError(FiredrakeException): + """Raised when a GPU device has been initialised in PETSc that Firedrake + does not support. 
+ """ diff --git a/firedrake/utils.py b/firedrake/utils.py index dde3bc4a07..068f77bb99 100644 --- a/firedrake/utils.py +++ b/firedrake/utils.py @@ -9,6 +9,7 @@ from pyop2.mpi import MPI from petsc4py import PETSc from functools import cache +from firedrake.exceptions import UnrecognisedDeviceError import petsctools @@ -34,13 +35,22 @@ def device_matrix_type(warn: bool = False) -> str | None: Typical Usage Example: mat_type = device_matrix_type(pc.comm.rank == 0) - :arg warn: Emit a warning containing the reason a device mat_type + Parameters + ---------- + warn + Emit a warning containing the reason a device mat_type has not been returned. Defaults to False. - :raises RuntimeError: Raised when PETSc initialises a GPU device that + Raises + ------ + UnrecognisedDeviceError + Raised when PETSc initialises a GPU device that Firedrake does not understand - :returns: The PETSc mat_type compatible with the GPU device detected on + Returns + ------- + str | None + The PETSc mat_type compatible with the GPU device detected on this system or None """ @@ -58,7 +68,11 @@ def device_matrix_type(warn: bool = False) -> str | None: dev_type = dev.getDeviceType() dev.destroy() if dev_type not in _device_mat_type_map: - raise RuntimeError(f"Unknown device type: {dev_type} initialised by PETSc") + raise UnrecognisedDeviceError( + f"Unknown device type: {dev_type} initialised by PETSc. Firedrake " + f"currently understands {', '.join([k for k in _device_mat_type_map if k != 'HOST'])}" + " devices" + ) if warn: if dev_type == "HOST": diff --git a/scripts/firedrake-configure b/scripts/firedrake-configure index 53d0e2f307..e027238f84 100755 --- a/scripts/firedrake-configure +++ b/scripts/firedrake-configure @@ -49,16 +49,18 @@ ARCH_COMPLEX = FiredrakeArch.COMPLEX SUPPORTED_PETSC_VERSION = "v3.24.5" -# SuperLU_DIST built via PETSc does not support CUDA 13 +# SuperLU_DIST built via PETSc does not support CUDA 13 - can be increased +# to 13.0 when SUPPORTED_PETSC_VERSION >= v3.25.0. 
CUDA 13.1 is currently +# not supported by GPU drivers on Firedrake CI systems. SUPPORTED_CUDA_VERSION = "12.9" CUDA_ARCH_MAP = { "aarch64": "sbsa" } -# Structure is ( deb_repo_filename, file_contents, GPG_key_URL ) -# A file named /etc/apt/sources.list.d/deb_repo_filename containing file_contents will be created -# The output of curl -fsSL GPG_Key_URL will be passed to 'apt-key add' + +# Contains the URL to a deb package that will enable vendor-specific software development +# repositories, or an empty string if none are required. EXTRA_LINUX_APT_PKG_URL = { NO_GPU: "", CUDA: f"https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/{CUDA_ARCH_MAP.get(platform.machine(), platform.machine())}/cuda-keyring_1.1-1_all.deb", @@ -104,7 +106,10 @@ Please see https://firedrakeproject.org/install for more information.""" "--gpu-arch", choices=[arch.value for arch in GPUArch], default="none", - help="Target GPU architecture" + help=( + "Target GPU architecture. WARNING: This is an experimental feature. " + "GPU support in Firedrake is currently very limited." 
+ ), ) cmd_group = parser.add_mutually_exclusive_group(required=True) cmd_group.add_argument( @@ -256,7 +261,7 @@ PETSC_EXTRAS_COMMON_APT_PACKAGES = ( "libscalapack-openmpi-dev", ) -PETSC_EXTRAS_LINUX_APT_PACKAGES = PETSC_EXTRAS_COMMON_APT_PACKAGES + ( +PETSC_EXTRAS_LINUX_APT_NOGPU_PACKAGES = PETSC_EXTRAS_COMMON_APT_PACKAGES + ( "libsuitesparse-dev", "libsuperlu-dev", "libsuperlu-dist-dev", @@ -277,7 +282,7 @@ PETSC_EXTRAS_LINUX_APT_CUDA_PACKAGES = PETSC_EXTRAS_COMMON_APT_PACKAGES + ( f"libcublas-dev-{cuda_ver_str}", ) -LINUX_APT_PACKAGES = BASE_LINUX_APT_PACKAGES + PETSC_EXTRAS_LINUX_APT_PACKAGES +LINUX_APT_PACKAGES_NOGPU = BASE_LINUX_APT_PACKAGES + PETSC_EXTRAS_LINUX_APT_NOGPU_PACKAGES LINUX_APT_PACKAGES_CUDA = BASE_LINUX_APT_PACKAGES + PETSC_EXTRAS_LINUX_APT_CUDA_PACKAGES @@ -319,10 +324,10 @@ MINIMAL_SYSTEM_PACKAGES = { } SYSTEM_PACKAGES = { - (LINUX_APT_X86_64, ARCH_DEFAULT, NO_GPU): LINUX_APT_PACKAGES, - (LINUX_APT_X86_64, ARCH_COMPLEX, NO_GPU): LINUX_APT_PACKAGES, - (LINUX_APT_AARCH64, ARCH_DEFAULT, NO_GPU): LINUX_APT_PACKAGES, - (LINUX_APT_AARCH64, ARCH_COMPLEX, NO_GPU): LINUX_APT_PACKAGES, + (LINUX_APT_X86_64, ARCH_DEFAULT, NO_GPU): LINUX_APT_PACKAGES_NOGPU, + (LINUX_APT_X86_64, ARCH_COMPLEX, NO_GPU): LINUX_APT_PACKAGES_NOGPU, + (LINUX_APT_AARCH64, ARCH_DEFAULT, NO_GPU): LINUX_APT_PACKAGES_NOGPU, + (LINUX_APT_AARCH64, ARCH_COMPLEX, NO_GPU): LINUX_APT_PACKAGES_NOGPU, (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT, NO_GPU): MACOS_HOMEBREW_PACKAGES, (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX, NO_GPU): MACOS_HOMEBREW_PACKAGES, (LINUX_APT_X86_64, ARCH_DEFAULT, CUDA): LINUX_APT_PACKAGES_CUDA, From dadc415e93ff5c248113e2dba23b7050788f461b Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Thu, 26 Mar 2026 12:08:29 +1100 Subject: [PATCH 5/6] Restore PETSC_OPTIONS in test_gpu workflow --- .github/workflows/core.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index 856a9fe68f..5299c6814a 100644 --- 
a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -493,6 +493,7 @@ jobs: # 'make test_durations' inside a 'firedrake:latest' Docker image. EXTRA_PYTEST_ARGS: --splitting-algorithm least_duration --timeout=600 --timeout-method=thread -o faulthandler_timeout=660 --durations-path=./firedrake-repo/tests/test_durations.json --durations=50 PYTEST_MPI_MAX_NPROCS: 8 + PETSC_OPTIONS: -use_gpu_aware_mpi 0 steps: - name: Confirm Nvidia GPUs are enabled # The presence of the nvidia-smi command indicates that the Nvidia drivers have From b16bcaafa596fa73d7bafcc664944913494a20c5 Mon Sep 17 00:00:00 2001 From: Dale Roberts Date: Fri, 27 Mar 2026 09:15:03 +1100 Subject: [PATCH 6/6] Bump supported CUDA version. Address review comments --- .github/workflows/core.yml | 1 + firedrake/utils.py | 2 +- scripts/firedrake-configure | 10 ++++------ tests/firedrake/conftest.py | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index 5299c6814a..32f0e02f4b 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -493,6 +493,7 @@ jobs: # 'make test_durations' inside a 'firedrake:latest' Docker image. 
EXTRA_PYTEST_ARGS: --splitting-algorithm least_duration --timeout=600 --timeout-method=thread -o faulthandler_timeout=660 --durations-path=./firedrake-repo/tests/test_durations.json --durations=50 PYTEST_MPI_MAX_NPROCS: 8 + # Prevent PETSc from exiting with an error due to using non-GPU aware system MPI PETSC_OPTIONS: -use_gpu_aware_mpi 0 steps: - name: Confirm Nvidia GPUs are enabled diff --git a/firedrake/utils.py b/firedrake/utils.py index 068f77bb99..1df989a824 100644 --- a/firedrake/utils.py +++ b/firedrake/utils.py @@ -27,7 +27,7 @@ @cache -def device_matrix_type(warn: bool = False) -> str | None: +def device_matrix_type(warn: bool = True) -> str | None: r"""Get device matrix type Attempt to initialise a GPU device and return the PETSc mat_type diff --git a/scripts/firedrake-configure b/scripts/firedrake-configure index e027238f84..0fe40efd2d 100755 --- a/scripts/firedrake-configure +++ b/scripts/firedrake-configure @@ -49,10 +49,8 @@ ARCH_COMPLEX = FiredrakeArch.COMPLEX SUPPORTED_PETSC_VERSION = "v3.24.5" -# SuperLU_DIST built via PETSc does not support CUDA 13 - can be increased -# to 13.0 when SUPPORTED_PETSC_VERSION >= v3.25.0. CUDA 13.1 is currently -# not supported by GPU drivers on Firedrake CI systems. -SUPPORTED_CUDA_VERSION = "12.9" +# CUDA 13.1 is currently not supported by GPU drivers on Firedrake CI systems. 
+SUPPORTED_CUDA_VERSION = "13.0" CUDA_ARCH_MAP = { @@ -429,7 +427,7 @@ PETSC_EXTERNAL_PACKAGE_SPECS_COMMON: PetscSpecsDictType = { }, } -PETSC_EXTERNAL_PACKAGE_SPECS: PetscSpecsDictType = ( +PETSC_EXTERNAL_PACKAGE_SPECS_NOGPU: PetscSpecsDictType = ( PETSC_EXTERNAL_PACKAGE_SPECS_COMMON | { "suitesparse": { @@ -497,7 +495,7 @@ def prepare_external_package_configure_options( package_spec = PETSC_DOWNLOAD else: if gpu_arch == NO_GPU: - package_spec = PETSC_EXTERNAL_PACKAGE_SPECS[external_package][package_manager] + package_spec = PETSC_EXTERNAL_PACKAGE_SPECS_NOGPU[external_package][package_manager] elif gpu_arch == CUDA: package_spec = PETSC_EXTERNAL_PACKAGE_SPECS_CUDA[external_package][package_manager] diff --git a/tests/firedrake/conftest.py b/tests/firedrake/conftest.py index 9c82dfc884..f0f980adb0 100644 --- a/tests/firedrake/conftest.py +++ b/tests/firedrake/conftest.py @@ -185,7 +185,7 @@ def pytest_collection_modifyitems(session, config, items): if item.get_closest_marker("skipreal") is not None: item.add_marker(pytest.mark.skip(reason="Test makes no sense unless in complex mode")) - if device_matrix_type() is None: + if device_matrix_type(False) is None: if item.get_closest_marker("skipnogpu") is not None: item.add_marker(pytest.mark.skip(reason="Test requires GPU hardware to run."))