Skip to content

Commit d3660aa

Browse files
Merge pull request #7 from FluidNumerics/feature/nvidia
Add NVIDIA sm70 (V100) environment and CI workflow
2 parents 38cd7c1 + 09f2bc8 commit d3660aa

7 files changed

Lines changed: 532 additions & 0 deletions

File tree

.github/workflows/build-nvidia.yml

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Builds the NVIDIA (CUDA) container images defined under envs/x86/<gpu_arch>/.
# Every trigger builds the images; they are pushed to the registry for every
# event except pull_request (see the `push:` input of the build step).
name: Build NVIDIA Docker images

on:
  push:
    branches:
      - main
    paths:
      - envs/x86/sm70/**
      - envs/x86/sm100/**
      - .github/workflows/build-nvidia.yml
  pull_request:
    paths:
      - envs/x86/sm70/**
      - envs/x86/sm100/**
      - .github/workflows/build-nvidia.yml
  workflow_dispatch:

env:
  # NOTE(review): REGISTRY is not referenced by any step in this workflow;
  # the login step relies on the action's default registry.
  REGISTRY: docker.io
  IMAGE_NAME: higherordermethods/selfish

jobs:
  build:
    name: Build ${{ matrix.gpu_arch }} image
    runs-on: ubuntu-latest
    permissions:
      contents: read
    strategy:
      # Let the other architecture finish even if one build fails.
      fail-fast: false
      matrix:
        include:
          # cuda_version stays quoted so "13.0" is not read as the float 13.
          - gpu_arch: sm70
            cuda_version: "12.4"
          - gpu_arch: sm100
            cuda_version: "13.0"
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      # Skipped for pull requests, where nothing is pushed.
      - name: Log in to Docker Hub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Set up Buildx
        uses: docker/setup-buildx-action@v3

      # Outputs:
      #   tags        - newline-separated image references (latest-* and <sha>-*)
      #   gpu_backend - e.g. cuda124
      - name: Generate image metadata
        id: meta
        # Expression values are handed to the script as environment variables
        # instead of being spliced into the shell text.
        env:
          CUDA_VERSION: ${{ matrix.cuda_version }}
          GPU_ARCH: ${{ matrix.gpu_arch }}
          CPU_PLATFORM: x86
          COMMIT_SHA: ${{ github.sha }}
        run: |
          # Convert CUDA version 12.4 -> cuda124
          GPU_BACKEND="cuda${CUDA_VERSION//./}"
          SUFFIX="${CPU_PLATFORM}-${GPU_BACKEND}-${GPU_ARCH}"

          # Tags: <version>-<cpu_platform>-<gpu_backend>-<gpu_arch>
          {
            echo "tags<<EOF"
            echo "${IMAGE_NAME}:latest-${SUFFIX}"
            echo "${IMAGE_NAME}:${COMMIT_SHA}-${SUFFIX}"
            echo "EOF"
            echo "gpu_backend=${GPU_BACKEND}"
          } >> "$GITHUB_OUTPUT"

      # The key is unique per commit, so each run saves a new cache entry;
      # restore-keys falls back to the most recent entry for the same arch.
      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ matrix.gpu_arch }}-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-${{ matrix.gpu_arch }}-

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: envs/x86/${{ matrix.gpu_arch }}/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          build-args: |
            CUDA_VERSION=${{ matrix.cuda_version }}
          # Read from the restored cache, export to a separate directory.
          cache-from: type=local,src=/tmp/.buildx-cache
          cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max
          labels: |
            com.fluidnumerics.cuda.target=${{ matrix.gpu_arch }}
            com.fluidnumerics.cuda.version=${{ matrix.cuda_version }}
            org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
            org.opencontainers.image.revision=${{ github.sha }}

      # Replace the restored cache with the freshly exported one so the
      # directory saved by actions/cache holds only this build's layers.
      - name: Move cache
        run: |
          rm -rf /tmp/.buildx-cache
          mv /tmp/.buildx-cache-new /tmp/.buildx-cache

envs/x86/sm100/Dockerfile

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# Three-stage build:
#   bootstrap - Rocky Linux 9 + system compilers + CUDA toolkit + a Spack checkout
#   builder   - installs the Spack environment into /opt/software and /opt/views
#   (final)   - bare Rocky Linux 9 with CUDA runtime libraries and the installed tree
FROM docker.io/rockylinux:9 AS bootstrap

# CUDA release as <major>.<minor>; the package suffix is derived as <major>-<minor>.
ARG CUDA_VERSION=13.0
# Spack branch or tag to fetch (shallow).
ARG SPACK_REF=develop

ENV SPACK_ROOT=/opt/spack \
    CURRENTLY_BUILDING_DOCKER_IMAGE=1 \
    container=docker

# dnf-plugins-core provides the `dnf config-manager` command used below.
RUN dnf update -y \
 && dnf install -y epel-release \
 && dnf update -y \
 && dnf --enablerepo epel install -y \
        bzip2 \
        cmake \
        curl-minimal \
        dnf-plugins-core \
        file \
        findutils \
        gcc-c++ \
        gcc \
        gcc-gfortran \
        git \
        gnupg2 \
        hg \
        hostname \
        iproute \
        make \
        patch \
        python3 \
        python3-pip \
        python3-setuptools \
        svn \
        unzip \
        xz \
        zstd \
 && pip3 install boto3 \
 && rm -rf /var/cache/dnf \
 && dnf clean all

# Install CUDA toolkit from NVIDIA repo
RUN dnf config-manager \
        --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
 && dnf clean all \
 && dnf update -y \
 && dnf install -y "cuda-toolkit-$(echo "${CUDA_VERSION}" | tr '.' '-')" \
 && dnf clean all \
 && rm -rf /var/cache/dnf

# Fail the build early if the toolkit did not land in the versioned prefix
# that the Spack external below points at.
RUN ls -l /usr/local/cuda-${CUDA_VERSION}/include/cuda.h

# Shallow checkout of a single Spack ref.
RUN mkdir $SPACK_ROOT && cd $SPACK_ROOT && \
    git init --quiet && git remote add origin https://github.com/spack/spack.git && git fetch --depth=1 origin "${SPACK_REF}" && git checkout --detach FETCH_HEAD && \
    mkdir -p $SPACK_ROOT/opt/spack

# The Spack entrypoint script is exposed under three command names.
RUN ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
          /usr/local/bin/docker-shell \
 && ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
          /usr/local/bin/interactive-shell \
 && ln -s $SPACK_ROOT/share/spack/docker/entrypoint.bash \
          /usr/local/bin/spack-env

RUN mkdir -p /root/.spack \
 && cp $SPACK_ROOT/share/spack/docker/modules.yaml \
        /root/.spack/modules.yaml \
 && rm -rf /root/*.* /run/nologin

# [WORKAROUND]
# https://superuser.com/questions/1241548/
#     xubuntu-16-04-ttyname-failed-inappropriate-ioctl-for-device#1253889
RUN [ -f ~/.profile ] \
 && sed -i 's/mesg n/( tty -s \\&\\& mesg n || true )/g' ~/.profile \
 || true


WORKDIR /root
# From here on, RUN commands are executed through the Spack entrypoint script.
SHELL ["docker-shell"]

# Creates the package cache
RUN spack bootstrap now \
 && spack bootstrap status --optional \
 && spack spec hdf5+mpi

ENTRYPOINT ["/bin/bash", "/opt/spack/share/spack/docker/entrypoint.bash"]
CMD ["interactive-shell"]

# Build stage with Spack pre-installed and ready to be used
FROM bootstrap AS builder

# Declared again so ${CUDA_VERSION} in the manifest below is defined in this
# stage without relying on the builder inheriting it from the parent stage.
ARG CUDA_VERSION=13.0

# What we want to install and how we want to install it
# is specified in a manifest file (spack.yaml)
RUN mkdir -p /opt/spack-environment && \
    set -o noclobber \
    && (echo spack: \
    &&   echo '  specs:' \
    &&   echo '  - feq-parse@2.2.2' \
    &&   echo '  - openmpi@5.0.8 +cuda cuda_arch=100' \
    &&   echo '  - hdf5@1.14.5 +fortran +mpi' \
    &&   echo '  - cmake@3.31.11' \
    &&   echo '  packages:' \
    &&   echo '    all:' \
    &&   echo '      require:' \
    &&   echo '      - target=x86_64_v3' \
    &&   echo '      prefer:' \
    &&   echo '      - cuda_arch=100' \
    &&   echo '    cuda:' \
    &&   echo '      buildable: false' \
    &&   echo '      externals:' \
    &&   echo "      - spec: \"cuda@${CUDA_VERSION}\"" \
    &&   echo "        prefix: \"/usr/local/cuda-${CUDA_VERSION}\"" \
    &&   echo '' \
    &&   echo '  concretizer:' \
    &&   echo '    unify: true' \
    &&   echo '  config:' \
    &&   echo '    install_tree:' \
    &&   echo '      root: /opt/software' \
    &&   echo '  view: /opt/views/view') > /opt/spack-environment/spack.yaml

# Apply feq-parse patch to add "c" build dependency
COPY ./envs/x86/sm100/feq-parse.patch /tmp/feq-parse.patch
# The patch paths start with a/repos/spack_repo/builtin/..., so with -p1 it is
# applied from the directory that contains repos/: take the last column of
# `spack repo list` and drop the repos/spack_repo/builtin part from it.
RUN SPACK_PKGS_ROOT=$(spack repo list | awk '{print $NF}') &&\
    SPACK_BUILTIN_PKGS_ROOT=${SPACK_PKGS_ROOT/repos\/spack_repo\/builtin} &&\
    patch -p1 -d $SPACK_BUILTIN_PKGS_ROOT < /tmp/feq-parse.patch

# Install the software, remove unnecessary deps
RUN cd /opt/spack-environment && spack env activate . && spack repo list && spack install --fail-fast && spack gc -y

# Strip all the binaries
RUN find -L /opt/views/view/* -type f -exec readlink -f '{}' \; | \
    xargs file -i | \
    grep 'charset=binary' | \
    grep 'x-executable\|x-archive\|x-sharedlib' | \
    awk -F: '{print $1}' | xargs strip

# Modifications to the environment that are necessary to run
RUN cd /opt/spack-environment && \
    spack env activate --sh -d . > activate.sh


# Bare OS image to run the installed executables
FROM docker.io/rockylinux:9

COPY --from=builder /opt/spack-environment /opt/spack-environment
COPY --from=builder /opt/software /opt/software

# dnf-plugins-core provides the `dnf config-manager` command used below.
RUN dnf update -y \
 && dnf install -y epel-release \
 && dnf update -y \
 && dnf --enablerepo epel install -y \
        bzip2 \
        cmake \
        curl-minimal \
        dnf-plugins-core \
        file \
        findutils \
        gcc-c++ \
        gcc \
        gcc-gfortran \
        lcov \
 && dnf clean all \
 && rm -rf /var/cache/dnf

# Install CUDA runtime libraries
ARG CUDA_VERSION=13.0
RUN dnf config-manager \
        --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
 && dnf clean all \
 && dnf update -y \
 && dnf install -y \
        "cuda-libraries-$(echo "${CUDA_VERSION}" | tr '.' '-')" \
        "cuda-nvtx-$(echo "${CUDA_VERSION}" | tr '.' '-')" \
 && dnf clean all \
 && rm -rf /var/cache/dnf

# paths.view is a symlink, so copy the parent to avoid dereferencing and duplicating it
COPY --from=builder /opt/views /opt/views

# /entrypoint.sh sources the Spack environment activation script and then
# exec's whatever command the container was started with.
RUN { \
      echo '#!/bin/sh' \
      && echo '.' /opt/spack-environment/activate.sh \
      && echo 'exec "$@"'; \
    } > /entrypoint.sh \
 && chmod a+x /entrypoint.sh \
 && ln -s /opt/views/view /opt/view


LABEL "mpi"="openmpi"
ENTRYPOINT [ "/entrypoint.sh" ]
CMD [ "/bin/bash" ]

envs/x86/sm100/feq-parse.patch

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
diff --git a/repos/spack_repo/builtin/packages/feq_parse/package.py b/repos/spack_repo/builtin/packages/feq_parse/package.py
2+
index e4b960b7..bc0916b9 100644
3+
--- a/repos/spack_repo/builtin/packages/feq_parse/package.py
4+
+++ b/repos/spack_repo/builtin/packages/feq_parse/package.py
5+
@@ -29,6 +29,7 @@ class FeqParse(CMakePackage):
6+
version("1.0.2", sha256="1cd1db7562908ea16fc65dc5268b654405d0b3d9dcfe11f409949c431b48a3e8")
7+
8+
depends_on("fortran", type="build") # generated
9+
+ depends_on("c", type="build") # generated
10+
11+
depends_on("cmake@3.0.2:", type="build")
12+

envs/x86/sm100/spack.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Spack environment manifest for the x86 / NVIDIA sm100 image.
# NOTE(review): the Dockerfile in this directory writes its own inline
# manifest (it also lists cmake and an external cuda package) and fetches
# Spack's develop branch rather than the ref below; confirm which is canonical.
spack:
  # Root specs of the environment.
  specs:
    - feq-parse@2.2.2
    - openmpi@5.0.8 +cuda cuda_arch=100
    - hdf5@1.14.5 +fortran +mpi

  # Constraints applied to every package.
  packages:
    all:
      require:
        - 'target=x86_64_v3'
      prefer:
        - 'cuda_arch=100'

  # Container recipe settings.
  container:
    format: docker
    images:
      os: 'rockylinux:9'
      spack:
        ref: 'v1.0.2'

    strip: true

    labels:
      mpi: openmpi

0 commit comments

Comments
 (0)