Optimize Docker images & add multi-architecture support

edoardob90 · edoardob90 · commit fb3e50e8ef1d · 2025-05-06T11:03:18.000+02:00
- Reduce image size by implementing multi-stage build
- Remove torchaudio package and optimize PyTorch installation
- Add GPU/CPU variant support via build arguments
- Enable multi-architecture builds (amd64/arm64) with proper manifests
- Fix Renku compatibility by setting provenance: false
- Simplify container environment variables and working directory
- Create architecture-specific tags for better image management
diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
@@ -29,6 +29,7 @@ jobs:
     strategy:
       matrix:
         arch: [amd64, arm64]
+        variant: [cpu, cuda]
       fail-fast: false
     steps:
       - name: Checkout code
@@ -54,10 +55,10 @@ jobs:
         with:
           images: ghcr.io/${{ github.repository }}
           tags: |
-            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
-            type=ref,event=pr
-            type=ref,event=tag
-            type=sha,format=short
+            type=raw,value=${{ matrix.variant }}-${{ matrix.arch }},enable=${{ github.ref == 'refs/heads/main' }}
+            type=raw,value=${{ matrix.variant }}-${{ matrix.arch }}-pr-${{ github.event.pull_request.number }},enable=${{ github.event_name == 'pull_request' }}
+            type=raw,value=${{ matrix.variant }}-${{ matrix.arch }}-${{ github.ref_name }},enable=${{ startsWith(github.ref, 'refs/tags/') }}
+            type=raw,value=${{ matrix.variant }}-${{ matrix.arch }}-sha-${{ github.sha }},prefix=
 
       - name: Build and push
         uses: docker/build-push-action@v6
@@ -67,3 +68,60 @@ jobs:
          push: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
+          provenance: false
+          build-args: |
+            PYTORCH_VARIANT=${{ matrix.variant }}
+          # Each output spec is parsed by buildx as CSV, so no field value may
+          # contain a comma; `push=` mirrors the `push` input so images built
+          # for pull requests from forks are never pushed.
+          outputs: |
+            type=image,name=ghcr.io/${{ github.repository }},push-by-digest=false,name-canonical=false,push=${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository }},annotation-index.org.opencontainers.image.description=A containerized Python tutorial environment with Jupyter Lab (${{ matrix.variant }} variant for ${{ matrix.arch }}).,annotation-index.org.opencontainers.image.source=https://github.com/${{ github.repository }},annotation-index.org.opencontainers.image.authors=Empa Scientific IT <scientificit@empa.ch>,annotation-index.org.opencontainers.image.licenses=MIT,annotation-index.org.opencontainers.image.variant=pytorch-${{ matrix.variant }}-${{ matrix.arch }}
+
+  # Assemble the multi-arch tags from the per-arch images pushed by
+  # build-and-push (tagged <variant>-<arch>-sha-<commit>) instead of rebuilding.
+  create-manifests:
+    needs: build-and-push
+    runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
+    permissions:
+      packages: write
+    steps:
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Create and push CPU manifest
+        env:
+          IMAGE: ghcr.io/${{ github.repository }}
+          VARIANT: cpu
+        run: |
+          tags=(--tag "${IMAGE}:${VARIANT}")
+          if [[ "${GITHUB_REF}" == refs/tags/* ]]; then
+            tags+=(--tag "${IMAGE}:${VARIANT}-${GITHUB_REF_NAME}")
+          fi
+          # Only the CPU variant is published as `latest`, and only from main.
+          if [[ "${GITHUB_REF}" == "refs/heads/main" ]]; then
+            tags+=(--tag "${IMAGE}:latest")
+          fi
+          docker buildx imagetools create "${tags[@]}" \
+            "${IMAGE}:${VARIANT}-amd64-sha-${GITHUB_SHA}" \
+            "${IMAGE}:${VARIANT}-arm64-sha-${GITHUB_SHA}"
+
+      - name: Create and push CUDA manifest
+        env:
+          IMAGE: ghcr.io/${{ github.repository }}
+          VARIANT: cuda
+        run: |
+          tags=(--tag "${IMAGE}:${VARIANT}")
+          if [[ "${GITHUB_REF}" == refs/tags/* ]]; then
+            tags+=(--tag "${IMAGE}:${VARIANT}-${GITHUB_REF_NAME}")
+          fi
+          docker buildx imagetools create "${tags[@]}" \
+            "${IMAGE}:${VARIANT}-amd64-sha-${GITHUB_SHA}" \
+            "${IMAGE}:${VARIANT}-arm64-sha-${GITHUB_SHA}"
diff --git a/Dockerfile b/Dockerfile
@@ -1,19 +1,8 @@
-# Use the jupyter/minimal-notebook as the base image
-FROM quay.io/jupyter/minimal-notebook:latest
+# Stage 1: Build environment (named so the runtime stage can COPY --from it)
+FROM quay.io/jupyter/minimal-notebook:latest AS builder
 
-# Metadata labels
-LABEL org.opencontainers.image.title="Python Tutorial"
-LABEL org.opencontainers.image.description="A containerized Python tutorial environment with Jupyter Lab."
-LABEL org.opencontainers.image.authors="Empa Scientific IT <scientificit@empa.ch>"
-LABEL org.opencontainers.image.url="https://github.com/empa-scientific-it/python-tutorial"
-LABEL org.opencontainers.image.source="https://github.com/empa-scientific-it/python-tutorial"
-LABEL org.opencontainers.image.version="1.0.0"
-LABEL org.opencontainers.image.licenses="MIT"
-
-# Set environment variables for the tutorial and repository
-ENV BASENAME="python-tutorial"
-ENV REPO=${HOME}/${BASENAME}
-ENV IPYTHONDIR="${HOME}/.ipython"
+# Define build argument for PyTorch variant (cpu or cuda)
+ARG PYTORCH_VARIANT=cpu
 
 # Switch to root user to install additional dependencies
 USER root
@@ -33,16 +22,59 @@ USER ${NB_UID}
 # Set up the Conda environment
 COPY docker/environment.yml /tmp/environment.yml
 RUN mamba env update -n base -f /tmp/environment.yml && \
+    # Install PyTorch packages without cache - conditionally based on variant
+    if [ "$PYTORCH_VARIANT" = "cpu" ]; then \
+        echo "Installing CPU-only PyTorch" && \
+        pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; \
+    else \
+        echo "Installing CUDA-enabled PyTorch" && \
+        pip install --no-cache-dir torch torchvision; \
+    fi && \
+    # Clean up all package caches to reduce image size
     mamba clean --all -f -y && \
+    # Remove pip cache
+    rm -rf ~/.cache/pip && \
     fix-permissions "${CONDA_DIR}" && \
     fix-permissions "/home/${NB_USER}"
 
-# Prepare IPython configuration (move earlier in the build)
-RUN mkdir -p ${HOME}/.ipython/profile_default
+# Stage 2: Runtime environment - creates a lighter final image
+FROM quay.io/jupyter/minimal-notebook:latest
+
+# Inherit build argument for image labeling
+ARG PYTORCH_VARIANT=cpu
+
+# Metadata labels
+LABEL org.opencontainers.image.title="Python Tutorial"
+LABEL org.opencontainers.image.description="A containerized Python tutorial environment with Jupyter Lab."
+LABEL org.opencontainers.image.authors="Empa Scientific IT <scientificit@empa.ch>"
+LABEL org.opencontainers.image.url="https://github.com/empa-scientific-it/python-tutorial"
+LABEL org.opencontainers.image.source="https://github.com/empa-scientific-it/python-tutorial"
+LABEL org.opencontainers.image.version="1.0.0"
+LABEL org.opencontainers.image.licenses="MIT"
+LABEL org.opencontainers.image.variant="pytorch-${PYTORCH_VARIANT}"
+
+# Switch to root user to install minimal dependencies
+USER root
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    libgl1 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Switch back to the default notebook user
+USER ${NB_UID}
+
+# Copy the conda environment from the builder stage, owned by the notebook user
+COPY --from=builder --chown=${NB_UID}:${NB_GID} ${CONDA_DIR} ${CONDA_DIR}
+
+# Copy home directory with configurations
+COPY --from=builder --chown=${NB_UID}:${NB_GID} /home/${NB_USER} /home/${NB_USER}
+
+# Prepare IPython configuration
 COPY --chown=${NB_UID}:${NB_GID} binder/ipython_config.py ${HOME}/.ipython/profile_default/
 
-# Set the working directory to the repository
-WORKDIR ${REPO}
+# Set the working directory to user's home (repository will be cloned here by Renku)
+WORKDIR /home/${NB_USER}
 
 # Use the default ENTRYPOINT from the base image to start Jupyter Lab
 ENTRYPOINT ["tini", "-g", "--", "start.sh"]
diff --git a/docker/environment.yml b/docker/environment.yml
@@ -26,8 +26,5 @@ dependencies:
       - python-dotenv
       - pillow
       - opencv-python
-      - torch
-      - torchaudio
-      - torchvision
       - albumentations
       - grad-cam