Skip to content

Commit 0abb07d

Browse files
committed
refactor(dockerfile-lb): native per-version GPU LB base on nvidia/cuda
Mirror the GPU worker rewrite for the load-balanced GPU image. Same nvidia/cuda + deadsnakes pattern, same native-per-version layout, just with EXPOSE 80 and the uvicorn entrypoint instead of the QB handler. Refs AE-2827.
1 parent 9cc31e9 commit 0abb07d

1 file changed

Lines changed: 46 additions & 29 deletions

File tree

Dockerfile-lb

Lines changed: 46 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@
1010
FROM runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2204
1111

1212
# Target Python version for the worker runtime.
13+
# Native per-version GPU LB base. Same shape as Dockerfile, with the
14+
# uvicorn entrypoint for load-balanced endpoints. See Dockerfile for the
15+
# full rationale on the nvidia/cuda + deadsnakes approach.
1316
ARG PYTHON_VERSION=3.12
1417
ARG TORCH_VERSION=2.9.1+cu128
1518
ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cu128
1619

17-
# Expose the target version to the running worker for startup validation.
18-
ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
20+
# Base image: CUDA 12.8.1 with the cuDNN runtime on Ubuntu 22.04. It serves as
# the CUDA/OS layer; Python and torch are installed by later steps in this file.
FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04
1921

2022
# Validate the base image provides the requested interpreter and activate it.
2123
# For non-3.12 targets, install torch for the selected Python and repoint
@@ -42,56 +44,71 @@ RUN python${PYTHON_VERSION} --version \
4244
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
4345
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3; \
4446
fi
47+
# Re-declare ARGs after FROM so they're visible in this build stage.
48+
ARG PYTHON_VERSION
49+
ARG TORCH_VERSION
50+
ARG TORCH_INDEX_URL
4551

46-
WORKDIR /app
47-
48-
# Prevent interactive prompts during package installation
52+
# Expose the target version to the running worker for startup validation.
ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
4953
# Prevent interactive prompts during package installation.
# NOTE(review): set via ENV, so it also persists into the runtime environment —
# presumably intentional for packages installed after build; confirm.
ENV DEBIAN_FRONTEND=noninteractive
50-
# Set timezone to avoid tzdata prompts
5154
# Set timezone to avoid tzdata prompts.
ENV TZ=Etc/UTC
52-
53-
# Enable HuggingFace transfer acceleration
5455
# Enable HuggingFace transfer acceleration.
ENV HF_HUB_ENABLE_HF_TRANSFER=1
55-
# Relocate HuggingFace cache outside /root/.cache to exclude from volume sync
5656
# Relocate the HuggingFace cache outside /root/.cache to exclude it from volume sync.
ENV HF_HOME=/hf-cache
5757

58-
# Configure APT cache to persist under /root/.cache for volume sync
58+
# Install ONE Python natively. 3.10 from upstream Ubuntu (jammy ships it as
59+
# system Python); 3.11/3.12/3.13 from deadsnakes. The deadsnakes PPA is added
# for every PYTHON_VERSION, including 3.10.
60+
RUN apt-get update \
61+
&& apt-get install -y --no-install-recommends \
62+
software-properties-common ca-certificates curl gnupg \
63+
&& add-apt-repository -y ppa:deadsnakes/ppa \
64+
&& apt-get update \
65+
&& apt-get install -y --no-install-recommends \
66+
python${PYTHON_VERSION} \
67+
python${PYTHON_VERSION}-venv \
68+
python${PYTHON_VERSION}-dev \
69+
git \
70+
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
71+
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3 \
72+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
73+
74+
# Bootstrap pip for the selected interpreter by downloading and running get-pip.py.
75+
RUN python -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
76+
&& python /tmp/get-pip.py --no-cache-dir \
77+
&& rm -f /tmp/get-pip.py
78+
79+
# Install torch natively for the active interpreter.
80+
RUN python -m pip install --no-cache-dir \
81+
--index-url ${TORCH_INDEX_URL} \
82+
"torch==${TORCH_VERSION}"
83+
84+
WORKDIR /app
85+
86+
# Configure APT cache to persist under /root/.cache for volume sync.
5987
RUN mkdir -p /root/.cache/apt/archives/partial \
6088
&& echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache
6189

62-
# Install system dependencies and uv
63-
# Note: build-essential not pre-installed to reduce image size (400MB savings)
64-
# Automatic detection will install it when needed (no manual action required)
65-
# Advanced: Users can pre-install via system_dependencies=["build-essential"]
66-
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
67-
curl ca-certificates git \
68-
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
90+
# Install uv for downstream dependency installation.
91+
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
6992
&& cp ~/.local/bin/uv /usr/local/bin/uv \
70-
&& chmod +x /usr/local/bin/uv \
71-
&& apt-get clean \
72-
&& rm -rf /var/lib/apt/lists/*
93+
&& chmod +x /usr/local/bin/uv
7394

74-
# Copy app code and install dependencies
75-
# Use --python to target the active interpreter (preserves torch in its site-packages)
95+
# Copy app code and install worker dependencies into the active interpreter.
7696
COPY README.md pyproject.toml uv.lock ./
7797
COPY src/ ./
7898
RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \
7999
&& uv pip install --python $(which python) --break-system-packages -r requirements.txt
80100

81-
# Install numpy for the active Python version.
82-
# The runpod/pytorch image ships torch but not numpy. Flash build excludes numpy
83-
# from tarballs (BASE_IMAGE_PACKAGES) to save tarball space (~30 MB), so numpy
84-
# must be provided here in the base image.
101+
# Install numpy for the active Python. Flash build excludes numpy from tarballs
# (BASE_IMAGE_PACKAGES) to save space, so it must be provided by this base image.
85102
RUN python -m pip install --no-cache-dir numpy
86103

87-
# Verify torch, numpy, and the expected Python version are available.
104+
# Verify torch, numpy, and the expected interpreter are wired correctly.
# The Python version is asserted; CUDA availability is only printed, not asserted.
88105
RUN python -c "import sys; actual = f'{sys.version_info.major}.{sys.version_info.minor}'; expected = '${PYTHON_VERSION}'; assert actual == expected, f'Expected Python {expected}, got {actual}'; print(f'Python {actual} OK')" \
89106
&& python -c "import torch; print(f'torch {torch.__version__} CUDA {torch.cuda.is_available()}')" \
90107
&& python -c "import numpy; print(f'numpy {numpy.__version__}')"
91108

92109
EXPOSE 80
93110

94-
# CMD will be overridden by RunPod at runtime to run the specific generated handler
95-
# The handler factory generates handler_{resource_name}.py files
111+
# CMD will be overridden by RunPod at runtime to run the specific generated handler.
112+
# The handler factory generates handler_{resource_name}.py files.
96113
# RunPod will invoke: uvicorn handler_{resource_name}:app --host 0.0.0.0 --port 80
97114
# Default entrypoint: serve lb_handler:app with uvicorn on 0.0.0.0:80, with the
# keep-alive timeout set to 600 (seconds, per uvicorn's --timeout-keep-alive).
CMD ["uvicorn", "lb_handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"]

0 commit comments

Comments
 (0)