1010FROM runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2204
1111
1212# Target Python version for the worker runtime.
13+ # Native per-version GPU LB base. Same shape as Dockerfile, with the
14+ # uvicorn entrypoint for load-balanced endpoints. See Dockerfile for the
15+ # full rationale on the nvidia/cuda + deadsnakes approach.
1316ARG PYTHON_VERSION=3.12
1417ARG TORCH_VERSION=2.9.1+cu128
1518ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cu128
1619
17- # Expose the target version to the running worker for startup validation.
18- ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
20+ FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04
1921
2022# Validate the base image provides the requested interpreter and activate it.
2123# For non-3.12 targets, install torch for the selected Python and repoint
@@ -42,56 +44,71 @@ RUN python${PYTHON_VERSION} --version \
4244 && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
4345 && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3; \
4446 fi
47+ # Re-declare ARGs after FROM so they're visible in this build stage.
48+ ARG PYTHON_VERSION
49+ ARG TORCH_VERSION
50+ ARG TORCH_INDEX_URL
4551
46- WORKDIR /app
47-
48- # Prevent interactive prompts during package installation
52+ ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
4953ENV DEBIAN_FRONTEND=noninteractive
50- # Set timezone to avoid tzdata prompts
5154ENV TZ=Etc/UTC
52-
53- # Enable HuggingFace transfer acceleration
5455ENV HF_HUB_ENABLE_HF_TRANSFER=1
55- # Relocate HuggingFace cache outside /root/.cache to exclude from volume sync
5656ENV HF_HOME=/hf-cache
5757
58- # Configure APT cache to persist under /root/.cache for volume sync
58+ # Install ONE Python natively. The deadsnakes PPA is always added: 3.10 resolves from
59+ # upstream Ubuntu (jammy ships it as system Python); 3.11/3.12/3.13 come from deadsnakes.
60+ RUN apt-get update \
61+ && apt-get install -y --no-install-recommends \
62+ software-properties-common ca-certificates curl gnupg \
63+ && add-apt-repository -y ppa:deadsnakes/ppa \
64+ && apt-get update \
65+ && apt-get install -y --no-install-recommends \
66+ python${PYTHON_VERSION} \
67+ python${PYTHON_VERSION}-venv \
68+ python${PYTHON_VERSION}-dev \
69+ git \
70+ && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
71+ && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3 \
72+ && apt-get clean && rm -rf /var/lib/apt/lists/*
73+
74+ # Bootstrap pip via get-pip.py.
75+ RUN python -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
76+ && python /tmp/get-pip.py --no-cache-dir \
77+ && rm -f /tmp/get-pip.py
78+
79+ # Install torch natively for the active interpreter.
80+ RUN python -m pip install --no-cache-dir \
81+ --index-url ${TORCH_INDEX_URL} \
82+ "torch==${TORCH_VERSION}"
83+
84+ WORKDIR /app
85+
86+ # Configure APT cache to persist under /root/.cache for volume sync.
5987RUN mkdir -p /root/.cache/apt/archives/partial \
6088 && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache
6189
62- # Install system dependencies and uv
63- # Note: build-essential not pre-installed to reduce image size (400MB savings)
64- # Automatic detection will install it when needed (no manual action required)
65- # Advanced: Users can pre-install via system_dependencies=["build-essential"]
66- RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
67- curl ca-certificates git \
68- && curl -LsSf https://astral.sh/uv/install.sh | sh \
90+ # Install uv for downstream dependency installation. The installer is saved to a file and then executed (instead of `curl | sh`) so a failed download stops the build at curl rather than being hidden by the pipe's exit status; the script is removed in the same layer.
91+ RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && sh /tmp/uv-install.sh && rm -f /tmp/uv-install.sh \
6992 && cp ~/.local/bin/uv /usr/local/bin/uv \
70- && chmod +x /usr/local/bin/uv \
71- && apt-get clean \
72- && rm -rf /var/lib/apt/lists/*
93+ && chmod +x /usr/local/bin/uv
7394
74- # Copy app code and install dependencies
75- # Use --python to target the active interpreter (preserves torch in its site-packages)
95+ # Copy app code and install worker dependencies into the active interpreter.
7696COPY README.md pyproject.toml uv.lock ./
7797COPY src/ ./
7898RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \
7999 && uv pip install --python $(which python) --break-system-packages -r requirements.txt
80100
81- # Install numpy for the active Python version.
82- # The runpod/pytorch image ships torch but not numpy. Flash build excludes numpy
83- # from tarballs (BASE_IMAGE_PACKAGES) to save tarball space (~30 MB), so numpy
84- # must be provided here in the base image.
101+ # Install numpy for the active Python (excluded from flash tarballs).
85102RUN python -m pip install --no-cache-dir numpy
86103
87- # Verify torch, numpy, and the expected Python version are available.
104+ # Verify torch, numpy, and the expected interpreter are wired correctly.
88105RUN python -c "import sys; actual = f'{sys.version_info.major}.{sys.version_info.minor}'; expected = '${PYTHON_VERSION}'; assert actual == expected, f'Expected Python {expected}, got {actual}'; print(f'Python {actual} OK')" \
89106 && python -c "import torch; print(f'torch {torch.__version__} CUDA {torch.cuda.is_available()}')" \
90107 && python -c "import numpy; print(f'numpy {numpy.__version__}')"
91108
92109EXPOSE 80
93110
94- # CMD will be overridden by RunPod at runtime to run the specific generated handler
95- # The handler factory generates handler_{resource_name}.py files
111+ # CMD will be overridden by RunPod at runtime to run the specific generated handler.
112+ # The handler factory generates handler_{resource_name}.py files.
96113# RunPod will invoke: uvicorn handler_{resource_name}:app --host 0.0.0.0 --port 80
97114CMD ["uvicorn", "lb_handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"]
0 commit comments