-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathMainstreamCudaDockerfile
More file actions
38 lines (29 loc) · 1.14 KB
/
MainstreamCudaDockerfile
File metadata and controls
38 lines (29 loc) · 1.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# CUDA 12.4 development image on Ubuntu 22.04; the llama-cpp-python step
# further down compiles with -DGGML_CUDA=on, so a CUDA build toolchain is
# required at image build time.
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04

# Install OS-level build dependencies.
#
# 1. The first `apt-get update` is best-effort: a failure is ignored so that
#    wget can still be installed and the NVIDIA repository keyring refreshed.
#    NOTE(review): presumably this works around a stale NVIDIA signing key in
#    the base image - confirm it is still needed.
# 2. cuda-keyring is downloaded, installed and deleted in the same layer so the
#    .deb never persists in the image.
# 3. The second `apt-get update` must succeed; the toolchain is then installed.
#
# --no-install-recommends keeps the layer small. Packages that were previously
# pulled in only as "recommends" but are actually needed are listed explicitly:
# ca-certificates (HTTPS download via wget), python3-setuptools, python3-wheel.
RUN (apt-get update || true) \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \
    && dpkg -i cuda-keyring_1.1-1_all.deb \
    && rm cuda-keyring_1.1-1_all.deb \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        ninja-build \
        python3-dev \
        python3-pip \
        python3-setuptools \
        python3-wheel \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# All following instructions (and the container's default command) run from /code.
WORKDIR /code

# Settings for the CUDA build of llama-cpp-python.
# Multi-arch build to ensure one image works for all "Modern" pre-Blackwell cards:
# GGML_CUDA_ARCH lists the target compute capabilities (7.5, 8.0, 8.6, 8.9).
# NOTE(review): these are ENV (not ARG), so they are also visible in the running
# container; kept as-is in case anything reads them at runtime.
ENV GGML_CUDA=on \
    GGML_CUDA_ARCH="75;80;86;89" \
    CUDA_DOCKER_ARCH=all

# Build and install llama-cpp-python with the CUDA backend.
#
# - The architecture list is handed to CMake through CMAKE_CUDA_ARCHITECTURES,
#   which is the variable the llama.cpp CUDA build reads. The previous
#   -DGGML_CUDA_ARCH=... is not a llama.cpp/ggml option, so the list above was
#   never applied to the compile.
# - The semicolons are safe: the value sits inside double quotes for the shell
#   and stays a single -D argument.
# - FORCE_CMAKE=1 and CMAKE_ARGS only matter when pip builds from source.
#   NOTE(review): --extra-index-url points at an index of prebuilt cu124 wheels;
#   if pip resolves to one of those, none of the CMake settings take effect.
#   Add `--no-binary llama-cpp-python` if a source build must be guaranteed.
# - CMAKE_BUILD_PARALLEL_LEVEL=2 limits the build to two parallel jobs
#   (presumably to bound memory use on the build host - confirm).
RUN CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${GGML_CUDA_ARCH} -DGGML_CUDA_FA_ALL_QUANTS=on" \
    FORCE_CMAKE=1 \
    CMAKE_BUILD_PARALLEL_LEVEL=2 \
    pip install llama-cpp-python --no-cache-dir --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124
# Install the Python packages for the HTTP service.
# --no-cache-dir keeps pip's download cache out of the image layer, matching the
# llama-cpp-python install earlier in this file.
# NOTE(review): versions are unpinned, so rebuilds are not reproducible; consider
# pinning or installing from a requirements file.
RUN pip install --no-cache-dir "fastapi[standard]" "uvicorn[standard]" httpx

# Copy the application sources last so code changes do not invalidate the
# dependency layers above.
COPY ./app /code

# Port the server listens on (documentation only; publish it with `docker run -p`).
EXPOSE 8080

# Exec-form command: uvicorn serves the `app` object from module `main` on all
# interfaces, port 8080. The module is looked up from the working directory, so
# ./app is expected to provide main.py - verify against the repository layout.
# NOTE(review): no USER is set, so the server runs as root; consider a dedicated
# non-root user once file and GPU device permissions have been checked.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]