-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
31 lines (26 loc) · 1.33 KB
/
Copy pathDockerfile
File metadata and controls
31 lines (26 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Base image: NVIDIA CUDA 12.4.1 "devel" variant on Ubuntu 22.04.
# llama.cpp is compiled inside this same image with -DGGML_CUDA=ON, so the
# image used for building is also the one that runs the server.
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04
# Toolchain and development headers needed to fetch and compile llama.cpp.
# ca-certificates is listed explicitly: git only "Recommends" it, so with
# --no-install-recommends the HTTPS clone would otherwise work only when the
# base image happens to ship the CA bundle.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        git \
        libcurl4-openssl-dev \
        libssl-dev \
    && rm -rf /var/lib/apt/lists/*
# Fetch the llama.cpp sources into /opt/llama.cpp.
# DiffusionGemma support has not landed on master yet, so the checkout is
# pinned to upstream PR #24427 ("Add diffusion-gemma block-diffusion
# support"), fetched into a local branch named "diffusiongemma".
# Once the PR is merged, change the fetch ref below (or check out a tagged
# release instead).
RUN git clone https://github.com/ggml-org/llama.cpp /opt/llama.cpp \
    && git -C /opt/llama.cpp fetch origin pull/24427/head:diffusiongemma \
    && git -C /opt/llama.cpp checkout diffusiongemma
# Make the CUDA driver library resolvable at link time.
# The CUDA dev image ships libcudart, but libcuda.so (the driver library) is
# normally supplied by the host at runtime. For the link step, both
# libcuda.so and libcuda.so.1 in the system library directory are pointed at
# the SDK's stub copy; without this, ld fails with
# "undefined reference to cuGetErrorString".
# NOTE(review): these links remain in the final image — confirm they do not
# shadow the real driver library that the host provides at runtime.
RUN stub=/usr/local/cuda/lib64/stubs/libcuda.so \
    && libdir=/usr/lib/x86_64-linux-gnu \
    && ln -sf "$stub" "$libdir/libcuda.so" \
    && ln -sf "$stub" "$libdir/libcuda.so.1" \
    && ldconfig
# Configure and compile llama.cpp with the CUDA backend plus curl/OpenSSL
# support, then copy the llama-server binary onto PATH.
# - Any stale build directory is removed first so the configure step starts
#   clean.
# - The build type is set at configure time: "--config Release" only affects
#   multi-config generators and is ignored by single-config ones such as
#   Makefiles, so -DCMAKE_BUILD_TYPE=Release is what actually selects an
#   optimized build here. "--config Release" is kept because it is harmless.
# - NOTE(review): the build tree under /opt/llama.cpp/build is left in the
#   image; presumably llama-server still loads shared libraries from it —
#   confirm before deleting it or moving to a multi-stage build.
RUN cd /opt/llama.cpp \
    && rm -rf build \
    && cmake -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DGGML_CUDA=ON \
        -DLLAMA_CURL=ON \
        -DLLAMA_OPENSSL=ON \
    && cmake --build build --config Release -j "$(nproc)" \
    && mkdir -p /usr/local/bin \
    && cp build/bin/llama-server /usr/local/bin/
# llama-server binds to 127.0.0.1 by default, which cannot be reached through
# a published container port. Bind to all interfaces inside the container so
# the exposed port is actually usable; an explicit --host argument on the
# command line still takes precedence over this environment default.
ENV LLAMA_ARG_HOST=0.0.0.0
# Documents the HTTP port the server is expected to listen on (EXPOSE does not
# publish it; use -p/--publish at run time).
EXPOSE 8080
# Exec form: llama-server runs as PID 1 and any arguments given to
# `docker run` are appended to it.
ENTRYPOINT ["llama-server"]