# Source: diffusiongemma-server/Dockerfile (psaboia/diffusiongemma-server, branch main)
# Base image: CUDA 12.4.1 "devel" flavour on Ubuntu 22.04. llama.cpp is
# compiled inside this image with GGML_CUDA=ON (see the cmake step below),
# and the same image is also what ends up running llama-server.
# NOTE(review): presumably the devel variant is required for the CUDA
# compiler and headers — confirm before switching to a slimmer tag.
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04

# Build prerequisites for fetching and compiling llama.cpp. The package list
# is kept one-per-line and alphabetised so future diffs stay small; the apt
# index is removed in the same layer so it never lands in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ccache \
        cmake \
        git \
        libcurl4-openssl-dev \
        libssl-dev \
    && rm -rf /var/lib/apt/lists/*

# DiffusionGemma support is still in flight upstream, so by default we build
# PR #24427 ("Add diffusion-gemma block-diffusion support").
#
# Both the repository and the ref are build arguments so the pin can be moved
# without editing this file, e.g. once support is merged into master:
#   docker build --build-arg LLAMA_CPP_REF=master .
# LLAMA_CPP_REF may be anything `git fetch origin <ref>` accepts (a PR head,
# a branch, a tag, or a commit SHA). Note that a PR head is a moving target:
# pass a commit SHA when a reproducible image is required.
ARG LLAMA_CPP_REPO=https://github.com/ggml-org/llama.cpp
ARG LLAMA_CPP_REF=pull/24427/head

# `git -C` is used instead of `cd` so every command names its directory.
# The fetched ref is stored in the local branch "diffusiongemma" and checked
# out; the final rev-parse prints the resolved commit into the build log so
# it is always possible to tell which upstream revision an image contains.
RUN git clone "${LLAMA_CPP_REPO}" /opt/llama.cpp \
    && git -C /opt/llama.cpp fetch origin "${LLAMA_CPP_REF}:diffusiongemma" \
    && git -C /opt/llama.cpp checkout diffusiongemma \
    && git -C /opt/llama.cpp rev-parse HEAD

# Link-time workaround: libcudart is part of the CUDA dev image, whereas the
# driver library libcuda.so is normally supplied by the host when the
# container runs. So that the linker can resolve driver symbols during the
# build, expose the SDK's stub libcuda.so under both the unversioned name and
# the .so.1 name, then refresh the loader cache. Skipping this step makes ld
# fail with "undefined reference to cuGetErrorString".
RUN for name in libcuda.so libcuda.so.1; do \
        ln -sf /usr/local/cuda/lib64/stubs/libcuda.so "/usr/lib/x86_64-linux-gnu/${name}" || exit 1; \
    done \
    && ldconfig

# Optional list of CUDA compute capabilities to compile for, e.g.
#   docker build --build-arg CUDA_ARCHITECTURES="86;89" .
# When left empty (the default) nothing is passed to CMake and the project's
# own default architecture selection applies.
ARG CUDA_ARCHITECTURES=""

# Configure, compile and install llama-server.
#  - Any stale build tree is removed first so the configure step starts clean.
#  - CMAKE_BUILD_TYPE is set explicitly: `--config Release` only has an effect
#    with multi-config generators, so with a single-config generator the
#    optimisation level would otherwise depend on the project's default.
#  - -S/-B name the source and build directories, so no `cd` is needed.
#  - Only the llama-server executable is placed on PATH; the build tree is
#    left in place under /opt/llama.cpp/build.
#    NOTE(review): the binary presumably resolves llama.cpp's shared
#    libraries from that build tree — confirm before deleting it to save space.
RUN rm -rf /opt/llama.cpp/build \
    && cmake -S /opt/llama.cpp -B /opt/llama.cpp/build \
        -DCMAKE_BUILD_TYPE=Release \
        -DGGML_CUDA=ON -DLLAMA_CURL=ON -DLLAMA_OPENSSL=ON \
        ${CUDA_ARCHITECTURES:+"-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}"} \
    && cmake --build /opt/llama.cpp/build --config Release -j "$(nproc)" \
    && install -D -m 0755 /opt/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server

# llama-server binds to the loopback interface unless told otherwise, which
# makes a published port (`docker run -p 8080:8080 ...`) unreachable from
# outside the container. LLAMA_ARG_HOST is the environment equivalent of
# `--host`; an explicit `--host` on the command line still takes precedence.
# Caution: with `--network host` this exposes the server on every host
# interface — pass `--host 127.0.0.1` there if that is not wanted.
ENV LLAMA_ARG_HOST=0.0.0.0

# Default llama-server port (documentation only; publish it with `-p`).
EXPOSE 8080

# Exec form, so llama-server runs as PID 1 and receives signals directly.
# Everything given after the image name in `docker run` is passed to it as
# arguments.
ENTRYPOINT ["llama-server"]