@@ -5,10 +5,6 @@ ARG LLAMA_SERVER_VERSION=b8882
55ARG LLAMA_SERVER_VARIANT=cpu
66ARG LLAMA_UPSTREAM_IMAGE=ghcr.io/ggml-org/llama.cpp:server-vulkan-b8882
77
8- # Use 26.04 for the default Vulkan-backed Linux image.
9- # GPU variants should pair this with a compatible runtime base image.
10- ARG BASE_IMAGE=ubuntu:26.04
11-
128ARG VERSION=dev
139
1410FROM docker.io/library/golang:${GO_VERSION}-bookworm AS builder
@@ -43,11 +39,8 @@ RUN --mount=type=cache,target=/go/pkg/mod \
4339 --mount=type=cache,target=/root/.cache/go-build \
4440 CGO_ENABLED=1 GOOS=linux go build -tags=novllm -ldflags="-s -w -X main.Version=${VERSION}" -o model-runner .
4541
46- # --- Get llama.cpp binary ---
47- FROM ${LLAMA_UPSTREAM_IMAGE} AS llama-server
48-
49- # --- Final image ---
50- FROM docker.io/${BASE_IMAGE} AS llamacpp
42+ # --- Final image: directly FROM the upstream llama.cpp image ---
43+ FROM ${LLAMA_UPSTREAM_IMAGE} AS llamacpp
5144
5245ARG LLAMA_SERVER_VARIANT
5346
@@ -57,7 +50,8 @@ RUN groupadd --system modelrunner && useradd --system --gid modelrunner -G video
5750
5851COPY scripts/ /scripts/
5952
60- # Install ca-certificates for HTTPS and vulkan
53+ # Install additional packages not shipped by the upstream image
54+ # (e.g. ca-certificates for HTTPS, mesa patches for aarch64 virtio-vulkan).
6155RUN /scripts/apt-install.sh && rm -rf /scripts
6256
6357WORKDIR /app
@@ -67,29 +61,6 @@ RUN mkdir -p /var/run/model-runner /models && \
6761 chown -R modelrunner:modelrunner /var/run/model-runner /app /models && \
6862 chmod -R 755 /models
6963
70- # Copy the upstream llama.cpp /app layout as-is. The Go binary-resolver
71- # (resolveLlamaServerBin) discovers "llama-server" automatically when the
72- # Docker-convention "com.docker.llama-server" is absent.
73- COPY --from=llama-server /app/ /app/
74-
75- # Verify that every shared library copied from the upstream image can resolve
76- # its runtime dependencies. This catches missing system packages (e.g.
77- # libgomp1) at build time instead of letting them surface as cryptic
78- # "no CPU backend found" errors at runtime.
79- RUN set -e; missing="" ; \
80- export LD_LIBRARY_PATH=/app; \
81- for f in /app/llama-server /app/*.so; do \
82- out=$(ldd "$f" 2>&1) || true; \
83- not_found=$(echo "$out" | grep "not found" || true); \
84- if [ -n "$not_found" ]; then \
85- missing="$missing\n $f:\n $not_found" ; \
86- fi; \
87- done; \
88- if [ -n "$missing" ]; then \
89- printf "ERROR: unresolved shared-library dependencies:\n %b\n " "$missing" >&2; \
90- exit 1; \
91- fi
92-
9364USER modelrunner
9465
9566# Set the environment variable for the socket path and LLamA server binary path.
0 commit comments