Skip to content

Commit 18cc59d

Browse files
committed
update Dockerfile and scripts to remove BASE_IMAGE
1 parent 2e7f1e5 commit 18cc59d

5 files changed

Lines changed: 21 additions & 55 deletions

File tree

.github/workflows/release.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ jobs:
315315
platforms: linux/amd64, linux/arm64
316316
build-args: |
317317
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
318+
"LLAMA_SERVER_VARIANT=cpu"
318319
"LLAMA_UPSTREAM_IMAGE=${{ steps.llama-images.outputs.cpu }}"
319320
"VERSION=${{ env.RELEASE_TAG }}"
320321
push: true
@@ -332,7 +333,6 @@ jobs:
332333
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
333334
"LLAMA_SERVER_VARIANT=cuda"
334335
"LLAMA_UPSTREAM_IMAGE=${{ steps.llama-images.outputs.cuda }}"
335-
"BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04"
336336
"VERSION=${{ env.RELEASE_TAG }}"
337337
push: true
338338
sbom: true
@@ -349,7 +349,6 @@ jobs:
349349
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
350350
"LLAMA_SERVER_VARIANT=cuda"
351351
"LLAMA_UPSTREAM_IMAGE=${{ steps.llama-images.outputs.cuda }}"
352-
"BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04"
353352
"VLLM_VERSION=${{ env.VLLM_VERSION }}"
354353
"VERSION=${{ env.RELEASE_TAG }}"
355354
push: true
@@ -367,7 +366,6 @@ jobs:
367366
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
368367
"LLAMA_SERVER_VARIANT=cuda"
369368
"LLAMA_UPSTREAM_IMAGE=${{ steps.llama-images.outputs.cuda }}"
370-
"BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04"
371369
"SGLANG_VERSION=${{ env.SGLANG_VERSION }}"
372370
"VERSION=${{ env.RELEASE_TAG }}"
373371
push: true
@@ -385,7 +383,6 @@ jobs:
385383
"LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}"
386384
"LLAMA_SERVER_VARIANT=rocm"
387385
"LLAMA_UPSTREAM_IMAGE=${{ steps.llama-images.outputs.rocm }}"
388-
"BASE_IMAGE=rocm/dev-ubuntu-24.04:7.2.1-complete"
389386
"VERSION=${{ env.RELEASE_TAG }}"
390387
push: true
391388
sbom: true

.versions

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,3 @@ VLLM_METAL_RELEASE=v0.2.0-20260420-142150
55
DIFFUSERS_RELEASE=v0.1.0-20260216-000000
66
SGLANG_VERSION=0.5.6
77
LLAMA_SERVER_VERSION=b8882
8-
BASE_IMAGE=ubuntu:26.04

Dockerfile

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@ ARG LLAMA_SERVER_VERSION=b8882
55
ARG LLAMA_SERVER_VARIANT=cpu
66
ARG LLAMA_UPSTREAM_IMAGE=ghcr.io/ggml-org/llama.cpp:server-vulkan-b8882
77

8-
# Use 26.04 for the default Vulkan-backed Linux image.
9-
# GPU variants should pair this with a compatible runtime base image.
10-
ARG BASE_IMAGE=ubuntu:26.04
11-
128
ARG VERSION=dev
139

1410
FROM docker.io/library/golang:${GO_VERSION}-bookworm AS builder
@@ -43,11 +39,8 @@ RUN --mount=type=cache,target=/go/pkg/mod \
4339
--mount=type=cache,target=/root/.cache/go-build \
4440
CGO_ENABLED=1 GOOS=linux go build -tags=novllm -ldflags="-s -w -X main.Version=${VERSION}" -o model-runner .
4541

46-
# --- Get llama.cpp binary ---
47-
FROM ${LLAMA_UPSTREAM_IMAGE} AS llama-server
48-
49-
# --- Final image ---
50-
FROM docker.io/${BASE_IMAGE} AS llamacpp
42+
# --- Final image: directly FROM the upstream llama.cpp image ---
43+
FROM ${LLAMA_UPSTREAM_IMAGE} AS llamacpp
5144

5245
ARG LLAMA_SERVER_VARIANT
5346

@@ -57,7 +50,8 @@ RUN groupadd --system modelrunner && useradd --system --gid modelrunner -G video
5750

5851
COPY scripts/ /scripts/
5952

60-
# Install ca-certificates for HTTPS and vulkan
53+
# Install additional packages not shipped by the upstream image
54+
# (e.g. ca-certificates for HTTPS, mesa patches for aarch64 virtio-vulkan).
6155
RUN /scripts/apt-install.sh && rm -rf /scripts
6256

6357
WORKDIR /app
@@ -67,29 +61,6 @@ RUN mkdir -p /var/run/model-runner /models && \
6761
chown -R modelrunner:modelrunner /var/run/model-runner /app /models && \
6862
chmod -R 755 /models
6963

70-
# Copy the upstream llama.cpp /app layout as-is. The Go binary-resolver
71-
# (resolveLlamaServerBin) discovers "llama-server" automatically when the
72-
# Docker-convention "com.docker.llama-server" is absent.
73-
COPY --from=llama-server /app/ /app/
74-
75-
# Verify that every shared library copied from the upstream image can resolve
76-
# its runtime dependencies. This catches missing system packages (e.g.
77-
# libgomp1) at build time instead of letting them surface as cryptic
78-
# "no CPU backend found" errors at runtime.
79-
RUN set -e; missing=""; \
80-
export LD_LIBRARY_PATH=/app; \
81-
for f in /app/llama-server /app/*.so; do \
82-
out=$(ldd "$f" 2>&1) || true; \
83-
not_found=$(echo "$out" | grep "not found" || true); \
84-
if [ -n "$not_found" ]; then \
85-
missing="$missing\n$f:\n$not_found"; \
86-
fi; \
87-
done; \
88-
if [ -n "$missing" ]; then \
89-
printf "ERROR: unresolved shared-library dependencies:\n%b\n" "$missing" >&2; \
90-
exit 1; \
91-
fi
92-
9364
USER modelrunner
9465

9566
# Set the environment variable for the socket path and llama-server binary path.

Makefile

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ LLAMA_SERVER_VARIANT := cpu
77
LLAMA_UPSTREAM_IMAGE ?= $(shell \
88
bash scripts/resolve-llama-upstream-image.sh \
99
"$(LLAMA_SERVER_VERSION)" "$(LLAMA_SERVER_VARIANT)")
10-
VLLM_BASE_IMAGE := nvidia/cuda:13.0.2-runtime-ubuntu24.04
1110
DOCKER_IMAGE := docker/model-runner:latest
1211
DOCKER_IMAGE_VLLM := docker/model-runner:latest-vllm-cuda
1312
DOCKER_IMAGE_SGLANG := docker/model-runner:latest-sglang
@@ -34,7 +33,6 @@ DOCKER_BUILD_COMMON_ARGS = \
3433
--build-arg LLAMA_SERVER_VARIANT=$(LLAMA_SERVER_VARIANT) \
3534
--build-arg LLAMA_UPSTREAM_IMAGE=$(LLAMA_UPSTREAM_IMAGE) \
3635
--build-arg SGLANG_VERSION=$(SGLANG_VERSION) \
37-
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
3836
--build-arg VLLM_VERSION='$(VLLM_VERSION)' \
3937
--target $(DOCKER_TARGET) \
4038
-t $(DOCKER_IMAGE)
@@ -118,7 +116,7 @@ e2e:
118116
test-docker-ce-installation:
119117
@echo "Testing Docker CE installation..."
120118
@echo "Note: This requires Docker to be running"
121-
BASE_IMAGE=$(BASE_IMAGE) scripts/test-docker-ce-installation.sh
119+
scripts/test-docker-ce-installation.sh
122120

123121
validate:
124122
find . -type f -name "*.sh" | grep -v "pkg/go-containerregistry\|llamacpp/native/vendor" | xargs shellcheck
@@ -187,8 +185,7 @@ docker-build-vllm:
187185
@$(MAKE) docker-build \
188186
DOCKER_TARGET=final-vllm \
189187
DOCKER_IMAGE=$(DOCKER_IMAGE_VLLM) \
190-
LLAMA_SERVER_VARIANT=cuda \
191-
BASE_IMAGE=$(VLLM_BASE_IMAGE)
188+
LLAMA_SERVER_VARIANT=cuda
192189

193190
# Run vLLM Docker container with TCP port access and mounted model storage
194191
docker-run-vllm: docker-build-vllm
@@ -199,8 +196,7 @@ docker-build-sglang:
199196
@$(MAKE) docker-build \
200197
DOCKER_TARGET=final-sglang \
201198
DOCKER_IMAGE=$(DOCKER_IMAGE_SGLANG) \
202-
LLAMA_SERVER_VARIANT=cuda \
203-
BASE_IMAGE=$(VLLM_BASE_IMAGE)
199+
LLAMA_SERVER_VARIANT=cuda
204200

205201
# Run SGLang Docker container with TCP port access and mounted model storage
206202
docker-run-sglang: docker-build-sglang

scripts/apt-install.sh

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
#!/bin/bash
22

3+
# Install additional system packages on top of the upstream llama.cpp image.
4+
#
5+
# The upstream image already ships GPU libraries (Vulkan, CUDA, ROCm) and
6+
# libgomp1, so we only need:
7+
# - ca-certificates (for HTTPS model downloads)
8+
# - mesa patches (aarch64 + cpu variant only — Docker Desktop virtio-vulkan)
9+
310
enable_source_repos() {
411
# DEB822 format (Ubuntu 24.04+)
512
for f in /etc/apt/sources.list.d/*.sources; do
@@ -41,16 +48,12 @@ main() {
4148
set -eux -o pipefail
4249

4350
apt-get update
44-
# libgomp1 is the OpenMP runtime required by the upstream llama.cpp CPU
45-
# backend plugins (libggml-cpu-*.so) which are compiled with -fopenmp.
46-
local packages=("ca-certificates" "libgomp1")
47-
if [ "$LLAMA_SERVER_VARIANT" = "cpu" ]; then
48-
packages+=("libvulkan1")
49-
if [ "$(uname -m)" = "aarch64" ]; then
50-
rebuild_and_install_mesa
51-
else
52-
packages+=("mesa-vulkan-drivers")
53-
fi
51+
local packages=("ca-certificates")
52+
53+
# On aarch64 CPU (Vulkan) builds, rebuild mesa with Docker Desktop
54+
# virtio-vulkan patches.
55+
if [ "$LLAMA_SERVER_VARIANT" = "cpu" ] && [ "$(uname -m)" = "aarch64" ]; then
56+
rebuild_and_install_mesa
5457
fi
5558

5659
apt-get install -y "${packages[@]}"

0 commit comments

Comments
 (0)