Skip to content

Commit e063207

Browse files
committed
Merge upstream/master (ggml-org) into fork master
Assisted-by: Claude Opus 4.8
2 parents 82b829b + c2ba3e4 commit e063207

1,711 files changed

Lines changed: 211100 additions & 110775 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.devops/cann.Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
# Define the CANN base image for easier version updates later
66
ARG CHIP_TYPE=910b
77
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.5.0-${CHIP_TYPE}-openeuler24.03-py3.11
8+
ARG BUILD_DATE=N/A
9+
ARG APP_VERSION=N/A
10+
ARG APP_REVISION=N/A
811

912
# ==============================================================================
1013
# BUILD STAGE
@@ -55,6 +58,7 @@ RUN mkdir -p /app/lib && \
5558
RUN mkdir -p /app/full && \
5659
cp build/bin/* /app/full/ && \
5760
cp *.py /app/full/ && \
61+
cp -r conversion /app/full/ && \
5862
cp -r gguf-py /app/full/ && \
5963
cp -r requirements /app/full/ && \
6064
cp requirements.txt /app/full/
@@ -67,6 +71,19 @@ RUN mkdir -p /app/full && \
6771
# ==============================================================================
6872
FROM ${CANN_BASE_IMAGE} AS base
6973

74+
ARG BUILD_DATE=N/A
75+
ARG APP_VERSION=N/A
76+
ARG APP_REVISION=N/A
77+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
78+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
79+
LABEL org.opencontainers.image.created=$BUILD_DATE \
80+
org.opencontainers.image.version=$APP_VERSION \
81+
org.opencontainers.image.revision=$APP_REVISION \
82+
org.opencontainers.image.title="llama.cpp" \
83+
org.opencontainers.image.description="LLM inference in C/C++" \
84+
org.opencontainers.image.url=$IMAGE_URL \
85+
org.opencontainers.image.source=$IMAGE_SOURCE
86+
7087
# -- Install runtime dependencies --
7188
RUN yum install -y libgomp curl && \
7289
yum clean all && \

.devops/cpu.Dockerfile

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
ARG UBUNTU_VERSION=24.04
2+
ARG BUILD_DATE=N/A
3+
ARG APP_VERSION=N/A
4+
ARG APP_REVISION=N/A
25

36
FROM ubuntu:$UBUNTU_VERSION AS build
47

@@ -27,6 +30,7 @@ RUN mkdir -p /app/lib && \
2730
RUN mkdir -p /app/full \
2831
&& cp build/bin/* /app/full \
2932
&& cp *.py /app/full \
33+
&& cp -r conversion /app/full \
3034
&& cp -r gguf-py /app/full \
3135
&& cp -r requirements /app/full \
3236
&& cp requirements.txt /app/full \
@@ -35,8 +39,21 @@ RUN mkdir -p /app/full \
3539
## Base image
3640
FROM ubuntu:$UBUNTU_VERSION AS base
3741

42+
ARG BUILD_DATE=N/A
43+
ARG APP_VERSION=N/A
44+
ARG APP_REVISION=N/A
45+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
46+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
47+
LABEL org.opencontainers.image.created=$BUILD_DATE \
48+
org.opencontainers.image.version=$APP_VERSION \
49+
org.opencontainers.image.revision=$APP_REVISION \
50+
org.opencontainers.image.title="llama.cpp" \
51+
org.opencontainers.image.description="LLM inference in C/C++" \
52+
org.opencontainers.image.url=$IMAGE_URL \
53+
org.opencontainers.image.source=$IMAGE_SOURCE
54+
3855
RUN apt-get update \
39-
&& apt-get install -y libgomp1 curl \
56+
&& apt-get install -y libgomp1 curl ffmpeg \
4057
&& apt autoremove -y \
4158
&& apt clean -y \
4259
&& rm -rf /tmp/* /var/tmp/* \

.devops/cuda.Dockerfile

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,26 @@
11
ARG UBUNTU_VERSION=24.04
22
# This needs to generally match the container host's environment.
33
ARG CUDA_VERSION=12.8.1
4+
ARG GCC_VERSION=14
45
# Target the CUDA build image
56
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
67

78
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
89

10+
ARG BUILD_DATE=N/A
11+
ARG APP_VERSION=N/A
12+
ARG APP_REVISION=N/A
13+
914
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
1015

16+
ARG GCC_VERSION
1117
# CUDA architecture to build for (defaults to all supported archs)
1218
ARG CUDA_DOCKER_ARCH=default
1319

1420
RUN apt-get update && \
15-
apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1
21+
apt-get install -y gcc-${GCC_VERSION} g++-${GCC_VERSION} build-essential cmake python3 python3-pip git libssl-dev libgomp1
1622

17-
ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14
23+
ENV CC=gcc-${GCC_VERSION} CXX=g++-${GCC_VERSION} CUDAHOSTCXX=g++-${GCC_VERSION}
1824

1925
WORKDIR /app
2026

@@ -32,6 +38,7 @@ RUN mkdir -p /app/lib && \
3238
RUN mkdir -p /app/full \
3339
&& cp build/bin/* /app/full \
3440
&& cp *.py /app/full \
41+
&& cp -r conversion /app/full \
3542
&& cp -r gguf-py /app/full \
3643
&& cp -r requirements /app/full \
3744
&& cp requirements.txt /app/full \
@@ -40,8 +47,21 @@ RUN mkdir -p /app/full \
4047
## Base image
4148
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
4249

50+
ARG BUILD_DATE=N/A
51+
ARG APP_VERSION=N/A
52+
ARG APP_REVISION=N/A
53+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
54+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
55+
LABEL org.opencontainers.image.created=$BUILD_DATE \
56+
org.opencontainers.image.version=$APP_VERSION \
57+
org.opencontainers.image.revision=$APP_REVISION \
58+
org.opencontainers.image.title="llama.cpp" \
59+
org.opencontainers.image.description="LLM inference in C/C++" \
60+
org.opencontainers.image.url=$IMAGE_URL \
61+
org.opencontainers.image.source=$IMAGE_SOURCE
62+
4363
RUN apt-get update \
44-
&& apt-get install -y libgomp1 curl \
64+
&& apt-get install -y libgomp1 curl ffmpeg \
4565
&& apt autoremove -y \
4666
&& apt clean -y \
4767
&& rm -rf /tmp/* /var/tmp/* \

.devops/intel.Dockerfile

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,22 @@
1-
ARG ONEAPI_VERSION=2025.3.2-0-devel-ubuntu24.04
1+
ARG ONEAPI_VERSION=2025.3.3-0-devel-ubuntu24.04
2+
ARG BUILD_DATE=N/A
3+
ARG APP_VERSION=N/A
4+
ARG APP_REVISION=N/A
25

36
## Build Image
47

58
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
69

710
ARG GGML_SYCL_F16=OFF
11+
ARG LEVEL_ZERO_VERSION=1.28.2
12+
ARG LEVEL_ZERO_UBUNTU_VERSION=u24.04
813
RUN apt-get update && \
9-
apt-get install -y git libssl-dev
14+
apt-get install -y git libssl-dev wget ca-certificates && \
15+
cd /tmp && \
16+
wget -q "https://github.com/oneapi-src/level-zero/releases/download/v${LEVEL_ZERO_VERSION}/level-zero_${LEVEL_ZERO_VERSION}%2B${LEVEL_ZERO_UBUNTU_VERSION}_amd64.deb" -O level-zero.deb && \
17+
wget -q "https://github.com/oneapi-src/level-zero/releases/download/v${LEVEL_ZERO_VERSION}/level-zero-devel_${LEVEL_ZERO_VERSION}%2B${LEVEL_ZERO_UBUNTU_VERSION}_amd64.deb" -O level-zero-devel.deb && \
18+
apt-get -o Dpkg::Options::="--force-overwrite" install -y ./level-zero.deb ./level-zero-devel.deb && \
19+
rm -f /tmp/level-zero.deb /tmp/level-zero-devel.deb
1020

1121
WORKDIR /app
1222

@@ -26,18 +36,42 @@ RUN mkdir -p /app/lib && \
2636
RUN mkdir -p /app/full \
2737
&& cp build/bin/* /app/full \
2838
&& cp *.py /app/full \
39+
&& cp -r conversion /app/full \
2940
&& cp -r gguf-py /app/full \
3041
&& cp -r requirements /app/full \
3142
&& cp requirements.txt /app/full \
3243
&& cp .devops/tools.sh /app/full/tools.sh
3344

3445
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
3546

36-
ARG IGC_VERSION=v2.30.1
37-
ARG IGC_VERSION_FULL=2_2.30.1+20950
38-
ARG COMPUTE_RUNTIME_VERSION=26.09.37435.1
39-
ARG COMPUTE_RUNTIME_VERSION_FULL=26.09.37435.1-0
40-
ARG IGDGMM_VERSION=22.9.0
47+
ARG BUILD_DATE=N/A
48+
ARG APP_VERSION=N/A
49+
ARG APP_REVISION=N/A
50+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
51+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
52+
LABEL org.opencontainers.image.created=$BUILD_DATE \
53+
org.opencontainers.image.version=$APP_VERSION \
54+
org.opencontainers.image.revision=$APP_REVISION \
55+
org.opencontainers.image.title="llama.cpp" \
56+
org.opencontainers.image.description="LLM inference in C/C++" \
57+
org.opencontainers.image.url=$IMAGE_URL \
58+
org.opencontainers.image.source=$IMAGE_SOURCE
59+
60+
# The following (commented-out) versions are for multi-GPU setups, since 26.x has a known issue:
61+
# https://github.com/ggml-org/llama.cpp/issues/21747,
62+
# https://github.com/intel/compute-runtime/issues/921.
63+
#ARG IGC_VERSION=v2.20.5
64+
#ARG IGC_VERSION_FULL=2_2.20.5+19972
65+
#ARG COMPUTE_RUNTIME_VERSION=25.40.35563.10
66+
#ARG COMPUTE_RUNTIME_VERSION_FULL=25.40.35563.10-0
67+
#ARG IGDGMM_VERSION=22.8.2
68+
69+
70+
ARG IGC_VERSION=v2.34.4
71+
ARG IGC_VERSION_FULL=2_2.34.4+21428
72+
ARG COMPUTE_RUNTIME_VERSION=26.18.38308.1
73+
ARG COMPUTE_RUNTIME_VERSION_FULL=26.18.38308.1-0
74+
ARG IGDGMM_VERSION=22.10.0
4175
RUN mkdir /tmp/neo/ && cd /tmp/neo/ \
4276
&& wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-core-${IGC_VERSION_FULL}_amd64.deb \
4377
&& wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-opencl-${IGC_VERSION_FULL}_amd64.deb \
@@ -51,7 +85,7 @@ RUN mkdir /tmp/neo/ && cd /tmp/neo/ \
5185
&& dpkg --install *.deb
5286

5387
RUN apt-get update \
54-
&& apt-get install -y libgomp1 curl \
88+
&& apt-get install -y libgomp1 curl ffmpeg \
5589
&& apt autoremove -y \
5690
&& apt clean -y \
5791
&& rm -rf /tmp/* /var/tmp/* \
@@ -109,4 +143,3 @@ WORKDIR /app
109143
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
110144

111145
ENTRYPOINT [ "/app/llama-server" ]
112-

.devops/llama-cli-cann.Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
ARG ASCEND_VERSION=8.5.0-910b-openeuler22.03-py3.10
2+
ARG BUILD_DATE=N/A
3+
ARG APP_VERSION=N/A
4+
ARG APP_REVISION=N/A
25

36
FROM ascendai/cann:$ASCEND_VERSION AS build
47

@@ -28,6 +31,20 @@ RUN echo "Building with static libs" && \
2831

2932
# TODO: use image with NNRT
3033
FROM ascendai/cann:$ASCEND_VERSION AS runtime
34+
35+
ARG BUILD_DATE=N/A
36+
ARG APP_VERSION=N/A
37+
ARG APP_REVISION=N/A
38+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
39+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
40+
LABEL org.opencontainers.image.created=$BUILD_DATE \
41+
org.opencontainers.image.version=$APP_VERSION \
42+
org.opencontainers.image.revision=$APP_REVISION \
43+
org.opencontainers.image.title="llama.cpp" \
44+
org.opencontainers.image.description="LLM inference in C/C++" \
45+
org.opencontainers.image.url=$IMAGE_URL \
46+
org.opencontainers.image.source=$IMAGE_SOURCE
47+
3148
COPY --from=build /app/build/bin/llama-cli /app/build/bin/llama-completion /
3249

3350
ENV LC_ALL=C.utf8

.devops/musa.Dockerfile

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_V
66

77
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
88

9+
ARG BUILD_DATE=N/A
10+
ARG APP_VERSION=N/A
11+
ARG APP_REVISION=N/A
12+
913
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
1014

1115
# MUSA architecture to build for (defaults to all supported archs)
@@ -37,6 +41,7 @@ RUN mkdir -p /app/lib && \
3741
RUN mkdir -p /app/full \
3842
&& cp build/bin/* /app/full \
3943
&& cp *.py /app/full \
44+
&& cp -r conversion /app/full \
4045
&& cp -r gguf-py /app/full \
4146
&& cp -r requirements /app/full \
4247
&& cp requirements.txt /app/full \
@@ -45,8 +50,21 @@ RUN mkdir -p /app/full \
4550
## Base image
4651
FROM ${BASE_MUSA_RUN_CONTAINER} AS base
4752

53+
ARG BUILD_DATE=N/A
54+
ARG APP_VERSION=N/A
55+
ARG APP_REVISION=N/A
56+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
57+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
58+
LABEL org.opencontainers.image.created=$BUILD_DATE \
59+
org.opencontainers.image.version=$APP_VERSION \
60+
org.opencontainers.image.revision=$APP_REVISION \
61+
org.opencontainers.image.title="llama.cpp" \
62+
org.opencontainers.image.description="LLM inference in C/C++" \
63+
org.opencontainers.image.url=$IMAGE_URL \
64+
org.opencontainers.image.source=$IMAGE_SOURCE
65+
4866
RUN apt-get update \
49-
&& apt-get install -y libgomp1 curl \
67+
&& apt-get install -y libgomp1 curl ffmpeg \
5068
&& apt autoremove -y \
5169
&& apt clean -y \
5270
&& rm -rf /tmp/* /var/tmp/* \

.devops/nix/package.nix

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
glibc,
44
config,
55
stdenv,
6+
stdenvNoCC,
67
runCommand,
78
cmake,
89
ninja,
@@ -18,6 +19,9 @@
1819
vulkan-loader,
1920
openssl,
2021
shaderc,
22+
spirv-headers,
23+
nodejs,
24+
importNpmLock,
2125
useBlas ?
2226
builtins.all (x: !x) [
2327
useCuda
@@ -102,6 +106,7 @@ let
102106
vulkan-headers
103107
vulkan-loader
104108
shaderc
109+
spirv-headers
105110
];
106111
in
107112

@@ -128,7 +133,31 @@ effectiveStdenv.mkDerivation (finalAttrs: {
128133
src = lib.cleanSource ../../.;
129134
};
130135

131-
postPatch = ''
136+
# Builds the webui locally, taking care not to require updating any sha256 hash.
137+
webui = stdenvNoCC.mkDerivation {
138+
pname = "webui";
139+
version = llamaVersion;
140+
src = lib.cleanSource ../../tools/ui;
141+
142+
nativeBuildInputs = [
143+
nodejs
144+
importNpmLock.linkNodeModulesHook
145+
];
146+
147+
# no sha256 required when using buildNodeModules
148+
npmDeps = importNpmLock.buildNodeModules {
149+
npmRoot = ../../tools/ui;
150+
inherit nodejs;
151+
};
152+
153+
installPhase = ''
154+
LLAMA_UI_OUT_DIR=$out npm run build --offline
155+
'';
156+
};
157+
158+
postPatch = lib.optionalString useWebUi ''
159+
cp -r ${finalAttrs.webui} tools/ui/dist
160+
chmod -R u+w tools/ui/dist
132161
'';
133162

134163
# With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,

0 commit comments

Comments
 (0)