Skip to content

Commit 1ad1a80

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents d526394 + 241cbd4 commit 1ad1a80

1,307 files changed

Lines changed: 107445 additions & 67740 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.devops/cann.Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,9 @@
55
# Define the CANN base image for easier version updates later
66
ARG CHIP_TYPE=910b
77
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.5.0-${CHIP_TYPE}-openeuler24.03-py3.11
8+
ARG BUILD_DATE=N/A
9+
ARG APP_VERSION=N/A
10+
ARG APP_REVISION=N/A
811

912
# ==============================================================================
1013
# BUILD STAGE
@@ -55,6 +58,7 @@ RUN mkdir -p /app/lib && \
5558
RUN mkdir -p /app/full && \
5659
cp build/bin/* /app/full/ && \
5760
cp *.py /app/full/ && \
61+
cp -r conversion /app/full/ && \
5862
cp -r gguf-py /app/full/ && \
5963
cp -r requirements /app/full/ && \
6064
cp requirements.txt /app/full/
@@ -67,6 +71,19 @@ RUN mkdir -p /app/full && \
6771
# ==============================================================================
6872
FROM ${CANN_BASE_IMAGE} AS base
6973

74+
ARG BUILD_DATE=N/A
75+
ARG APP_VERSION=N/A
76+
ARG APP_REVISION=N/A
77+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
78+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
79+
LABEL org.opencontainers.image.created=$BUILD_DATE \
80+
org.opencontainers.image.version=$APP_VERSION \
81+
org.opencontainers.image.revision=$APP_REVISION \
82+
org.opencontainers.image.title="llama.cpp" \
83+
org.opencontainers.image.description="LLM inference in C/C++" \
84+
org.opencontainers.image.url=$IMAGE_URL \
85+
org.opencontainers.image.source=$IMAGE_SOURCE
86+
7087
# -- Install runtime dependencies --
7188
RUN yum install -y libgomp curl && \
7289
yum clean all && \

.devops/cpu.Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,7 @@
11
ARG UBUNTU_VERSION=24.04
2+
ARG BUILD_DATE=N/A
3+
ARG APP_VERSION=N/A
4+
ARG APP_REVISION=N/A
25

36
FROM ubuntu:$UBUNTU_VERSION AS build
47

@@ -27,6 +30,7 @@ RUN mkdir -p /app/lib && \
2730
RUN mkdir -p /app/full \
2831
&& cp build/bin/* /app/full \
2932
&& cp *.py /app/full \
33+
&& cp -r conversion /app/full \
3034
&& cp -r gguf-py /app/full \
3135
&& cp -r requirements /app/full \
3236
&& cp requirements.txt /app/full \
@@ -35,6 +39,19 @@ RUN mkdir -p /app/full \
3539
## Base image
3640
FROM ubuntu:$UBUNTU_VERSION AS base
3741

42+
ARG BUILD_DATE=N/A
43+
ARG APP_VERSION=N/A
44+
ARG APP_REVISION=N/A
45+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
46+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
47+
LABEL org.opencontainers.image.created=$BUILD_DATE \
48+
org.opencontainers.image.version=$APP_VERSION \
49+
org.opencontainers.image.revision=$APP_REVISION \
50+
org.opencontainers.image.title="llama.cpp" \
51+
org.opencontainers.image.description="LLM inference in C/C++" \
52+
org.opencontainers.image.url=$IMAGE_URL \
53+
org.opencontainers.image.source=$IMAGE_SOURCE
54+
3855
RUN apt-get update \
3956
&& apt-get install -y libgomp1 curl \
4057
&& apt autoremove -y \

.devops/cuda.Dockerfile

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,10 @@ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VER
66

77
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
88

9+
ARG BUILD_DATE=N/A
10+
ARG APP_VERSION=N/A
11+
ARG APP_REVISION=N/A
12+
913
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
1014

1115
# CUDA architecture to build for (defaults to all supported archs)
@@ -32,6 +36,7 @@ RUN mkdir -p /app/lib && \
3236
RUN mkdir -p /app/full \
3337
&& cp build/bin/* /app/full \
3438
&& cp *.py /app/full \
39+
&& cp -r conversion /app/full \
3540
&& cp -r gguf-py /app/full \
3641
&& cp -r requirements /app/full \
3742
&& cp requirements.txt /app/full \
@@ -40,6 +45,19 @@ RUN mkdir -p /app/full \
4045
## Base image
4146
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
4247

48+
ARG BUILD_DATE=N/A
49+
ARG APP_VERSION=N/A
50+
ARG APP_REVISION=N/A
51+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
52+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
53+
LABEL org.opencontainers.image.created=$BUILD_DATE \
54+
org.opencontainers.image.version=$APP_VERSION \
55+
org.opencontainers.image.revision=$APP_REVISION \
56+
org.opencontainers.image.title="llama.cpp" \
57+
org.opencontainers.image.description="LLM inference in C/C++" \
58+
org.opencontainers.image.url=$IMAGE_URL \
59+
org.opencontainers.image.source=$IMAGE_SOURCE
60+
4361
RUN apt-get update \
4462
&& apt-get install -y libgomp1 curl \
4563
&& apt autoremove -y \

.devops/intel.Dockerfile

Lines changed: 30 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,22 @@
11
ARG ONEAPI_VERSION=2025.3.3-0-devel-ubuntu24.04
2+
ARG BUILD_DATE=N/A
3+
ARG APP_VERSION=N/A
4+
ARG APP_REVISION=N/A
25

36
## Build Image
47

58
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
69

710
ARG GGML_SYCL_F16=OFF
11+
ARG LEVEL_ZERO_VERSION=1.28.2
12+
ARG LEVEL_ZERO_UBUNTU_VERSION=u24.04
813
RUN apt-get update && \
9-
apt-get install -y git libssl-dev
14+
apt-get install -y git libssl-dev wget ca-certificates && \
15+
cd /tmp && \
16+
wget -q "https://github.com/oneapi-src/level-zero/releases/download/v${LEVEL_ZERO_VERSION}/level-zero_${LEVEL_ZERO_VERSION}%2B${LEVEL_ZERO_UBUNTU_VERSION}_amd64.deb" -O level-zero.deb && \
17+
wget -q "https://github.com/oneapi-src/level-zero/releases/download/v${LEVEL_ZERO_VERSION}/level-zero-devel_${LEVEL_ZERO_VERSION}%2B${LEVEL_ZERO_UBUNTU_VERSION}_amd64.deb" -O level-zero-devel.deb && \
18+
apt-get -o Dpkg::Options::="--force-overwrite" install -y ./level-zero.deb ./level-zero-devel.deb && \
19+
rm -f /tmp/level-zero.deb /tmp/level-zero-devel.deb
1020

1121
WORKDIR /app
1222

@@ -26,18 +36,32 @@ RUN mkdir -p /app/lib && \
2636
RUN mkdir -p /app/full \
2737
&& cp build/bin/* /app/full \
2838
&& cp *.py /app/full \
39+
&& cp -r conversion /app/full \
2940
&& cp -r gguf-py /app/full \
3041
&& cp -r requirements /app/full \
3142
&& cp requirements.txt /app/full \
3243
&& cp .devops/tools.sh /app/full/tools.sh
3344

3445
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
3546

36-
ARG IGC_VERSION=v2.30.1
37-
ARG IGC_VERSION_FULL=2_2.30.1+20950
38-
ARG COMPUTE_RUNTIME_VERSION=26.09.37435.1
39-
ARG COMPUTE_RUNTIME_VERSION_FULL=26.09.37435.1-0
40-
ARG IGDGMM_VERSION=22.9.0
47+
ARG BUILD_DATE=N/A
48+
ARG APP_VERSION=N/A
49+
ARG APP_REVISION=N/A
50+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
51+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
52+
LABEL org.opencontainers.image.created=$BUILD_DATE \
53+
org.opencontainers.image.version=$APP_VERSION \
54+
org.opencontainers.image.revision=$APP_REVISION \
55+
org.opencontainers.image.title="llama.cpp" \
56+
org.opencontainers.image.description="LLM inference in C/C++" \
57+
org.opencontainers.image.url=$IMAGE_URL \
58+
org.opencontainers.image.source=$IMAGE_SOURCE
59+
60+
ARG IGC_VERSION=v2.20.5
61+
ARG IGC_VERSION_FULL=2_2.20.5+19972
62+
ARG COMPUTE_RUNTIME_VERSION=25.40.35563.10
63+
ARG COMPUTE_RUNTIME_VERSION_FULL=25.40.35563.10-0
64+
ARG IGDGMM_VERSION=22.8.2
4165
RUN mkdir /tmp/neo/ && cd /tmp/neo/ \
4266
&& wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-core-${IGC_VERSION_FULL}_amd64.deb \
4367
&& wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-opencl-${IGC_VERSION_FULL}_amd64.deb \
@@ -109,4 +133,3 @@ WORKDIR /app
109133
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
110134

111135
ENTRYPOINT [ "/app/llama-server" ]
112-

.devops/llama-cli-cann.Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,7 @@
11
ARG ASCEND_VERSION=8.5.0-910b-openeuler22.03-py3.10
2+
ARG BUILD_DATE=N/A
3+
ARG APP_VERSION=N/A
4+
ARG APP_REVISION=N/A
25

36
FROM ascendai/cann:$ASCEND_VERSION AS build
47

@@ -28,6 +31,20 @@ RUN echo "Building with static libs" && \
2831

2932
# TODO: use image with NNRT
3033
FROM ascendai/cann:$ASCEND_VERSION AS runtime
34+
35+
ARG BUILD_DATE=N/A
36+
ARG APP_VERSION=N/A
37+
ARG APP_REVISION=N/A
38+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
39+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
40+
LABEL org.opencontainers.image.created=$BUILD_DATE \
41+
org.opencontainers.image.version=$APP_VERSION \
42+
org.opencontainers.image.revision=$APP_REVISION \
43+
org.opencontainers.image.title="llama.cpp" \
44+
org.opencontainers.image.description="LLM inference in C/C++" \
45+
org.opencontainers.image.url=$IMAGE_URL \
46+
org.opencontainers.image.source=$IMAGE_SOURCE
47+
3148
COPY --from=build /app/build/bin/llama-cli /app/build/bin/llama-completion /
3249

3350
ENV LC_ALL=C.utf8

.devops/musa.Dockerfile

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,10 @@ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_V
66

77
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
88

9+
ARG BUILD_DATE=N/A
10+
ARG APP_VERSION=N/A
11+
ARG APP_REVISION=N/A
12+
913
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
1014

1115
# MUSA architecture to build for (defaults to all supported archs)
@@ -37,6 +41,7 @@ RUN mkdir -p /app/lib && \
3741
RUN mkdir -p /app/full \
3842
&& cp build/bin/* /app/full \
3943
&& cp *.py /app/full \
44+
&& cp -r conversion /app/full \
4045
&& cp -r gguf-py /app/full \
4146
&& cp -r requirements /app/full \
4247
&& cp requirements.txt /app/full \
@@ -45,6 +50,19 @@ RUN mkdir -p /app/full \
4550
## Base image
4651
FROM ${BASE_MUSA_RUN_CONTAINER} AS base
4752

53+
ARG BUILD_DATE=N/A
54+
ARG APP_VERSION=N/A
55+
ARG APP_REVISION=N/A
56+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
57+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
58+
LABEL org.opencontainers.image.created=$BUILD_DATE \
59+
org.opencontainers.image.version=$APP_VERSION \
60+
org.opencontainers.image.revision=$APP_REVISION \
61+
org.opencontainers.image.title="llama.cpp" \
62+
org.opencontainers.image.description="LLM inference in C/C++" \
63+
org.opencontainers.image.url=$IMAGE_URL \
64+
org.opencontainers.image.source=$IMAGE_SOURCE
65+
4866
RUN apt-get update \
4967
&& apt-get install -y libgomp1 curl \
5068
&& apt autoremove -y \

.devops/nix/package.nix

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,7 @@ let
103103
vulkan-headers
104104
vulkan-loader
105105
shaderc
106+
spirv-headers
106107
];
107108
in
108109

@@ -146,7 +147,6 @@ effectiveStdenv.mkDerivation (finalAttrs: {
146147
ninja
147148
pkg-config
148149
git
149-
spirv-headers
150150
]
151151
++ optionals useCuda [
152152
cudaPackages.cuda_nvcc

.devops/openvino.Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,10 @@ ARG LIBZE1_VERSION=1.27.0-1~24.04~ppa2
1818
ARG http_proxy=
1919
ARG https_proxy=
2020

21+
ARG BUILD_DATE=N/A
22+
ARG APP_VERSION=N/A
23+
ARG APP_REVISION=N/A
24+
2125
## Build Image
2226
FROM ubuntu:${UBUNTU_VERSION} AS build
2327

@@ -77,6 +81,7 @@ RUN mkdir -p /app/lib && \
7781
RUN mkdir -p /app/full \
7882
&& cp build/ReleaseOV/bin/* /app/full/ \
7983
&& cp *.py /app/full \
84+
&& cp -r conversion /app/full \
8085
&& cp -r gguf-py /app/full \
8186
&& cp -r requirements /app/full \
8287
&& cp requirements.txt /app/full \
@@ -88,6 +93,18 @@ FROM ubuntu:${UBUNTU_VERSION} AS base
8893
# Pass proxy args to runtime stage
8994
ARG http_proxy
9095
ARG https_proxy
96+
ARG BUILD_DATE=N/A
97+
ARG APP_VERSION=N/A
98+
ARG APP_REVISION=N/A
99+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
100+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
101+
LABEL org.opencontainers.image.created=$BUILD_DATE \
102+
org.opencontainers.image.version=$APP_VERSION \
103+
org.opencontainers.image.revision=$APP_REVISION \
104+
org.opencontainers.image.title="llama.cpp" \
105+
org.opencontainers.image.description="LLM inference in C/C++" \
106+
org.opencontainers.image.url=$IMAGE_URL \
107+
org.opencontainers.image.source=$IMAGE_SOURCE
91108

92109
RUN apt-get update \
93110
&& apt-get install -y libgomp1 libtbb12 curl wget ocl-icd-libopencl1 \

.devops/rocm.Dockerfile

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,10 @@ ARG AMDGPU_VERSION=7.2.1
77
# Target the ROCm build image
88
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
99

10+
ARG BUILD_DATE=N/A
11+
ARG APP_VERSION=N/A
12+
ARG APP_REVISION=N/A
13+
1014
### Build image
1115
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
1216

@@ -49,6 +53,7 @@ RUN mkdir -p /app/lib \
4953
RUN mkdir -p /app/full \
5054
&& cp build/bin/* /app/full \
5155
&& cp *.py /app/full \
56+
&& cp -r conversion /app/full \
5257
&& cp -r gguf-py /app/full \
5358
&& cp -r requirements /app/full \
5459
&& cp requirements.txt /app/full \
@@ -57,6 +62,19 @@ RUN mkdir -p /app/full \
5762
## Base image
5863
FROM ${BASE_ROCM_DEV_CONTAINER} AS base
5964

65+
ARG BUILD_DATE=N/A
66+
ARG APP_VERSION=N/A
67+
ARG APP_REVISION=N/A
68+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
69+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
70+
LABEL org.opencontainers.image.created=$BUILD_DATE \
71+
org.opencontainers.image.version=$APP_VERSION \
72+
org.opencontainers.image.revision=$APP_REVISION \
73+
org.opencontainers.image.title="llama.cpp" \
74+
org.opencontainers.image.description="LLM inference in C/C++" \
75+
org.opencontainers.image.url=$IMAGE_URL \
76+
org.opencontainers.image.source=$IMAGE_SOURCE
77+
6078
RUN apt-get update \
6179
&& apt-get install -y libgomp1 curl \
6280
&& apt autoremove -y \

.devops/s390x.Dockerfile

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
11
ARG GCC_VERSION=15.2.0
22
ARG UBUNTU_VERSION=24.04
3+
ARG BUILD_DATE=N/A
4+
ARG APP_VERSION=N/A
5+
ARG APP_REVISION=N/A
36

47
### Build Llama.cpp stage
58
FROM gcc:${GCC_VERSION} AS build
@@ -34,6 +37,7 @@ RUN --mount=type=cache,target=/root/.ccache \
3437

3538
COPY *.py /opt/llama.cpp/bin
3639
COPY .devops/tools.sh /opt/llama.cpp/bin
40+
COPY conversion /opt/llama.cpp/conversion
3741

3842
COPY gguf-py /opt/llama.cpp/gguf-py
3943
COPY requirements.txt /opt/llama.cpp/gguf-py
@@ -44,14 +48,28 @@ COPY requirements /opt/llama.cpp/gguf-py/requirements
4448
FROM scratch AS collector
4549

4650
# Copy llama.cpp binaries and libraries
47-
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
48-
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
49-
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
51+
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
52+
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
53+
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
54+
COPY --from=build /opt/llama.cpp/conversion /llama.cpp/conversion
5055

5156

5257
### Base image
5358
FROM ubuntu:${UBUNTU_VERSION} AS base
5459

60+
ARG BUILD_DATE=N/A
61+
ARG APP_VERSION=N/A
62+
ARG APP_REVISION=N/A
63+
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
64+
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
65+
LABEL org.opencontainers.image.created=$BUILD_DATE \
66+
org.opencontainers.image.version=$APP_VERSION \
67+
org.opencontainers.image.revision=$APP_REVISION \
68+
org.opencontainers.image.title="llama.cpp" \
69+
org.opencontainers.image.description="LLM inference in C/C++" \
70+
org.opencontainers.image.url=$IMAGE_URL \
71+
org.opencontainers.image.source=$IMAGE_SOURCE
72+
5573
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
5674
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
5775
apt update -y && \
@@ -91,6 +109,7 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
91109

92110
COPY --from=collector /llama.cpp/bin /app
93111
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py
112+
COPY --from=collector /llama.cpp/conversion /app/conversion
94113

95114
RUN pip install --no-cache-dir --break-system-packages \
96115
-r /app/gguf-py/requirements.txt

0 commit comments

Comments
 (0)