Skip to content

Commit b3e89a9

Browse files
authored
Merge branch 'ggml-org:master' into carbon-fns-sampler
2 parents 33c7c6b + 48b88c3 commit b3e89a9

253 files changed

Lines changed: 17533 additions & 6657 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.devops/zendnn.Dockerfile

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# Global build arguments. UBUNTU_VERSION selects the base image tag used by the
# FROM instructions below.
ARG UBUNTU_VERSION=24.04
ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
ARG APP_REVISION=N/A

## Build image
# Compiles the project with the ZenDNN backend enabled and stages the results
# under /app/lib (shared libraries) and /app/full (binaries, scripts, tooling).
FROM ubuntu:${UBUNTU_VERSION} AS build

# Compiler toolchain, build tools and development headers.
RUN apt-get update && \
    apt-get install -y \
        gcc-13 \
        g++-13 \
        build-essential \
        git \
        cmake \
        libssl-dev \
        libomp-dev \
        libnuma-dev \
        python3 \
        ca-certificates

# Make CMake pick the GCC 13 toolchain installed above.
ENV CC=gcc-13 \
    CXX=g++-13

WORKDIR /app

COPY . .

# Configure a Release build, then compile using every available core.
RUN cmake -S . -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DGGML_NATIVE=OFF \
        -DLLAMA_BUILD_TESTS=OFF \
        -DGGML_BACKEND_DL=ON \
        -DGGML_CPU_ALL_VARIANTS=ON \
        -DGGML_ZENDNN=ON && \
    cmake --build build -j $(nproc)

# Gather every shared object produced by the build; -P copies symlinks as
# symlinks instead of following them.
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Assemble the payload of the "full" image: binaries, Python scripts,
# conversion tooling, requirements and the tools.sh entrypoint.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r conversion /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
31+
32+
## Base image
# Common runtime layer that the full, light and server targets build on.
FROM ubuntu:$UBUNTU_VERSION AS base

# Declared inside the stage because ARGs placed before the first FROM are not
# visible to instructions within a stage.
ARG BUILD_DATE=N/A
ARG APP_VERSION=N/A
ARG APP_REVISION=N/A
ARG IMAGE_URL=https://github.com/ggml-org/llama.cpp
ARG IMAGE_SOURCE=https://github.com/ggml-org/llama.cpp
LABEL org.opencontainers.image.created=$BUILD_DATE \
      org.opencontainers.image.version=$APP_VERSION \
      org.opencontainers.image.revision=$APP_REVISION \
      org.opencontainers.image.title="llama.cpp" \
      org.opencontainers.image.description="LLM inference in C/C++" \
      org.opencontainers.image.url=$IMAGE_URL \
      org.opencontainers.image.source=$IMAGE_SOURCE

# Runtime dependencies (curl is also what the server target's HEALTHCHECK runs).
# `apt-get` is used for every step: the `apt` front-end prints a warning when
# scripted because its command-line interface is not guaranteed to stay stable.
# Package caches and index files are removed in the same layer that creates them.
RUN apt-get update \
    && apt-get install -y libgomp1 libnuma1 curl \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

# Shared libraries collected by the build stage.
COPY --from=build /app/lib/ /app
57+
58+
### Full
# Every binary plus the Python scripts and their dependencies; started through tools.sh.
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# Python runtime and the packages listed in requirements.txt.
# --break-system-packages: installs into the distribution-managed interpreter.
# --no-cache-dir: keeps pip's download cache out of the layer; the cleanup at
#   the end only covers apt and /var/cache, not pip's own cache directory.
# `apt-get` replaces `apt`, whose command-line interface is not stable for scripts.
RUN apt-get update \
    && apt-get install -y \
        git \
        python3 \
        python3-pip \
        python3-wheel \
    && pip install --no-cache-dir --break-system-packages --upgrade setuptools \
    && pip install --no-cache-dir --break-system-packages -r requirements.txt \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]
80+
81+
### Light, CLI only
FROM base AS light

# A COPY with more than one source requires a directory destination written
# with a trailing slash; the bare "/app" form is only tolerated by some builders.
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app/

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]
89+
90+
### Server, Server only
FROM base AS server

WORKDIR /app

# Only the server binary is added on top of the shared base layer.
COPY --from=build /app/full/llama-server /app

# NOTE(review): presumably read by llama-server as its bind address so the
# service is reachable from outside the container; confirm against the server's
# argument handling.
ENV LLAMA_ARG_HOST=0.0.0.0

# Marks the container unhealthy when the /health endpoint on port 8080 stops
# answering successfully (curl -f fails on HTTP error responses).
HEALTHCHECK CMD ["curl", "-f", "http://localhost:8080/health"]

ENTRYPOINT ["/app/llama-server"]
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Composite action that deletes every GitHub Actions cache whose key starts
# with "ccache-<key>".
# NOTE(review): the script calls the `gh` CLI but no token is configured here;
# the calling job presumably provides GH_TOKEN in its environment - confirm.
name: "ccache-clear"
description: "Delete all GitHub Actions caches matching a key prefix"
inputs:
  key:
    description: "Cache key prefix to match and delete"
    required: true

runs:
  using: "composite"
  steps:
    - name: Clear caches
      shell: bash
      env:
        # The input reaches the script through the environment rather than
        # being expanded into the script text, so its value is never parsed
        # as shell code.
        CACHE_KEY_PREFIX: ccache-${{ inputs.key }}
      run: |
        # gh returns only 30 caches unless --limit is raised; without it,
        # anything beyond the first page would be left behind.
        CACHES=$(gh cache list --key "$CACHE_KEY_PREFIX" --limit 1000 --json id,key --jq '.[] | "\(.id) \(.key)"' 2>/dev/null)
        if [ -z "$CACHES" ]; then
          # Report the prefix that was actually searched for.
          echo "No caches found with key prefix: $CACHE_KEY_PREFIX"
          exit 0
        fi
        # One "<id> <key>" pair per line; delete by id, log the key for context.
        while read -r id key; do
          echo "Deleting cache: $id ($key)"
          gh cache delete "$id"
        done <<< "$CACHES"

.github/actions/windows-setup-cuda/action.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,34 @@ runs:
9696
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
9797
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
9898
echo "CUDA_PATH_V13_1=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
99+
100+
- name: Install Cuda Toolkit 13.3
  if: ${{ inputs.cuda_version == '13.3' }}
  shell: pwsh
  run: |
    # Install root for this toolkit version and the NVIDIA redistributable index.
    $cudaRoot = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.3"
    $redist   = "https://developer.download.nvidia.com/compute/cuda/redist"

    # Component -> archive version. The order is preserved because later
    # archives are copied over earlier ones with /Y.
    $packages = [ordered]@{
      cuda_crt                  = "13.3.33"
      cuda_cudart               = "13.3.29"
      cuda_nvcc                 = "13.3.33"
      cuda_nvrtc                = "13.3.33"
      libcublas                 = "13.5.1.27"
      libnvvm                   = "13.3.33"
      cuda_nvtx                 = "13.3.29"
      cuda_profiler_api         = "13.3.27"
      visual_studio_integration = "13.3.27"
      cccl                      = "13.3.3.3.1"
    }

    mkdir -p "$cudaRoot"
    choco install unzip -y

    # Download each component archive into the current directory.
    foreach ($pkg in $packages.GetEnumerator()) {
      $archive = "$($pkg.Key)-windows-x86_64-$($pkg.Value)-archive"
      curl -O "$redist/$($pkg.Key)/windows-x86_64/$archive.zip"
    }

    # Extract all archives; each one unpacks into its own "<name>-archive" folder.
    unzip '*.zip' -d "$cudaRoot"

    # Merge every extracted folder into the toolkit root.
    foreach ($pkg in $packages.GetEnumerator()) {
      $archive = "$($pkg.Key)-windows-x86_64-$($pkg.Value)-archive"
      xcopy "$cudaRoot\$archive\*" "$cudaRoot" /E /I /H /Y
    }

    # Expose the toolkit to the following steps of the job.
    echo "$cudaRoot\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
    echo "CUDA_PATH=$cudaRoot" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
    echo "CUDA_PATH_V13_3=$cudaRoot" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8

.github/workflows/build-3rd-party.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ concurrency:
2222
env:
2323
GGML_NLOOP: 3
2424
GGML_N_THREADS: 1
25-
LLAMA_LOG_COLORS: 1
26-
LLAMA_LOG_PREFIX: 1
27-
LLAMA_LOG_TIMESTAMPS: 1
25+
LLAMA_ARG_LOG_COLORS: 1
26+
LLAMA_ARG_LOG_PREFIX: 1
27+
LLAMA_ARG_LOG_TIMESTAMPS: 1
2828

2929
jobs:
3030
ubuntu-24-llguidance:

.github/workflows/build-android.yml

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@ concurrency:
2727
env:
2828
GGML_NLOOP: 3
2929
GGML_N_THREADS: 1
30-
LLAMA_LOG_COLORS: 1
31-
LLAMA_LOG_PREFIX: 1
32-
LLAMA_LOG_TIMESTAMPS: 1
30+
LLAMA_ARG_LOG_COLORS: 1
31+
LLAMA_ARG_LOG_PREFIX: 1
32+
LLAMA_ARG_LOG_TIMESTAMPS: 1
3333

3434
jobs:
35-
android:
35+
default:
3636
runs-on: ubuntu-latest
3737

3838
steps:
@@ -58,7 +58,7 @@ jobs:
5858
cd examples/llama.android
5959
./gradlew build --no-daemon
6060
61-
android-ndk:
61+
ndk:
6262
runs-on: ubuntu-latest
6363
container:
6464
image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.3'
@@ -92,7 +92,7 @@ jobs:
9292
name: llama-cpp-android-arm64-cpu
9393
path: pkg-adb/llama.cpp
9494

95-
android-arm64:
95+
arm64:
9696
runs-on: ubuntu-latest
9797

9898
env:
@@ -103,12 +103,18 @@ jobs:
103103
id: checkout
104104
uses: actions/checkout@v6
105105

106-
- name: ccache
107-
uses: ggml-org/ccache-action@v1.2.21
108-
with:
109-
key: android-arm64
110-
evict-old-files: 1d
111-
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
106+
# note : disabled to spare some cache space (https://github.com/ggml-org/llama.cpp/pull/23789)
107+
# for some reason, the ccache does not improve the build time in this case
108+
# example:
109+
# cache off: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78160400831
110+
# cache on: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78224189394
111+
#
112+
#- name: ccache
113+
# uses: ggml-org/ccache-action@v1.2.21
114+
# with:
115+
# key: android-ubuntu-arm64
116+
# evict-old-files: 1d
117+
# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
112118

113119
- name: Set up JDK
114120
uses: actions/setup-java@v5

.github/workflows/build-apple.yml

Lines changed: 71 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,12 @@ concurrency:
3232
env:
3333
GGML_NLOOP: 3
3434
GGML_N_THREADS: 1
35-
LLAMA_LOG_COLORS: 1
36-
LLAMA_LOG_PREFIX: 1
37-
LLAMA_LOG_TIMESTAMPS: 1
35+
LLAMA_ARG_LOG_COLORS: 1
36+
LLAMA_ARG_LOG_PREFIX: 1
37+
LLAMA_ARG_LOG_TIMESTAMPS: 1
3838

3939
jobs:
40-
macOS-latest-ios:
40+
macos-latest-arm64:
4141
runs-on: macos-latest
4242

4343
steps:
@@ -48,27 +48,66 @@ jobs:
4848
- name: ccache
4949
uses: ggml-org/ccache-action@v1.2.21
5050
with:
51-
key: macOS-latest-ios
51+
key: apple-arm64
5252
evict-old-files: 1d
5353
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
5454

5555
- name: Build
5656
id: cmake_build
5757
run: |
5858
sysctl -a
59-
cmake -B build -G Xcode \
59+
cmake -B build \
60+
-DCMAKE_BUILD_RPATH="@loader_path" \
61+
-DLLAMA_FATAL_WARNINGS=ON \
62+
-DLLAMA_BUILD_BORINGSSL=ON \
6063
-DGGML_METAL_USE_BF16=ON \
61-
-DGGML_METAL_EMBED_LIBRARY=ON \
62-
-DLLAMA_BUILD_APP=OFF \
63-
-DLLAMA_BUILD_COMMON=OFF \
64-
-DLLAMA_BUILD_EXAMPLES=OFF \
65-
-DLLAMA_BUILD_TOOLS=OFF \
66-
-DLLAMA_BUILD_TESTS=OFF \
67-
-DLLAMA_BUILD_SERVER=OFF \
68-
-DCMAKE_SYSTEM_NAME=iOS \
69-
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
70-
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
71-
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
64+
-DGGML_METAL_EMBED_LIBRARY=OFF \
65+
-DGGML_METAL_SHADER_DEBUG=ON \
66+
-DGGML_RPC=ON
67+
time cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
68+
leaks -atExit -- ./build/bin/test-thread-safety -hf ggml-org/gemma-3-270m-qat-GGUF -ngl 99 -p "$(printf 'hello %.0s' {1..128})" -n 16 -c 512 -ub 32 -np 2 -t 2 -lv 1
69+
70+
- name: Test
71+
id: cmake_test
72+
run: |
73+
cd build
74+
ctest -L main -E "test-llama-archs" --verbose --timeout 900
75+
76+
macos-latest-x64:
77+
runs-on: macos-15-intel
78+
79+
steps:
80+
- name: Clone
81+
id: checkout
82+
uses: actions/checkout@v6
83+
84+
- name: ccache
85+
uses: ggml-org/ccache-action@v1.2.21
86+
with:
87+
key: apple-x64
88+
evict-old-files: 1d
89+
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
90+
91+
- name: Build
92+
id: cmake_build
93+
run: |
94+
sysctl -a
95+
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
96+
# https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
97+
cmake -B build \
98+
-DCMAKE_BUILD_RPATH="@loader_path" \
99+
-DLLAMA_FATAL_WARNINGS=ON \
100+
-DLLAMA_BUILD_BORINGSSL=ON \
101+
-DGGML_METAL=OFF \
102+
-DGGML_RPC=ON \
103+
-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
104+
time cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
105+
106+
- name: Test
107+
id: cmake_test
108+
run: |
109+
cd build
110+
ctest -L main --verbose --timeout 900
72111
73112
macos-latest-ios-xcode:
74113
runs-on: macos-latest
@@ -117,18 +156,19 @@ jobs:
117156
xcodebuild -downloadPlatform iOS
118157
xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
119158
120-
macOS-latest-tvos:
159+
macos-latest-tvos:
121160
runs-on: macos-latest
122161

123162
steps:
124163
- name: Clone
125164
id: checkout
126165
uses: actions/checkout@v6
127166

167+
# TODO: this likely does not do anything - if yes, remove it
128168
- name: ccache
129169
uses: ggml-org/ccache-action@v1.2.21
130170
with:
131-
key: macOS-latest-tvos
171+
key: apple-tvos
132172
evict-old-files: 1d
133173
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
134174

@@ -150,14 +190,22 @@ jobs:
150190
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
151191
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
152192
153-
macOS-latest-visionos:
193+
macos-latest-visionos:
154194
runs-on: macos-latest
155195

156196
steps:
157197
- name: Clone
158198
id: checkout
159199
uses: actions/checkout@v6
160200

201+
# TODO: this likely does not do anything - if yes, remove it
202+
- name: ccache
203+
uses: ggml-org/ccache-action@v1.2.21
204+
with:
205+
key: apple-visionos
206+
evict-old-files: 1d
207+
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
208+
161209
- name: Build
162210
id: cmake_build
163211
run: |
@@ -176,7 +224,7 @@ jobs:
176224
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
177225
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
178226
179-
macOS-latest-swift:
227+
macos-latest-swift:
180228
runs-on: macos-latest
181229
needs: macos-latest-ios-xcode
182230

@@ -189,10 +237,11 @@ jobs:
189237
id: checkout
190238
uses: actions/checkout@v6
191239

240+
# TODO: this likely does not do anything - if yes, remove it
192241
- name: ccache
193242
uses: ggml-org/ccache-action@v1.2.21
194243
with:
195-
key: macOS-latest-swift
244+
key: apple-swift
196245
evict-old-files: 1d
197246
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
198247

0 commit comments

Comments
 (0)