Skip to content

Commit 5ad16c8

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents 71350b4 + 59778f0 commit 5ad16c8

917 files changed

Lines changed: 68008 additions & 49569 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.devops/intel.Dockerfile

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,15 @@ ARG ONEAPI_VERSION=2025.3.3-0-devel-ubuntu24.04
55
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
66

77
ARG GGML_SYCL_F16=OFF
8+
ARG LEVEL_ZERO_VERSION=1.28.2
9+
ARG LEVEL_ZERO_UBUNTU_VERSION=u24.04
810
RUN apt-get update && \
9-
apt-get install -y git libssl-dev
11+
apt-get install -y git libssl-dev wget ca-certificates && \
12+
cd /tmp && \
13+
wget -q "https://github.com/oneapi-src/level-zero/releases/download/v${LEVEL_ZERO_VERSION}/level-zero_${LEVEL_ZERO_VERSION}%2B${LEVEL_ZERO_UBUNTU_VERSION}_amd64.deb" -O level-zero.deb && \
14+
wget -q "https://github.com/oneapi-src/level-zero/releases/download/v${LEVEL_ZERO_VERSION}/level-zero-devel_${LEVEL_ZERO_VERSION}%2B${LEVEL_ZERO_UBUNTU_VERSION}_amd64.deb" -O level-zero-devel.deb && \
15+
apt-get -o Dpkg::Options::="--force-overwrite" install -y ./level-zero.deb ./level-zero-devel.deb && \
16+
rm -f /tmp/level-zero.deb /tmp/level-zero-devel.deb
1017

1118
WORKDIR /app
1219

@@ -33,11 +40,11 @@ RUN mkdir -p /app/full \
3340

3441
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
3542

36-
ARG IGC_VERSION=v2.30.1
37-
ARG IGC_VERSION_FULL=2_2.30.1+20950
38-
ARG COMPUTE_RUNTIME_VERSION=26.09.37435.1
39-
ARG COMPUTE_RUNTIME_VERSION_FULL=26.09.37435.1-0
40-
ARG IGDGMM_VERSION=22.9.0
43+
ARG IGC_VERSION=v2.20.5
44+
ARG IGC_VERSION_FULL=2_2.20.5+19972
45+
ARG COMPUTE_RUNTIME_VERSION=25.40.35563.10
46+
ARG COMPUTE_RUNTIME_VERSION_FULL=25.40.35563.10-0
47+
ARG IGDGMM_VERSION=22.8.2
4148
RUN mkdir /tmp/neo/ && cd /tmp/neo/ \
4249
&& wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-core-${IGC_VERSION_FULL}_amd64.deb \
4350
&& wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-opencl-${IGC_VERSION_FULL}_amd64.deb \
@@ -109,4 +116,3 @@ WORKDIR /app
109116
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
110117

111118
ENTRYPOINT [ "/app/llama-server" ]
112-

.devops/nix/package.nix

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ let
103103
vulkan-headers
104104
vulkan-loader
105105
shaderc
106+
spirv-headers
106107
];
107108
in
108109

@@ -146,7 +147,6 @@ effectiveStdenv.mkDerivation (finalAttrs: {
146147
ninja
147148
pkg-config
148149
git
149-
spirv-headers
150150
]
151151
++ optionals useCuda [
152152
cudaPackages.cuda_nvcc

.editorconfig

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,7 @@ insert_final_newline = unset
4545
trim_trailing_whitespace = unset
4646
insert_final_newline = unset
4747

48-
[tools/server/webui/**]
49-
indent_style = unset
50-
indent_size = unset
51-
end_of_line = unset
52-
charset = unset
53-
trim_trailing_whitespace = unset
54-
insert_final_newline = unset
55-
56-
[tools/server/public/**]
48+
[tools/ui/**]
5749
indent_style = unset
5850
indent_size = unset
5951
end_of_line = unset

.gitattributes

Lines changed: 0 additions & 4 deletions
This file was deleted.

.github/labeler.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,10 @@ android:
7373
- changed-files:
7474
- any-glob-to-any-file:
7575
- examples/llama.android/**
76-
server/webui:
76+
server/ui:
7777
- changed-files:
7878
- any-glob-to-any-file:
79-
- tools/server/webui/**
80-
- tools/server/public/**
79+
- tools/ui/**
8180
server:
8281
- changed-files:
8382
- any-glob-to-any-file:

.github/workflows/build-and-test-snapdragon.yml

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,45 @@ jobs:
5858
name: llama-cpp-android-arm64-snapdragon
5959
path: pkg-snapdragon/llama.cpp
6060

61+
linux-iot-snapdragon:
62+
runs-on: ubuntu-latest
63+
container:
64+
image: 'ghcr.io/snapdragon-toolchain/arm64-linux:v0.1'
65+
defaults:
66+
run:
67+
shell: bash
68+
69+
steps:
70+
- name: Clone
71+
uses: actions/checkout@v6
72+
with:
73+
fetch-depth: 0
74+
lfs: false
75+
76+
- name: Build Llama.CPP for Snapdragon Linux IoT
77+
id: build_llama_cpp_snapdragon_linux
78+
run: |
79+
cp docs/backend/snapdragon/CMakeUserPresets.json .
80+
cmake --preset arm64-linux-snapdragon-release -B build-snapdragon -DGGML_OPENCL=ON
81+
cmake --build build-snapdragon -j $(nproc)
82+
cmake --install build-snapdragon --prefix pkg-snapdragon/llama.cpp
83+
84+
- name: Upload Llama.CPP Snapdragon Linux IoT Build Artifact
85+
if: ${{ always() && steps.build_llama_cpp_snapdragon_linux.outcome == 'success' }}
86+
uses: actions/upload-artifact@v6
87+
with:
88+
name: llama-cpp-linux-arm64-snapdragon
89+
path: pkg-snapdragon/llama.cpp
90+
6191
test-snapdragon-qdc:
62-
name: Test on QDC Android Device (${{ matrix.device }})
63-
needs: [android-ndk-snapdragon]
64-
runs-on: ubuntu-slim
92+
name: Test on QDC Device (${{ matrix.device }})
93+
needs: [android-ndk-snapdragon, linux-iot-snapdragon]
94+
runs-on: ubuntu-24.04-arm
95+
timeout-minutes: 90
6596
strategy:
6697
fail-fast: false
6798
matrix:
68-
device: [SM8750, SM8650, SM8850]
99+
device: [SM8750, SM8850, QCS9075M]
69100

70101
steps:
71102
- name: Checkout
@@ -74,11 +105,11 @@ jobs:
74105
- name: Download build artifact
75106
uses: actions/download-artifact@v7
76107
with:
77-
name: llama-cpp-android-arm64-snapdragon
108+
name: ${{ startsWith(matrix.device, 'QCS') && 'llama-cpp-linux-arm64-snapdragon' || 'llama-cpp-android-arm64-snapdragon' }}
78109
path: pkg-snapdragon/llama.cpp
79110

80111
- name: Set up Python
81-
uses: actions/setup-python@v5
112+
uses: actions/setup-python@v6
82113
with:
83114
python-version: '3.x'
84115
cache: pip
@@ -107,7 +138,8 @@ jobs:
107138
--test all \
108139
--pkg-dir pkg-snapdragon/llama.cpp \
109140
--model-url "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \
110-
--device ${{ matrix.device }}
141+
--device ${{ matrix.device }} \
142+
${{ startsWith(matrix.device, 'QCS') && '--retries 2 --retry-delay 300' || '' }}
111143
env:
112144
QDC_API_KEY: ${{ secrets.QDC_API_KEY }}
113145

.github/workflows/build-cross.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,16 +301,17 @@ jobs:
301301
export RISCV_ROOT_PATH=${PWD}/spacemit_toolchain
302302
cmake -B build -DLLAMA_OPENSSL=OFF \
303303
-DCMAKE_BUILD_TYPE=Release \
304-
-DGGML_OPENMP=OFF \
305304
-DLLAMA_BUILD_EXAMPLES=ON \
305+
-DGGML_CPU_REPACK=OFF \
306306
-DLLAMA_BUILD_TOOLS=ON \
307307
-DLLAMA_BUILD_TESTS=OFF \
308308
-DGGML_CPU_RISCV64_SPACEMIT=ON \
309309
-DGGML_RVV=ON \
310+
-DGGML_RV_ZVFH=ON \
310311
-DGGML_RV_ZFH=ON \
311312
-DGGML_RV_ZICBOP=ON \
312313
-DGGML_RV_ZIHINTPAUSE=ON \
313-
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
314+
-DGGML_RV_ZBA=ON \
314315
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
315316
316317
cmake --build build --config Release -j $(nproc)

.github/workflows/build-self-hosted.yml

Lines changed: 67 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,24 @@ env:
5555
LLAMA_LOG_TIMESTAMPS: 1
5656

5757
jobs:
58+
determine-tag:
59+
name: Determine tag name
60+
runs-on: ubuntu-slim
61+
outputs:
62+
tag_name: ${{ steps.tag.outputs.name }}
63+
steps:
64+
- name: Clone
65+
uses: actions/checkout@v6
66+
with:
67+
fetch-depth: 0
68+
- name: Determine tag name
69+
id: tag
70+
uses: ./.github/actions/get-tag-name
71+
env:
72+
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
73+
5874
ggml-ci-nvidia-cuda:
75+
needs: determine-tag
5976
runs-on: [self-hosted, Linux, NVIDIA]
6077

6178
steps:
@@ -65,11 +82,14 @@ jobs:
6582

6683
- name: Test
6784
id: ggml-ci
85+
env:
86+
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
6887
run: |
6988
nvidia-smi
7089
GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
7190
7291
ggml-ci-nvidia-vulkan-cm:
92+
needs: determine-tag
7393
runs-on: [self-hosted, Linux, NVIDIA]
7494

7595
steps:
@@ -79,11 +99,14 @@ jobs:
7999

80100
- name: Test
81101
id: ggml-ci
102+
env:
103+
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
82104
run: |
83105
vulkaninfo --summary
84106
GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
85107
86108
ggml-ci-nvidia-vulkan-cm2:
109+
needs: determine-tag
87110
runs-on: [self-hosted, Linux, NVIDIA, COOPMAT2]
88111

89112
steps:
@@ -93,39 +116,40 @@ jobs:
93116

94117
- name: Test
95118
id: ggml-ci
119+
env:
120+
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
96121
run: |
97122
vulkaninfo --summary
98123
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
99124
100-
# TODO: investigate slight precision issues in some operations for test-backend-ops on the WebGPU backend.
101-
#ggml-ci-nvidia-webgpu:
102-
# runs-on: [self-hosted, Linux, NVIDIA]
125+
ggml-ci-nvidia-webgpu:
126+
runs-on: [self-hosted, Linux, NVIDIA]
103127

104-
# steps:
105-
# - name: Clone
106-
# id: checkout
107-
# uses: actions/checkout@v6
128+
steps:
129+
- name: Clone
130+
id: checkout
131+
uses: actions/checkout@v6
108132

109-
# - name: Dawn Dependency
110-
# id: dawn-depends
111-
# run: |
112-
# DAWN_VERSION="v20260317.182325"
113-
# DAWN_OWNER="google"
114-
# DAWN_REPO="dawn"
115-
# DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
116-
# echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
117-
# curl -L -o artifact.tar.gz \
118-
# "https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
119-
# mkdir dawn
120-
# tar -xvf artifact.tar.gz -C dawn --strip-components=1
133+
- name: Dawn Dependency
134+
id: dawn-depends
135+
run: |
136+
DAWN_VERSION="v20260317.182325"
137+
DAWN_OWNER="google"
138+
DAWN_REPO="dawn"
139+
DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
140+
echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
141+
curl -L -o artifact.tar.gz \
142+
"https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
143+
mkdir dawn
144+
tar -xvf artifact.tar.gz -C dawn --strip-components=1
121145
122-
# - name: Test
123-
# id: ggml-ci
124-
# run: |
125-
# GG_BUILD_WEBGPU=1 \
126-
# GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
127-
# GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
128-
# bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
146+
- name: Test
147+
id: ggml-ci
148+
run: |
149+
GG_BUILD_WEBGPU=1 \
150+
GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
151+
GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
152+
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
129153
130154
# TODO: provision AMX-compatible machine
131155
#ggml-ci-cpu-amx:
@@ -172,6 +196,7 @@ jobs:
172196
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
173197

174198
ggml-ci-mac-metal:
199+
needs: determine-tag
175200
runs-on: [self-hosted, macOS, ARM64]
176201

177202
steps:
@@ -181,10 +206,13 @@ jobs:
181206

182207
- name: Test
183208
id: ggml-ci
209+
env:
210+
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
184211
run: |
185212
GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
186213
187214
ggml-ci-mac-webgpu:
215+
needs: determine-tag
188216
runs-on: [self-hosted, macOS, ARM64]
189217

190218
steps:
@@ -207,11 +235,14 @@ jobs:
207235
208236
- name: Test
209237
id: ggml-ci
238+
env:
239+
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
210240
run: |
211241
GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
212242
bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
213243
214244
ggml-ci-mac-vulkan:
245+
needs: determine-tag
215246
runs-on: [self-hosted, macOS, ARM64]
216247

217248
steps:
@@ -221,11 +252,14 @@ jobs:
221252

222253
- name: Test
223254
id: ggml-ci
255+
env:
256+
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
224257
run: |
225258
vulkaninfo --summary
226259
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
227260
228261
ggml-ci-linux-intel-vulkan:
262+
needs: determine-tag
229263
runs-on: [self-hosted, Linux, Intel]
230264

231265
steps:
@@ -237,11 +271,14 @@ jobs:
237271

238272
- name: Test
239273
id: ggml-ci
274+
env:
275+
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
240276
run: |
241277
vulkaninfo --summary
242278
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
243279
244280
ggml-ci-win-intel-vulkan:
281+
needs: determine-tag
245282
runs-on: [self-hosted, Windows, X64, Intel]
246283

247284
steps:
@@ -256,13 +293,15 @@ jobs:
256293
MSYSTEM: UCRT64
257294
CHERE_INVOKING: 1
258295
PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
296+
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
259297
run: |
260298
vulkaninfo --summary
261299
# Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
262300
# a valid python environment for testing
263301
LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp
264302
265303
ggml-ci-intel-openvino-gpu-low-perf:
304+
needs: determine-tag
266305
runs-on: [self-hosted, Linux, Intel, OpenVINO]
267306

268307
concurrency:
@@ -294,6 +333,8 @@ jobs:
294333
295334
- name: Test
296335
id: ggml-ci
336+
env:
337+
HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
297338
run: |
298339
source ./openvino_toolkit/setupvars.sh
299340
GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

0 commit comments

Comments
 (0)