Skip to content

Commit 83a0d38

Browse files
committed
Merge remote-tracking branch 'upstream/master' into k_quant_speedup
2 parents b4b6ffc + 45cac7c commit 83a0d38

499 files changed

Lines changed: 35426 additions & 10355 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.devops/nix/package.nix

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
vulkan-loader,
1919
openssl,
2020
shaderc,
21+
spirv-headers,
2122
useBlas ?
2223
builtins.all (x: !x) [
2324
useCuda
@@ -145,6 +146,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
145146
ninja
146147
pkg-config
147148
git
149+
spirv-headers
148150
]
149151
++ optionals useCuda [
150152
cudaPackages.cuda_nvcc

.devops/vulkan.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ RUN apt update && apt install -y git build-essential cmake wget xz-utils
77

88
# Install SSL and Vulkan SDK dependencies
99
RUN apt install -y libssl-dev curl \
10-
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libvulkan-dev glslc
10+
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libvulkan-dev glslc spirv-headers
1111

1212
# Build it
1313
WORKDIR /app

.github/labeler.yml

Lines changed: 5 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -75,21 +75,13 @@ android:
7575
- examples/llama.android/**
7676
server/webui:
7777
- changed-files:
78-
- all:
79-
- any-glob-to-any-file:
80-
- tools/server/webui/**
81-
- tools/server/public/**
82-
- all-globs-to-all-files:
83-
- '!tools/server/webui/**'
84-
- '!tools/server/public/**'
78+
- any-glob-to-any-file:
79+
- tools/server/webui/**
80+
- tools/server/public/**
8581
server:
8682
- changed-files:
87-
- all:
88-
- any-glob-to-any-file:
89-
- tools/server/**
90-
- all-globs-to-all-files:
91-
- '!tools/server/webui/**'
92-
- '!tools/server/public/**'
83+
- any-glob-to-any-file:
84+
- tools/server/**
9385

9486

9587

.github/workflows/build-android.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ jobs:
5151
distribution: zulu
5252

5353
- name: Setup Android SDK
54-
uses: android-actions/setup-android@9fc6c4e9069bf8d3d10b2204b1fb8f6ef7065407 # v3
54+
uses: android-actions/setup-android@40fd30fb8d7440372e1316f5d1809ec01dcd3699 # v4.0.1
5555
with:
5656
log-accepted-android-sdk-licenses: false
5757

.github/workflows/build-riscv.yml

Lines changed: 6 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -47,22 +47,10 @@ jobs:
4747
steps:
4848
- name: Install dependencies
4949
run: |
50-
sudo apt-get update
51-
52-
# Install necessary packages
53-
sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 cmake build-essential wget git-lfs
54-
5550
# Set gcc-14 and g++-14 as the default compilers
5651
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
5752
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
5853
59-
if ! which rustc; then
60-
# Install Rust stable version
61-
sudo apt-get install -y rustup
62-
rustup install stable
63-
rustup default stable
64-
fi
65-
6654
git lfs install
6755
6856
- name: GCC version check
@@ -74,12 +62,12 @@ jobs:
7462
id: checkout
7563
uses: actions/checkout@v6
7664

77-
# FIXME: Enable when ggml-org/ccache-action works on riscv64
78-
# - name: ccache
79-
# uses: ggml-org/ccache-action@v1.2.21
80-
# with:
81-
# key: ubuntu-riscv64-native-sanitizer-${{ matrix.sanytizer }}-${{ matrix.build_type }}
82-
# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
65+
- name: ccache
66+
uses: ggml-org/ccache-action@afde29e5b5422e5da23cb1f639e8baecadeadfc3 # https://github.com/ggml-org/ccache-action/pull/1
67+
with:
68+
key: ubuntu-riscv64-native-sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}
69+
evict-old-files: 1d
70+
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
8371

8472
- name: Build
8573
id: cmake_build

.github/workflows/build-self-hosted.yml

Lines changed: 83 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,36 @@ jobs:
9797
vulkaninfo --summary
9898
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
9999
100+
# TODO: investigate slight precision issues in some operations for test-backend-ops on the WebGPU backend.
101+
#ggml-ci-nvidia-webgpu:
102+
# runs-on: [self-hosted, Linux, NVIDIA]
103+
104+
# steps:
105+
# - name: Clone
106+
# id: checkout
107+
# uses: actions/checkout@v6
108+
109+
# - name: Dawn Dependency
110+
# id: dawn-depends
111+
# run: |
112+
# DAWN_VERSION="v20260317.182325"
113+
# DAWN_OWNER="google"
114+
# DAWN_REPO="dawn"
115+
# DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
116+
# echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
117+
# curl -L -o artifact.tar.gz \
118+
# "https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
119+
# mkdir dawn
120+
# tar -xvf artifact.tar.gz -C dawn --strip-components=1
121+
122+
# - name: Test
123+
# id: ggml-ci
124+
# run: |
125+
# GG_BUILD_WEBGPU=1 \
126+
# GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
127+
# GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
128+
# bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
129+
100130
# TODO: provision AMX-compatible machine
101131
#ggml-ci-cpu-amx:
102132
# runs-on: [self-hosted, Linux, CPU, AMX]
@@ -141,61 +171,59 @@ jobs:
141171
# amd-smi static
142172
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
143173

144-
# TODO: sandbox Mac runners
145-
# ggml-ci-mac-metal:
146-
# runs-on: [self-hosted, macOS, ARM64]
147-
#
148-
# steps:
149-
# - name: Clone
150-
# id: checkout
151-
# uses: actions/checkout@v6
152-
#
153-
# - name: Test
154-
# id: ggml-ci
155-
# run: |
156-
# GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
157-
#
158-
# ggml-ci-mac-webgpu:
159-
# runs-on: [self-hosted, macOS, ARM64]
160-
#
161-
# steps:
162-
# - name: Clone
163-
# id: checkout
164-
# uses: actions/checkout@v6
165-
#
166-
# - name: Dawn Dependency
167-
# id: dawn-depends
168-
# run: |
169-
# DAWN_VERSION="v2.0.0"
170-
# DAWN_OWNER="reeselevine"
171-
# DAWN_REPO="dawn"
172-
# DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release"
173-
# echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
174-
# curl -L -o artifact.zip \
175-
# "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
176-
# mkdir dawn
177-
# unzip artifact.zip
178-
# tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1
179-
#
180-
# - name: Test
181-
# id: ggml-ci
182-
# run: |
183-
# GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
184-
# bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
185-
#
186-
# ggml-ci-mac-vulkan:
187-
# runs-on: [self-hosted, macOS, ARM64]
188-
#
189-
# steps:
190-
# - name: Clone
191-
# id: checkout
192-
# uses: actions/checkout@v6
193-
#
194-
# - name: Test
195-
# id: ggml-ci
196-
# run: |
197-
# vulkaninfo --summary
198-
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
174+
ggml-ci-mac-metal:
175+
runs-on: [self-hosted, macOS, ARM64]
176+
177+
steps:
178+
- name: Clone
179+
id: checkout
180+
uses: actions/checkout@v6
181+
182+
- name: Test
183+
id: ggml-ci
184+
run: |
185+
GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
186+
187+
ggml-ci-mac-webgpu:
188+
runs-on: [self-hosted, macOS, ARM64]
189+
190+
steps:
191+
- name: Clone
192+
id: checkout
193+
uses: actions/checkout@v6
194+
195+
- name: Dawn Dependency
196+
id: dawn-depends
197+
run: |
198+
DAWN_VERSION="v20260317.182325"
199+
DAWN_OWNER="google"
200+
DAWN_REPO="dawn"
201+
DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-macos-latest-Release"
202+
echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
203+
curl -L -o artifact.tar.gz \
204+
"https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
205+
mkdir dawn
206+
tar -xvf artifact.tar.gz -C dawn --strip-components=1
207+
208+
- name: Test
209+
id: ggml-ci
210+
run: |
211+
GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
212+
bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
213+
214+
ggml-ci-mac-vulkan:
215+
runs-on: [self-hosted, macOS, ARM64]
216+
217+
steps:
218+
- name: Clone
219+
id: checkout
220+
uses: actions/checkout@v6
221+
222+
- name: Test
223+
id: ggml-ci
224+
run: |
225+
vulkaninfo --summary
226+
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
199227
200228
ggml-ci-linux-intel-vulkan:
201229
runs-on: [self-hosted, Linux, Intel]

.github/workflows/build-vulkan.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,4 +93,5 @@ jobs:
9393
export GGML_VK_DISABLE_F16=1
9494
export GGML_VK_DISABLE_COOPMAT=1
9595
# This is using llvmpipe and runs slower than other backends
96-
ctest -L main --verbose --timeout 4800
96+
# test-backend-ops is too slow on llvmpipe, skip it
97+
ctest -L main -E test-backend-ops --verbose --timeout 900

.github/workflows/build.yml

Lines changed: 59 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,56 @@ jobs:
267267
wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf
268268
./bin/llama-completion -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
269269
270+
android-arm64:
271+
runs-on: ubuntu-latest
272+
273+
env:
274+
NDK_VERSION: "29.0.14206865"
275+
276+
steps:
277+
- name: Clone
278+
id: checkout
279+
uses: actions/checkout@v6
280+
281+
- name: ccache
282+
uses: ggml-org/ccache-action@v1.2.21
283+
with:
284+
key: android-arm64
285+
evict-old-files: 1d
286+
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
287+
288+
- name: Set up JDK
289+
uses: actions/setup-java@v5
290+
with:
291+
java-version: 17
292+
distribution: temurin
293+
294+
- name: Setup Android SDK
295+
uses: android-actions/setup-android@40fd30fb8d7440372e1316f5d1809ec01dcd3699 # v4.0.1
296+
with:
297+
log-accepted-android-sdk-licenses: false
298+
299+
- name: Install NDK
300+
run: |
301+
sdkmanager "ndk;${{ env.NDK_VERSION }}"
302+
echo "ANDROID_NDK=${ANDROID_SDK_ROOT}/ndk/${{ env.NDK_VERSION }}" >> $GITHUB_ENV
303+
304+
- name: Build
305+
id: cmake_build
306+
run: |
307+
cmake -B build \
308+
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
309+
-DANDROID_ABI=arm64-v8a \
310+
-DANDROID_PLATFORM=android-28 \
311+
-DLLAMA_FATAL_WARNINGS=ON \
312+
-DGGML_BACKEND_DL=ON \
313+
-DGGML_NATIVE=OFF \
314+
-DGGML_CPU_ALL_VARIANTS=ON \
315+
-DGGML_OPENMP=OFF \
316+
-DLLAMA_BUILD_BORINGSSL=ON \
317+
-DGGML_RPC=ON
318+
time cmake --build build --config Release -j $(nproc)
319+
270320
ubuntu-latest-rpc:
271321
runs-on: ubuntu-latest
272322

@@ -318,7 +368,7 @@ jobs:
318368
id: depends
319369
run: |
320370
sudo apt-get update
321-
sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev libssl-dev ninja-build
371+
sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev spirv-headers libssl-dev ninja-build
322372
echo "CC=gcc-14" >> "$GITHUB_ENV"
323373
echo "CXX=g++-14" >> "$GITHUB_ENV"
324374
@@ -1001,22 +1051,14 @@ jobs:
10011051
steps:
10021052
- name: Install dependencies
10031053
run: |
1004-
sudo apt-get update
1005-
10061054
# Install necessary packages
1007-
sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 cmake build-essential libssl-dev wget git-lfs
1055+
sudo apt-get update
1056+
sudo apt-get install -y libssl-dev
10081057
10091058
# Set gcc-14 and g++-14 as the default compilers
10101059
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
10111060
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
10121061
1013-
if ! which rustc; then
1014-
# Install Rust stable version
1015-
sudo apt-get install -y rustup
1016-
rustup install stable
1017-
rustup default stable
1018-
fi
1019-
10201062
git lfs install
10211063
10221064
- name: Check environment
@@ -1032,13 +1074,12 @@ jobs:
10321074
id: checkout
10331075
uses: actions/checkout@v6
10341076

1035-
# FIXME: Enable when ggml-org/ccache-action works on riscv64
1036-
# - name: ccache
1037-
# uses: ggml-org/ccache-action@v1.2.21
1038-
# with:
1039-
# key: ubuntu-cpu-riscv64-native
1040-
# evict-old-files: 1d
1041-
# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
1077+
- name: ccache
1078+
uses: ggml-org/ccache-action@afde29e5b5422e5da23cb1f639e8baecadeadfc3 # https://github.com/ggml-org/ccache-action/pull/1
1079+
with:
1080+
key: ubuntu-cpu-riscv64-native
1081+
evict-old-files: 1d
1082+
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
10421083

10431084
- name: Build
10441085
id: cmake_build

.github/workflows/close-issue.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ jobs:
1717
steps:
1818
- uses: actions/stale@v10
1919
with:
20-
exempt-issue-labels: "refactoring,help wanted,good first issue,research 🔬,bug,roadmap"
20+
exempt-issue-labels: "refactoring,help wanted,good first issue,research 🔬,bug,roadmap,security"
2121
days-before-issue-stale: 30
2222
days-before-issue-close: 14
2323
stale-issue-label: "stale"

0 commit comments

Comments
 (0)