Skip to content

Commit 6d1a4a8

Browse files
committed
Merge upstream llama.cpp (44 more commits, up to c08d28d)
Includes:
- server: Fix undefined timing measurement errors (ggml-org#21201)
- server: save and clear idle slots on new task --clear-idle (ggml-org#20993)
- common: fix tool call type detection for nullable/enum schemas (ggml-org#21327)
- CUDA: fix FA kernel selection logic (ggml-org#21271)
- kv-cache: do not quantize SWA KV cache (ggml-org#21277) + revert (ggml-org#21332)
- common/parser: fix call ID detection + atomicity (ggml-org#21230)
- jinja: coerce input for string-specific filters (ggml-org#21370)
- Various CI, HIP, WebGPU, and documentation fixes
2 parents b76ccdc + c08d28d commit 6d1a4a8

119 files changed

Lines changed: 44319 additions & 8244 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.devops/cuda-new.Dockerfile

Lines changed: 0 additions & 97 deletions
This file was deleted.

.devops/nix/package.nix

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
rocmPackages,
1717
vulkan-headers,
1818
vulkan-loader,
19-
curl,
19+
openssl,
2020
shaderc,
2121
useBlas ?
2222
builtins.all (x: !x) [
@@ -160,7 +160,8 @@ effectiveStdenv.mkDerivation (finalAttrs: {
160160
++ optionals useMpi [ mpi ]
161161
++ optionals useRocm rocmBuildInputs
162162
++ optionals useBlas [ blas ]
163-
++ optionals useVulkan vulkanBuildInputs;
163+
++ optionals useVulkan vulkanBuildInputs
164+
++ [ openssl ];
164165

165166
cmakeFlags =
166167
[

.devops/rocm.Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
ARG UBUNTU_VERSION=24.04
22

33
# This needs to generally match the container host's environment.
4-
ARG ROCM_VERSION=7.2
5-
ARG AMDGPU_VERSION=7.2
4+
ARG ROCM_VERSION=7.2.1
5+
ARG AMDGPU_VERSION=7.2.1
66

77
# Target the ROCm build image
88
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
@@ -12,11 +12,11 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
1212

1313
# Unless otherwise specified, we make a fat build.
1414
# This is mostly tied to rocBLAS supported archs.
15-
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.2.0/reference/system-requirements.html
15+
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.2.1/reference/system-requirements.html
1616
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityrad/native_linux/native_linux_compatibility.html
1717
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityryz/native_linux/native_linux_compatibility.html
1818

19-
ARG ROCM_DOCKER_ARCH='gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1151;gfx1150;gfx1200;gfx1201'
19+
ARG ROCM_DOCKER_ARCH='gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1150;gfx1200;gfx1201'
2020

2121
# Set ROCm architectures
2222
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}

.github/labeler.yml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,11 @@ IBM zDNN:
2727
- any-glob-to-any-file:
2828
- ggml/include/ggml-zdnn.h
2929
- ggml/src/ggml-zdnn/**
30+
AMD ZenDNN:
31+
- changed-files:
32+
- any-glob-to-any-file:
33+
- ggml/include/ggml-zendnn.h
34+
- ggml/src/ggml-zendnn/**
3035
documentation:
3136
- changed-files:
3237
- any-glob-to-any-file:

.github/workflows/build-self-hosted.yml

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -213,6 +213,27 @@ jobs:
213213
vulkaninfo --summary
214214
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
215215
216+
ggml-ci-win-intel-vulkan:
217+
runs-on: [self-hosted, Windows, X64, Intel]
218+
219+
steps:
220+
- name: Clone
221+
id: checkout
222+
uses: actions/checkout@v6
223+
224+
- name: Test
225+
id: ggml-ci
226+
shell: C:\msys64\usr\bin\bash.exe --noprofile --norc -eo pipefail "{0}"
227+
env:
228+
MSYSTEM: UCRT64
229+
CHERE_INVOKING: 1
230+
PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
231+
run: |
232+
vulkaninfo --summary
233+
# Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
234+
# a valid python environment for testing
235+
LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp
236+
216237
ggml-ci-intel-openvino-gpu-low-perf:
217238
runs-on: [self-hosted, Linux, Intel, OpenVINO]
218239

.github/workflows/build-vulkan.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ jobs:
7272

7373
- name: Setup Vulkan SDK
7474
if: steps.cache-sdk.outputs.cache-hit != 'true'
75-
uses: ./.github/actions/linux-setup-vulkan-llvmpipe
75+
uses: ./.github/actions/linux-setup-vulkan
7676
with:
7777
path: ./vulkan_sdk
7878
version: ${{ env.VULKAN_SDK_VERSION }}

.github/workflows/build.yml

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -472,6 +472,7 @@ jobs:
472472
cmake -B build -S . \
473473
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
474474
-DGGML_HIP_ROCWMMA_FATTN=ON \
475+
-DGPU_TARGETS="gfx1030" \
475476
-DGGML_HIP=ON
476477
cmake --build build --config Release -j $(nproc)
477478
@@ -941,7 +942,7 @@ jobs:
941942
- name: Grab rocWMMA package
942943
id: grab_rocwmma
943944
run: |
944-
curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70200-43~24.04_amd64.deb"
945+
curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70201-81~24.04_amd64.deb"
945946
7z x rocwmma.deb
946947
7z x data.tar
947948
@@ -984,12 +985,13 @@ jobs:
984985
cmake -G "Unix Makefiles" -B build -S . `
985986
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
986987
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
987-
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.2.0/include/" `
988+
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.2.1/include/" `
988989
-DCMAKE_BUILD_TYPE=Release `
989990
-DLLAMA_BUILD_BORINGSSL=ON `
990991
-DROCM_DIR="${env:HIP_PATH}" `
991992
-DGGML_HIP=ON `
992993
-DGGML_HIP_ROCWMMA_FATTN=ON `
994+
-DGPU_TARGETS="gfx1100" `
993995
-DGGML_RPC=ON
994996
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
995997

.github/workflows/docker.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -73,10 +73,10 @@ jobs:
7373
{ "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },
7474
{ "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" },
7575
{ "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" },
76-
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
77-
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
78-
{ "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
79-
{ "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
76+
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.8.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
77+
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.8.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
78+
{ "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
79+
{ "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
8080
{ "tag": "musa", "dockerfile": ".devops/musa.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
8181
{ "tag": "intel", "dockerfile": ".devops/intel.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
8282
{ "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },

.github/workflows/hip-quality-check.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ env:
3535
jobs:
3636
ubuntu-22-hip-quality-check:
3737
runs-on: ubuntu-22.04
38-
container: rocm/dev-ubuntu-22.04:7.2
38+
container: rocm/dev-ubuntu-22.04:7.2.1
3939
steps:
4040
- name: Clone
4141
id: checkout
@@ -59,7 +59,7 @@ jobs:
5959
run: |
6060
cmake -B build -S . \
6161
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
62-
-DGPU_TARGETS=gfx908 \
62+
-DGPU_TARGETS=gfx942 \
6363
-DGGML_HIP=ON \
6464
-DGGML_HIP_EXPORT_METRICS=Off \
6565
-DCMAKE_HIP_FLAGS="-Werror -Wno-tautological-compare" \

.github/workflows/release.yml

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -639,8 +639,8 @@ jobs:
639639
strategy:
640640
matrix:
641641
include:
642-
- ROCM_VERSION: "7.2"
643-
gpu_targets: "gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1151;gfx1150;gfx1200;gfx1201"
642+
- ROCM_VERSION: "7.2.1"
643+
gpu_targets: "gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1150;gfx1200;gfx1201"
644644
build: 'x64'
645645

646646
steps:
@@ -662,7 +662,7 @@ jobs:
662662
sudo apt install -y build-essential git cmake wget
663663
664664
- name: Setup Legacy ROCm
665-
if: matrix.ROCM_VERSION == '7.2'
665+
if: matrix.ROCM_VERSION == '7.2.1'
666666
id: legacy_env
667667
run: |
668668
sudo mkdir --parents --mode=0755 /etc/apt/keyrings
@@ -683,7 +683,7 @@ jobs:
683683
sudo apt-get install -y libssl-dev rocm-hip-sdk
684684
685685
- name: Setup TheRock
686-
if: matrix.ROCM_VERSION != '7.2'
686+
if: matrix.ROCM_VERSION != '7.2.1'
687687
id: therock_env
688688
run: |
689689
wget https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-${{ matrix.ROCM_VERSION }}.tar.gz
@@ -699,7 +699,6 @@ jobs:
699699
run: |
700700
cmake -B build -S . \
701701
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
702-
-DCMAKE_HIP_FLAGS="-mllvm --amdgpu-unroll-threshold-local=600" \
703702
-DCMAKE_BUILD_TYPE=Release \
704703
-DGGML_BACKEND_DL=ON \
705704
-DGGML_NATIVE=OFF \
@@ -717,17 +716,20 @@ jobs:
717716
id: tag
718717
uses: ./.github/actions/get-tag-name
719718

719+
- name: Get ROCm short version
720+
run: echo "ROCM_VERSION_SHORT=$(echo '${{ matrix.ROCM_VERSION }}' | cut -d '.' -f 1,2)" >> $GITHUB_ENV
721+
720722
- name: Pack artifacts
721723
id: pack_artifacts
722724
run: |
723725
cp LICENSE ./build/bin/
724-
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
726+
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
725727
726728
- name: Upload artifacts
727729
uses: actions/upload-artifact@v6
728730
with:
729-
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz
730-
name: llama-bin-ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz
731+
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz
732+
name: llama-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz
731733

732734
windows-hip:
733735
runs-on: windows-2022
@@ -749,7 +751,7 @@ jobs:
749751
- name: Grab rocWMMA package
750752
id: grab_rocwmma
751753
run: |
752-
curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70200-43~24.04_amd64.deb"
754+
curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70201-81~24.04_amd64.deb"
753755
7z x rocwmma.deb
754756
7z x data.tar
755757
@@ -806,7 +808,7 @@ jobs:
806808
cmake -G "Unix Makefiles" -B build -S . `
807809
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
808810
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
809-
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.2.0/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
811+
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.2.1/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
810812
-DCMAKE_BUILD_TYPE=Release `
811813
-DGGML_BACKEND_DL=ON `
812814
-DGGML_NATIVE=OFF `

0 commit comments

Comments
 (0)