Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 39 additions & 7 deletions .github/workflows/build-and-test-snapdragon.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,45 @@ jobs:
name: llama-cpp-android-arm64-snapdragon
path: pkg-snapdragon/llama.cpp

linux-iot-snapdragon:
runs-on: ubuntu-latest
container:
image: 'ghcr.io/snapdragon-toolchain/arm64-linux:v0.1'
defaults:
run:
shell: bash

steps:
- name: Clone
uses: actions/checkout@v6
with:
fetch-depth: 0
lfs: false

- name: Build Llama.CPP for Snapdragon Linux IoT
id: build_llama_cpp_snapdragon_linux
run: |
cp docs/backend/snapdragon/CMakeUserPresets.json .
cmake --preset arm64-linux-snapdragon-release -B build-snapdragon -DGGML_OPENCL=ON
cmake --build build-snapdragon -j $(nproc)
cmake --install build-snapdragon --prefix pkg-snapdragon/llama.cpp

- name: Upload Llama.CPP Snapdragon Linux IoT Build Artifact
if: ${{ always() && steps.build_llama_cpp_snapdragon_linux.outcome == 'success' }}
uses: actions/upload-artifact@v6
with:
name: llama-cpp-linux-arm64-snapdragon
path: pkg-snapdragon/llama.cpp

test-snapdragon-qdc:
name: Test on QDC Android Device (${{ matrix.device }})
needs: [android-ndk-snapdragon]
runs-on: ubuntu-slim
name: Test on QDC Device (${{ matrix.device }})
needs: [android-ndk-snapdragon, linux-iot-snapdragon]
runs-on: ubuntu-24.04-arm
timeout-minutes: 90
strategy:
fail-fast: false
matrix:
device: [SM8750, SM8650, SM8850]
device: [SM8750, SM8850, QCS9075M]

steps:
- name: Checkout
Expand All @@ -74,11 +105,11 @@ jobs:
- name: Download build artifact
uses: actions/download-artifact@v7
with:
name: llama-cpp-android-arm64-snapdragon
name: ${{ startsWith(matrix.device, 'QCS') && 'llama-cpp-linux-arm64-snapdragon' || 'llama-cpp-android-arm64-snapdragon' }}
path: pkg-snapdragon/llama.cpp

- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.x'
cache: pip
Expand Down Expand Up @@ -107,7 +138,8 @@ jobs:
--test all \
--pkg-dir pkg-snapdragon/llama.cpp \
--model-url "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \
--device ${{ matrix.device }}
--device ${{ matrix.device }} \
${{ startsWith(matrix.device, 'QCS') && '--retries 2 --retry-delay 300' || '' }}
env:
QDC_API_KEY: ${{ secrets.QDC_API_KEY }}

Expand Down
91 changes: 65 additions & 26 deletions .github/workflows/build-self-hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,22 @@ env:
LLAMA_LOG_TIMESTAMPS: 1

jobs:
determine-tag:
name: Determine tag name
runs-on: ubuntu-slim
outputs:
tag_name: ${{ steps.tag.outputs.name }}
steps:
- name: Clone
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Determine tag name
id: tag
uses: ./.github/actions/get-tag-name

ggml-ci-nvidia-cuda:
needs: determine-tag
runs-on: [self-hosted, Linux, NVIDIA]

steps:
Expand All @@ -65,11 +80,14 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
nvidia-smi
GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

ggml-ci-nvidia-vulkan-cm:
needs: determine-tag
runs-on: [self-hosted, Linux, NVIDIA]

steps:
Expand All @@ -79,11 +97,14 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

ggml-ci-nvidia-vulkan-cm2:
needs: determine-tag
runs-on: [self-hosted, Linux, NVIDIA, COOPMAT2]

steps:
Expand All @@ -93,39 +114,40 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

# TODO: investigate slight precision issues in some operations for test-backend-ops on the WebGPU backend.
#ggml-ci-nvidia-webgpu:
# runs-on: [self-hosted, Linux, NVIDIA]
ggml-ci-nvidia-webgpu:
runs-on: [self-hosted, Linux, NVIDIA]

# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6

# - name: Dawn Dependency
# id: dawn-depends
# run: |
# DAWN_VERSION="v20260317.182325"
# DAWN_OWNER="google"
# DAWN_REPO="dawn"
# DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
# echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
# curl -L -o artifact.tar.gz \
# "https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
# mkdir dawn
# tar -xvf artifact.tar.gz -C dawn --strip-components=1
- name: Dawn Dependency
id: dawn-depends
run: |
DAWN_VERSION="v20260317.182325"
DAWN_OWNER="google"
DAWN_REPO="dawn"
DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
curl -L -o artifact.tar.gz \
"https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
mkdir dawn
tar -xvf artifact.tar.gz -C dawn --strip-components=1

# - name: Test
# id: ggml-ci
# run: |
# GG_BUILD_WEBGPU=1 \
# GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
# GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
# bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
- name: Test
id: ggml-ci
run: |
GG_BUILD_WEBGPU=1 \
GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

# TODO: provision AMX-compatible machine
#ggml-ci-cpu-amx:
Expand Down Expand Up @@ -172,6 +194,7 @@ jobs:
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

ggml-ci-mac-metal:
needs: determine-tag
runs-on: [self-hosted, macOS, ARM64]

steps:
Expand All @@ -181,10 +204,13 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-mac-webgpu:
needs: determine-tag
runs-on: [self-hosted, macOS, ARM64]

steps:
Expand All @@ -207,11 +233,14 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-mac-vulkan:
needs: determine-tag
runs-on: [self-hosted, macOS, ARM64]

steps:
Expand All @@ -221,11 +250,14 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-linux-intel-vulkan:
needs: determine-tag
runs-on: [self-hosted, Linux, Intel]

steps:
Expand All @@ -237,11 +269,14 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

ggml-ci-win-intel-vulkan:
needs: determine-tag
runs-on: [self-hosted, Windows, X64, Intel]

steps:
Expand All @@ -256,13 +291,15 @@ jobs:
MSYSTEM: UCRT64
CHERE_INVOKING: 1
PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
vulkaninfo --summary
# Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
# a valid python environment for testing
LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp

ggml-ci-intel-openvino-gpu-low-perf:
needs: determine-tag
runs-on: [self-hosted, Linux, Intel, OpenVINO]

concurrency:
Expand Down Expand Up @@ -294,6 +331,8 @@ jobs:

- name: Test
id: ggml-ci
env:
HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
run: |
source ./openvino_toolkit/setupvars.sh
GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
4 changes: 3 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ Before submitting your PR:
- provide KL divergence data calculated vs. the FP16/BF16 (whichever is the native precision) version for both the new type as well as types of similar size
- provide [performance data](https://github.com/ggml-org/llama.cpp/tree/master/tools/llama-bench) for the new type in comparison to types of similar size on pure CPU
- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
- If you are a new contributor, limit your open PRs to 1.
- If you are a new contributor
- Limit your open PRs to 1
- Do not submit trivial fixes (e.g. typos, formatting changes)

After submitting your PR:
- Expect requests for modifications to ensure the code meets llama.cpp's standards for quality and long-term maintainability
Expand Down
Loading
Loading