[IntelNav] ci: actually broaden AMDGPU_TARGETS #7

# IntelNav release — builds the IntelNav-patched libllama for each
# (backend x OS x arch) the Rust FFI crate (`intelnav-ggml`) expects to
# find under ~/.cache/intelnav/libllama/. Triggered on tag push matching
# `intelnav-v*` (so upstream ggml-org's own releases don't fire this),
# and manually via workflow_dispatch for dev iteration.
#
# Each job produces:
# libllama-<os>-<arch>-<backend>.tar.gz
# containing:
# bin/libllama.so* (or .dylib / .dll on other OSes)
# bin/libggml.so*
# bin/libggml-base.so*
# bin/libggml-cpu*.so (one or more, per CPU-variant build)
# bin/libggml-<backend>.so (ROCm / CUDA / Vulkan / SYCL)
# include/llama.h
# include/ggml.h
# include/ggml-*.h
# LICENSE
# INTELNAV_SHA (git rev of the fork — the FFI crate
# reads this to key its cache path)
#
# On tag push, all artifacts are uploaded to the matching GitHub Release.
# On workflow_dispatch, they stay as job artifacts for inspection
# unless the `make_release` input is set.
#
# Scope today: Linux x86_64 (CPU + Vulkan + ROCm + CUDA), macOS
# arm64 (Metal), and Windows x86_64 (Vulkan). All six jobs run on
# free GitHub-hosted runners — no proprietary SDKs beyond what `apt`
# (Ubuntu), the Radeon / NVIDIA apt repos, LunarG's Vulkan SDK
# installer, and Apple's bundled toolchain provide.
#
# Still-deferred follow-ups: Linux SYCL (needs the Intel oneAPI base
# toolkit, multi-GB install) and Windows CPU/CUDA (CUDA needs the
# Windows CUDA Toolkit installer; CPU is a copy-paste of windows-x64-
# vulkan once we want it). The pack script handles both single-config
# (Linux/macOS) and multi-config (Visual Studio: build/bin/Release/)
# build layouts.
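#
# Cutting a release is a plain tag push; for example (version number
# illustrative):
#
#   git tag intelnav-v0.3.0
#   git push origin intelnav-v0.3.0
#
# For orientation, a minimal sketch of what .github/actions/intelnav-pack.sh
# does — the real script lives in-repo; the paths and globs below are
# assumptions, and Windows .dll naming is elided:
#
#   #!/usr/bin/env bash
#   set -euo pipefail
#   target="$1"                       # e.g. linux-x64-vulkan
#   # Visual Studio is a multi-config generator: binaries land one
#   # level deeper, under build/bin/Release/.
#   bindir=build/bin
#   if [ -d build/bin/Release ]; then bindir=build/bin/Release; fi
#   mkdir -p pkg/bin pkg/include dist
#   cp "$bindir"/libllama* "$bindir"/libggml* pkg/bin/
#   cp include/llama.h ggml/include/ggml*.h pkg/include/
#   cp LICENSE pkg/
#   git rev-parse HEAD > pkg/INTELNAV_SHA
#   tar -czf "dist/libllama-${target}.tar.gz" -C pkg .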
name: IntelNav release
on:
workflow_dispatch:
inputs:
make_release:
description: "Upload artifacts to a GitHub Release (requires a tag)."
type: boolean
default: false
push:
tags:
- 'intelnav-v*'
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
# Common cmake args across all backends. Keep tools/server/examples
# OFF — consumers of intelnav-ggml link libllama + libggml* as a
# library and don't need the CLI tools.
CMAKE_COMMON: >-
-DLLAMA_BUILD_EXAMPLES=OFF
-DLLAMA_BUILD_TESTS=OFF
-DLLAMA_BUILD_TOOLS=OFF
-DLLAMA_BUILD_SERVER=OFF
-DCMAKE_BUILD_TYPE=Release
-DBUILD_SHARED_LIBS=ON
  # ccache key suffix used by every job. Bump it to bust every job's
  # cache at once.
CCACHE_GEN: v1
jobs:
# ---------------------------------------------------------------------
# linux-x64 CPU — universal fallback. Ships with GGML_CPU_ALL_VARIANTS
# so runtime dispatch picks SSE / AVX2 / AVX-512 as available.
# ---------------------------------------------------------------------
linux-x64-cpu:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v6
with: { fetch-depth: 0 }
- uses: ggml-org/ccache-action@v1.2.21
with:
key: intelnav-linux-x64-cpu-${{ env.CCACHE_GEN }}
evict-old-files: 1d
- name: Configure
run: |
cmake -B build \
${{ env.CMAKE_COMMON }} \
-DGGML_NATIVE=OFF \
-DGGML_CPU_ALL_VARIANTS=ON \
-DGGML_BACKEND_DL=ON
- name: Build
run: cmake --build build -j $(nproc) --target llama
- name: Package
run: .github/actions/intelnav-pack.sh linux-x64-cpu
shell: bash
- uses: actions/upload-artifact@v6
with:
name: libllama-linux-x64-cpu
path: dist/*.tar.gz
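  # Quick local sanity check of a downloaded CPU artifact (a sketch;
  # exact variant suffixes depend on the ggml revision):
  #
  #   tar -tzf libllama-linux-x64-cpu.tar.gz | grep libggml-cpu
  #   # expect several variant libs, e.g. bin/libggml-cpu-haswell.so,
  #   # bin/libggml-cpu-icelake.so, ...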
# ---------------------------------------------------------------------
# linux-x64 Vulkan — the "any GPU on Linux" backend. Covers AMD
# (without ROCm), Intel Arc, NVIDIA (without CUDA), and integrated
# graphics on every modern distro.
# ---------------------------------------------------------------------
linux-x64-vulkan:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v6
with: { fetch-depth: 0 }
- name: Install Vulkan SDK prereqs
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
build-essential curl tar xz-utils libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev
# Pull the LunarG SDK rather than scraping apt packages that move
# around between Ubuntu LTSes. Matches upstream `build-vulkan.yml`.
- name: Install LunarG Vulkan SDK
run: |
set -e
ver="$(curl -fsSL https://vulkan.lunarg.com/sdk/latest/linux.txt)"
echo "Vulkan SDK version: $ver"
curl -fsSL "https://sdk.lunarg.com/sdk/download/${ver}/linux/vulkan_sdk.tar.xz" -o /tmp/vulkan.tar.xz
mkdir -p "$HOME/vulkan_sdk"
tar -xJf /tmp/vulkan.tar.xz -C "$HOME/vulkan_sdk" --strip-components=1
echo "VULKAN_SDK=$HOME/vulkan_sdk/x86_64" >> "$GITHUB_ENV"
echo "$HOME/vulkan_sdk/x86_64/bin" >> "$GITHUB_PATH"
echo "LD_LIBRARY_PATH=$HOME/vulkan_sdk/x86_64/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
- uses: ggml-org/ccache-action@v1.2.21
with:
key: intelnav-linux-x64-vulkan-${{ env.CCACHE_GEN }}
evict-old-files: 1d
- name: Configure
run: |
cmake -B build \
${{ env.CMAKE_COMMON }} \
-DGGML_NATIVE=OFF \
-DGGML_VULKAN=ON \
-DGGML_BACKEND_DL=ON
- name: Build
run: cmake --build build -j $(nproc) --target llama
- name: Package
run: .github/actions/intelnav-pack.sh linux-x64-vulkan
shell: bash
- uses: actions/upload-artifact@v6
with:
name: libllama-linux-x64-vulkan
path: dist/*.tar.gz
# ---------------------------------------------------------------------
# linux-x64 ROCm — AMD Radeon (gfx9+) acceleration, the headline
# IntelNav win. AMDGPU_TARGETS covers every shipping Radeon arch
# IntelNav users are realistically running, with native bytecode for
# each so no HSA_OVERRIDE_GFX_VERSION dance is required at runtime.
# ---------------------------------------------------------------------
linux-x64-rocm:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v6
with: { fetch-depth: 0 }
- name: Install ROCm
run: |
# The GH jammy runner ships pre-existing rocm-* bits from the
# Ubuntu archive (e.g. rocm-cmake 5.0) which conflict with
# Radeon's own 6.x packages. Purge first, then pin Radeon's
# repo with apt preferences so the full 6.x stack resolves
# as a single coherent set.
sudo apt-get update
sudo apt-get install -y --no-install-recommends wget gnupg
sudo apt-get purge -y 'rocm-*' 'hip-*' 'miopen-*' 'hsa-*' 'hipblas*' 'rocblas*' 'rocfft*' || true
sudo apt-get autoremove -y || true
sudo mkdir -p /etc/apt/keyrings
wget -qO - https://repo.radeon.com/rocm/rocm.gpg.key \
| sudo gpg --dearmor -o /etc/apt/keyrings/rocm.gpg
# ROCm 6.3+ is required by ggml-cuda's vendor header — it
# references `__hip_fp8_e4m3` which older HIP runtimes don't
# define. Pin to a current point release on the 6.4 line.
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.4 jammy main" \
| sudo tee /etc/apt/sources.list.d/rocm.list
printf 'Package: *\nPin: origin "repo.radeon.com"\nPin-Priority: 600\n' \
| sudo tee /etc/apt/preferences.d/rocm-pin-600 >/dev/null
# Install a minimal HIP + BLAS set rather than the `rocm-dev`
# / `rocm-libs` metas — those pull in `rccl`, which 6.4
# hard-depends on AMD kernel driver libs (libdrm-amdgpu-*)
# that don't exist on GH-hosted runners. Our build only
# needs hipcc + HIP headers + hipBLAS + rocBLAS.
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
hipcc hip-dev rocm-cmake rocm-device-libs \
hipblas-dev rocblas-dev comgr hsa-rocr-dev \
rocminfo rocm-llvm
echo "/opt/rocm/bin" >> $GITHUB_PATH
echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
- uses: ggml-org/ccache-action@v1.2.21
with:
key: intelnav-linux-x64-rocm-${{ env.CCACHE_GEN }}
evict-old-files: 1d
- name: Configure
env:
          # Native bytecode for every shipping Radeon arch we'd plausibly
          # see in IntelNav users' machines. Build cost is ~30% more than
          # a narrow target list, but the resulting tarball runs without
          # HSA_OVERRIDE_GFX_VERSION on any of them. The runtime
          # `gpu_compat` shim still applies as belt-and-suspenders for
          # arches that reach consumer hardware after a given tarball
          # was cut.
          # gfx9xx — Vega + CDNA1/2/3 (Radeon VII, MI50/100/200/300)
# gfx103x — RDNA2 (RX 6x00, e.g. 6600 = gfx1032)
# gfx110x — RDNA3 (RX 7x00)
# gfx115x — RDNA3.5 (Strix Point APU iGPUs)
# gfx120x — RDNA4 (RX 9x00)
AMDGPU_TARGETS: "gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201"
run: |
cmake -B build \
${{ env.CMAKE_COMMON }} \
-DGGML_NATIVE=OFF \
-DGGML_HIP=ON \
-DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
-DAMDGPU_TARGETS=$AMDGPU_TARGETS \
-DGGML_BACKEND_DL=ON
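      # To check what a given machine needs (a sketch, run on the
      # user's box, not in CI): rocminfo prints the agent's gfx name,
      # which should match one of the targets above or fall to the
      # runtime gpu_compat shim. ROCm's roc-obj-ls (if installed) can
      # confirm which code objects got baked into the backend lib:
      #
      #   rocminfo | grep -o 'gfx[0-9a-f]*' | head -1
      #   roc-obj-ls build/bin/libggml-hip.so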
- name: Build
run: cmake --build build -j $(nproc) --target llama
- name: Package
run: .github/actions/intelnav-pack.sh linux-x64-rocm
shell: bash
- uses: actions/upload-artifact@v6
with:
name: libllama-linux-x64-rocm
path: dist/*.tar.gz
# ---------------------------------------------------------------------
  # linux-x64 CUDA — NVIDIA acceleration. Installs a pinned CUDA 12.6
  # straight from NVIDIA's apt repo on the hosted runner (~3 GB of
  # packages). Test runs against this artifact require an NVIDIA GPU,
  # so the intelnav repo exercises it only on self-hosted
  # `[gpu, nvidia]` runners — but we publish the build unconditionally
  # so those runners have something to pull.
# ---------------------------------------------------------------------
linux-x64-cuda:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v6
with: { fetch-depth: 0 }
# Install CUDA 12.6 from NVIDIA's own apt repo directly — the
# Jimver action's sub-package name for cublas drifted between
# CUDA 11 (cuda-cublas-*) and CUDA 12 (libcublas-*), and pinning
# the real package names keeps us independent of that.
- name: Install CUDA 12.6
run: |
set -e
sudo apt-get update
sudo apt-get install -y --no-install-recommends ca-certificates gnupg curl
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub \
| sudo gpg --dearmor -o /etc/apt/keyrings/nvidia-cuda.gpg
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/nvidia-cuda.gpg] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" \
| sudo tee /etc/apt/sources.list.d/nvidia-cuda.list
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
cuda-nvcc-12-6 cuda-cudart-dev-12-6 libcublas-dev-12-6 libcublas-12-6
echo "/usr/local/cuda-12.6/bin" >> $GITHUB_PATH
echo "CUDA_PATH=/usr/local/cuda-12.6" >> $GITHUB_ENV
- uses: ggml-org/ccache-action@v1.2.21
with:
key: intelnav-linux-x64-cuda-${{ env.CCACHE_GEN }}
evict-old-files: 1d
- name: Configure
run: |
# Narrow arch list keeps the CUDA build under the hosted
# runner's 6-hour ceiling and within practical ccache reuse.
          # 86 covers RTX 30-series (Ampere); 89 covers RTX 40 (Ada).
          # Users with older (Turing sm_75), datacenter (A100 sm_80),
          # or newer (Hopper sm_90)
# hardware rebuild from source locally — add them back when
# there's real CI demand.
cmake -B build \
${{ env.CMAKE_COMMON }} \
-DGGML_NATIVE=OFF \
-DGGML_CUDA=ON \
-DCMAKE_CUDA_ARCHITECTURES="86;89" \
-DGGML_BACKEND_DL=ON
- name: Build
run: cmake --build build -j $(nproc) --target llama
- name: Package
run: .github/actions/intelnav-pack.sh linux-x64-cuda
shell: bash
- uses: actions/upload-artifact@v6
with:
name: libllama-linux-x64-cuda
path: dist/*.tar.gz
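  # To map a card to its sm value (a sketch, run on the user's
  # machine; compute_cap needs a reasonably recent driver) — an
  # RTX 3080 reports 8.6, i.e. sm_86:
  #
  #   nvidia-smi --query-gpu=name,compute_cap --format=csv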
# ---------------------------------------------------------------------
# macOS arm64 Metal — Apple Silicon via Metal Performance Shaders.
# macos-14 is the first free runner with an M1; anything older is
# Intel and we'd build a different arch. Metal is on by default in
# mainline llama.cpp; we just need to keep it explicit.
# ---------------------------------------------------------------------
macos-arm64-metal:
runs-on: macos-14
steps:
- uses: actions/checkout@v6
with: { fetch-depth: 0 }
- uses: ggml-org/ccache-action@v1.2.21
with:
key: intelnav-macos-arm64-metal-${{ env.CCACHE_GEN }}
evict-old-files: 1d
- name: Configure
run: |
cmake -B build \
${{ env.CMAKE_COMMON }} \
-DGGML_NATIVE=OFF \
-DGGML_METAL=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DGGML_BACKEND_DL=ON
- name: Build
run: cmake --build build -j $(sysctl -n hw.ncpu) --target llama
- name: Package
run: .github/actions/intelnav-pack.sh macos-arm64-metal
shell: bash
- uses: actions/upload-artifact@v6
with:
name: libllama-macos-arm64-metal
path: dist/*.tar.gz
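  # Optional local check of the Metal artifact (a sketch): with
  # GGML_METAL_EMBED_LIBRARY=ON the compiled .metallib is baked into
  # the backend dylib, so the tarball ships no separate
  # default.metallib. otool shows the install names the FFI crate
  # will resolve at dlopen time:
  #
  #   tar -xzf libllama-macos-arm64-metal.tar.gz
  #   otool -L bin/libllama.dylib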
# ---------------------------------------------------------------------
# windows-x64 Vulkan — broad GPU coverage for Windows users without
# pulling proprietary SDKs. Vulkan covers AMD, Intel, NVIDIA, iGPUs;
# NVIDIA-only users will pick a CUDA artifact when that lands.
#
# Visual Studio is a multi-config generator: artifacts land at
# build/bin/Release/, which intelnav-pack.sh picks up. Recipe mirrors
# ggml-org/llama.cpp's release.yml `windows-latest-cmake` matrix
# entry for `vulkan/x64` — same VulkanSDK installer, same cmake args.
# ---------------------------------------------------------------------
windows-x64-vulkan:
runs-on: windows-2022
env:
# Pinned to match upstream's release.yml — bump together when
# LunarG ships a new SDK we want.
VULKAN_VERSION: 1.4.313.2
steps:
- uses: actions/checkout@v6
with: { fetch-depth: 0 }
- uses: ggml-org/ccache-action@v1.2.21
with:
key: intelnav-windows-x64-vulkan-${{ env.CCACHE_GEN }}
variant: ccache
evict-old-files: 1d
- name: Install Vulkan SDK
run: |
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
- name: Configure
run: cmake -B build ${{ env.CMAKE_COMMON }} -DGGML_NATIVE=OFF -DGGML_VULKAN=ON -DGGML_BACKEND_DL=ON
- name: Build
run: cmake --build build --config Release -j --target llama
- name: Package
run: .github/actions/intelnav-pack.sh windows-x64-vulkan
shell: bash
- uses: actions/upload-artifact@v6
with:
name: libllama-windows-x64-vulkan
path: dist/*.tar.gz
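  # The two layouts the pack script handles are easy to eyeball in a
  # failed run (a sketch, using the same bash shell the Package step
  # relies on):
  #
  #   ls build/bin/          # single-config generators put libs here
  #   ls build/bin/Release/  # Visual Studio puts them here instead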
# ---------------------------------------------------------------------
# Upload to Release. Only runs on tag push or when
# workflow_dispatch.inputs.make_release is true.
# ---------------------------------------------------------------------
publish-release:
needs: [linux-x64-cpu, linux-x64-vulkan, linux-x64-rocm, linux-x64-cuda, macos-arm64-metal, windows-x64-vulkan]
if: startsWith(github.ref, 'refs/tags/intelnav-v') || inputs.make_release
runs-on: ubuntu-22.04
# `attestations: write` is required for GitHub's built-in
# actions/attest-build-provenance to sign the artifacts with a
# keyless sigstore identity scoped to this workflow run. Consumers
# — and specifically task #14's runtime downloader — verify the
# attestation before caching and dlopening the blob, closing the
# supply-chain RCE vector flagged in docs/dev/M0_AUDIT.md S1.
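    # Consumer-side verification (a sketch — the repo slug is
    # illustrative): gh checks the sigstore bundle against this
    # workflow's identity before the blob is trusted.
    #
    #   gh attestation verify libllama-linux-x64-rocm.tar.gz \
    #     --repo <owner>/<intelnav-fork>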
permissions:
contents: write
attestations: write
id-token: write
steps:
- uses: actions/checkout@v6
- name: Download artifacts
uses: actions/download-artifact@v6
with:
path: artifacts
- name: Collect artifacts
id: collect
run: |
mkdir -p dist
find artifacts -name '*.tar.gz' -exec cp {} dist/ \;
ls -la dist/
# Emit a newline-separated list for the attestor below.
{
echo "paths<<EOF"
ls dist/*.tar.gz
echo "EOF"
} >> "$GITHUB_OUTPUT"
- name: Generate build-provenance attestation
uses: actions/attest-build-provenance@v3
with:
subject-path: ${{ steps.collect.outputs.paths }}
- name: Publish
uses: softprops/action-gh-release@v2
with:
files: dist/*.tar.gz
fail_on_unmatched_files: true
generate_release_notes: true