---
# IntelNav release — builds the IntelNav-patched libllama for each
# (backend x OS x arch) the Rust FFI crate (`intelnav-ggml`) expects to
# find under ~/.cache/intelnav/libllama/. Triggered on tag push matching
# `intelnav-v*` (so upstream ggml-org's own releases don't fire this),
# and manually via workflow_dispatch for dev iteration.
#
# Each job produces:
# libllama-<backend>-<os>-<arch>.tar.gz
# containing:
# bin/libllama.so* (or .dylib / .dll on other OSes)
# bin/libggml.so*
# bin/libggml-base.so*
# bin/libggml-cpu*.so (one or more, per CPU-variant build)
# bin/libggml-<backend>.so (ROCm / CUDA / Vulkan / SYCL)
# include/llama.h
# include/ggml.h
# include/ggml-*.h
# LICENSE
# INTELNAV_SHA (git rev of the fork — the FFI crate
# reads this to key its cache path)
#
# On tag push, all artifacts are uploaded to the matching GitHub Release.
# On workflow_dispatch, they stay as job artifacts for inspection.
#
# Scope for the first cut: Linux x86_64 only, with CPU + Vulkan + ROCm.
# Each is a free runner and none require proprietary SDKs beyond what
# `apt` and the ROCm/Vulkan repos provide. CUDA, Metal, SYCL, Windows,
# and macOS arm64 are follow-ups — deliberately left out so this
# workflow actually ships and starts producing artifacts this week.
name: IntelNav release

on:
  workflow_dispatch:
    inputs:
      make_release:
        description: "Upload artifacts to a GitHub Release (requires a tag)."
        type: boolean
        default: false
  push:
    tags:
      - 'intelnav-v*'

# NOTE(review): this workflow has no pull_request trigger, so
# github.head_ref is always empty and the group collapses to run_id —
# i.e. cancel-in-progress never actually dedups concurrent tag builds.
# Kept as-is to match the upstream pattern; confirm whether per-ref
# grouping (github.ref) was intended.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  # cmake flags shared by every backend job. Examples/tests/tools/server
  # stay OFF: consumers of intelnav-ggml link libllama + libggml* as a
  # library and never run the CLI binaries.
  CMAKE_COMMON: >-
    -DLLAMA_BUILD_EXAMPLES=OFF
    -DLLAMA_BUILD_TESTS=OFF
    -DLLAMA_BUILD_TOOLS=OFF
    -DLLAMA_BUILD_SERVER=OFF
    -DCMAKE_BUILD_TYPE=Release
    -DBUILD_SHARED_LIBS=ON
  # Suffix appended to every job's ccache key. Bump to invalidate all caches.
  CCACHE_GEN: v1
jobs:
  # ---------------------------------------------------------------------
  # linux-x64 CPU — universal fallback. Ships with GGML_CPU_ALL_VARIANTS
  # so runtime dispatch picks SSE / AVX2 / AVX-512 as available.
  # ---------------------------------------------------------------------
  linux-x64-cpu:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v6
        with: { fetch-depth: 0 }
      - uses: ggml-org/ccache-action@v1.2.21
        with:
          key: intelnav-linux-x64-cpu-${{ env.CCACHE_GEN }}
          evict-old-files: 1d
      - name: Configure
        run: |
          cmake -B build \
            ${{ env.CMAKE_COMMON }} \
            -DGGML_NATIVE=OFF \
            -DGGML_CPU_ALL_VARIANTS=ON \
            -DGGML_BACKEND_DL=ON
      - name: Build
        # Build the default (all) target, not `--target llama`: with
        # GGML_BACKEND_DL=ON the per-variant CPU backends are standalone
        # loadable modules that the llama target does not depend on, so a
        # llama-only build would never produce the libggml-cpu*.so files
        # the package header promises.
        run: cmake --build build -j $(nproc)
      - name: Package
        run: .github/actions/intelnav-pack.sh linux-x64-cpu
        shell: bash
      - uses: actions/upload-artifact@v6
        with:
          name: libllama-linux-x64-cpu
          path: dist/*.tar.gz
# ---------------------------------------------------------------------
# linux-x64 Vulkan — the "any GPU on Linux" backend. Covers AMD
# (without ROCm), Intel Arc, NVIDIA (without CUDA), and integrated
# graphics on every modern distro.
# ---------------------------------------------------------------------
linux-x64-vulkan:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v6
with: { fetch-depth: 0 }
- name: Install Vulkan SDK prereqs
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
build-essential curl tar xz-utils libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev
# Pull the LunarG SDK rather than scraping apt packages that move
# around between Ubuntu LTSes. Matches upstream `build-vulkan.yml`.
- name: Install LunarG Vulkan SDK
run: |
set -e
ver="$(curl -fsSL https://vulkan.lunarg.com/sdk/latest/linux.txt)"
echo "Vulkan SDK version: $ver"
curl -fsSL "https://sdk.lunarg.com/sdk/download/${ver}/linux/vulkan_sdk.tar.xz" -o /tmp/vulkan.tar.xz
mkdir -p "$HOME/vulkan_sdk"
tar -xJf /tmp/vulkan.tar.xz -C "$HOME/vulkan_sdk" --strip-components=1
echo "VULKAN_SDK=$HOME/vulkan_sdk/x86_64" >> "$GITHUB_ENV"
echo "$HOME/vulkan_sdk/x86_64/bin" >> "$GITHUB_PATH"
echo "LD_LIBRARY_PATH=$HOME/vulkan_sdk/x86_64/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
- uses: ggml-org/ccache-action@v1.2.21
with:
key: intelnav-linux-x64-vulkan-${{ env.CCACHE_GEN }}
evict-old-files: 1d
- name: Configure
run: |
cmake -B build \
${{ env.CMAKE_COMMON }} \
-DGGML_NATIVE=OFF \
-DGGML_VULKAN=ON \
-DGGML_BACKEND_DL=ON
- name: Build
run: cmake --build build -j $(nproc) --target llama
- name: Package
run: .github/actions/intelnav-pack.sh linux-x64-vulkan
shell: bash
- uses: actions/upload-artifact@v6
with:
name: libllama-linux-x64-vulkan
path: dist/*.tar.gz
# ---------------------------------------------------------------------
# linux-x64 ROCm — AMD Radeon (gfx9+) acceleration, the headline
# IntelNav win. The user's RX 6600 (gfx1032) is covered by setting
# AMDGPU_TARGETS to include gfx1030 (handles gfx1031/1032 via override).
# ---------------------------------------------------------------------
linux-x64-rocm:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v6
with: { fetch-depth: 0 }
- name: Install ROCm
run: |
# The GH jammy runner ships pre-existing rocm-* bits from the
# Ubuntu archive (e.g. rocm-cmake 5.0) which conflict with
# Radeon's own 6.x packages. Purge first, then pin Radeon's
# repo with apt preferences so the full 6.x stack resolves
# as a single coherent set.
sudo apt-get update
sudo apt-get install -y --no-install-recommends wget gnupg
sudo apt-get purge -y 'rocm-*' 'hip-*' 'miopen-*' 'hsa-*' 'hipblas*' 'rocblas*' 'rocfft*' || true
sudo apt-get autoremove -y || true
sudo mkdir -p /etc/apt/keyrings
wget -qO - https://repo.radeon.com/rocm/rocm.gpg.key \
| sudo gpg --dearmor -o /etc/apt/keyrings/rocm.gpg
# ROCm 6.3+ is required by ggml-cuda's vendor header — it
# references `__hip_fp8_e4m3` which older HIP runtimes don't
# define. Pin to a current point release on the 6.4 line.
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.4 jammy main" \
| sudo tee /etc/apt/sources.list.d/rocm.list
printf 'Package: *\nPin: origin "repo.radeon.com"\nPin-Priority: 600\n' \
| sudo tee /etc/apt/preferences.d/rocm-pin-600 >/dev/null
# Install a minimal HIP + BLAS set rather than the `rocm-dev`
# / `rocm-libs` metas — those pull in `rccl`, which 6.4
# hard-depends on AMD kernel driver libs (libdrm-amdgpu-*)
# that don't exist on GH-hosted runners. Our build only
# needs hipcc + HIP headers + hipBLAS + rocBLAS.
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
hipcc hip-dev rocm-cmake rocm-device-libs \
hipblas-dev rocblas-dev comgr hsa-rocr-dev \
rocminfo rocm-llvm
echo "/opt/rocm/bin" >> $GITHUB_PATH
echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
- uses: ggml-org/ccache-action@v1.2.21
with:
key: intelnav-linux-x64-rocm-${{ env.CCACHE_GEN }}
evict-old-files: 1d
- name: Configure
env:
# gfx1030 bytecode is binary-compatible with gfx1031/1032 via
# HSA_OVERRIDE_GFX_VERSION=10.3.0 at run time — one artifact
# covers the RX 6600 / 6700 / 6800 family. Add more archs
# here as we get hardware to test on.
AMDGPU_TARGETS: "gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
run: |
cmake -B build \
${{ env.CMAKE_COMMON }} \
-DGGML_NATIVE=OFF \
-DGGML_HIP=ON \
-DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
-DAMDGPU_TARGETS=$AMDGPU_TARGETS \
-DGGML_BACKEND_DL=ON
- name: Build
run: cmake --build build -j $(nproc) --target llama
- name: Package
run: .github/actions/intelnav-pack.sh linux-x64-rocm
shell: bash
- uses: actions/upload-artifact@v6
with:
name: libllama-linux-x64-rocm
path: dist/*.tar.gz
# ---------------------------------------------------------------------
# linux-x64 CUDA — NVIDIA acceleration. Uses Jimver's cuda-toolkit
# action to install a pinned CUDA version on the hosted runner
# (~3 GB download cached per runner). Test runs against this
# artifact require an NVIDIA GPU, so the intelnav repo exercises it
# only on self-hosted `[gpu, nvidia]` runners — but we publish the
# build unconditionally so those runners have something to pull.
# ---------------------------------------------------------------------
linux-x64-cuda:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v6
with: { fetch-depth: 0 }
# Install CUDA 12.6 from NVIDIA's own apt repo directly — the
# Jimver action's sub-package name for cublas drifted between
# CUDA 11 (cuda-cublas-*) and CUDA 12 (libcublas-*), and pinning
# the real package names keeps us independent of that.
- name: Install CUDA 12.6
run: |
set -e
sudo apt-get update
sudo apt-get install -y --no-install-recommends ca-certificates gnupg curl
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub \
| sudo gpg --dearmor -o /etc/apt/keyrings/nvidia-cuda.gpg
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/nvidia-cuda.gpg] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" \
| sudo tee /etc/apt/sources.list.d/nvidia-cuda.list
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
cuda-nvcc-12-6 cuda-cudart-dev-12-6 libcublas-dev-12-6 libcublas-12-6
echo "/usr/local/cuda-12.6/bin" >> $GITHUB_PATH
echo "CUDA_PATH=/usr/local/cuda-12.6" >> $GITHUB_ENV
- uses: ggml-org/ccache-action@v1.2.21
with:
key: intelnav-linux-x64-cuda-${{ env.CCACHE_GEN }}
evict-old-files: 1d
- name: Configure
run: |
# Narrow arch list keeps the CUDA build under the hosted
# runner's 6-hour ceiling and within practical ccache reuse.
# 86 covers RTX 30-series / A100-like; 89 covers RTX 40.
# Users with older (Turing sm_75) or newer (Hopper sm_90)
# hardware rebuild from source locally — add them back when
# there's real CI demand.
cmake -B build \
${{ env.CMAKE_COMMON }} \
-DGGML_NATIVE=OFF \
-DGGML_CUDA=ON \
-DCMAKE_CUDA_ARCHITECTURES="86;89" \
-DGGML_BACKEND_DL=ON
- name: Build
run: cmake --build build -j $(nproc) --target llama
- name: Package
run: .github/actions/intelnav-pack.sh linux-x64-cuda
shell: bash
- uses: actions/upload-artifact@v6
with:
name: libllama-linux-x64-cuda
path: dist/*.tar.gz
# ---------------------------------------------------------------------
# macOS arm64 Metal — Apple Silicon via Metal Performance Shaders.
# macos-14 is the first free runner with an M1; anything older is
# Intel and we'd build a different arch. Metal is on by default in
# mainline llama.cpp; we just need to keep it explicit.
# ---------------------------------------------------------------------
macos-arm64-metal:
runs-on: macos-14
steps:
- uses: actions/checkout@v6
with: { fetch-depth: 0 }
- uses: ggml-org/ccache-action@v1.2.21
with:
key: intelnav-macos-arm64-metal-${{ env.CCACHE_GEN }}
evict-old-files: 1d
- name: Configure
run: |
cmake -B build \
${{ env.CMAKE_COMMON }} \
-DGGML_NATIVE=OFF \
-DGGML_METAL=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DGGML_BACKEND_DL=ON
- name: Build
run: cmake --build build -j $(sysctl -n hw.ncpu) --target llama
- name: Package
run: .github/actions/intelnav-pack.sh macos-arm64-metal
shell: bash
- uses: actions/upload-artifact@v6
with:
name: libllama-macos-arm64-metal
path: dist/*.tar.gz
# ---------------------------------------------------------------------
# Upload to Release. Only runs on tag push or when
# workflow_dispatch.inputs.make_release is true.
# ---------------------------------------------------------------------
publish-release:
needs: [linux-x64-cpu, linux-x64-vulkan, linux-x64-rocm, linux-x64-cuda, macos-arm64-metal]
if: startsWith(github.ref, 'refs/tags/intelnav-v') || inputs.make_release
runs-on: ubuntu-22.04
# `attestations: write` is required for GitHub's built-in
# actions/attest-build-provenance to sign the artifacts with a
# keyless sigstore identity scoped to this workflow run. Consumers
# — and specifically task #14's runtime downloader — verify the
# attestation before caching and dlopening the blob, closing the
# supply-chain RCE vector flagged in docs/dev/M0_AUDIT.md S1.
permissions:
contents: write
attestations: write
id-token: write
steps:
- uses: actions/checkout@v6
- name: Download artifacts
uses: actions/download-artifact@v6
with:
path: artifacts
- name: Collect artifacts
id: collect
run: |
mkdir -p dist
find artifacts -name '*.tar.gz' -exec cp {} dist/ \;
ls -la dist/
# Emit a newline-separated list for the attestor below.
{
echo "paths<<EOF"
ls dist/*.tar.gz
echo "EOF"
} >> "$GITHUB_OUTPUT"
- name: Generate build-provenance attestation
uses: actions/attest-build-provenance@v3
with:
subject-path: ${{ steps.collect.outputs.paths }}
- name: Publish
uses: softprops/action-gh-release@v2
with:
files: dist/*.tar.gz
fail_on_unmatched_files: true
generate_release_notes: true