diff --git a/.github/actions/cleanup-processes-linux/action.yml b/.github/actions/cleanup-processes-linux/action.yml
new file mode 100644
index 00000000000..58649fcc41b
--- /dev/null
+++ b/.github/actions/cleanup-processes-linux/action.yml
@@ -0,0 +1,19 @@
+name: 'Cleanup GPU Processes (Linux)'
+description: 'Kill zombie whisper/GPU processes on self-hosted Linux runners'
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Kill zombie processes
+      shell: bash
+      run: |
+        echo "=== Cleaning up stale processes ==="
+        # Best-effort: a pattern that matches no process must not fail the step.
+        for pattern in "whisper-cli" "whisper-bench" "whisper-server" "ctest.*whisper"; do
+          pkill -f "$pattern" 2>/dev/null || true
+        done
+        if command -v rocm-smi >/dev/null 2>&1; then
+          echo "=== GPU process check ==="
+          rocm-smi --showpids 2>/dev/null || true
+        fi
+        echo "=== Cleanup complete ==="
diff --git a/.github/actions/cleanup-processes-windows/action.yml b/.github/actions/cleanup-processes-windows/action.yml
new file mode 100644
index 00000000000..91a9424dd22
--- /dev/null
+++ b/.github/actions/cleanup-processes-windows/action.yml
@@ -0,0 +1,15 @@
+name: 'Cleanup GPU Processes (Windows)'
+description: 'Kill zombie whisper/GPU processes on self-hosted Windows runners'
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Kill zombie processes
+      shell: pwsh
+      run: |
+        Write-Host "=== Cleaning up stale processes ==="
+        # Best-effort: names with no running process are skipped silently.
+        "whisper-cli", "whisper-bench", "whisper-server", "ctest" | ForEach-Object {
+          Get-Process -Name $_ -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
+        }
+        Write-Host "=== Cleanup complete ==="
diff --git a/.github/workflows/bindings-go.yml b/.github/workflows/bindings-go.yml
deleted file mode 100644
index 83473e4636a..00000000000
--- a/.github/workflows/bindings-go.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Bindings Tests (Go)
-on:
- push:
- paths:
- - bindings/go/**
- - whisper.h
- pull_request:
- paths:
- - bindings/go/**
- - whisper.h
-
-jobs:
- ubuntu-22:
- runs-on: ubuntu-22.04
- steps:
- - uses: actions/setup-go@v6
- with:
- go-version: '^1.23'
- - uses: actions/checkout@v6
- - run: |
- cd bindings/go
- make test
diff --git a/.github/workflows/bindings-ruby.yml b/.github/workflows/bindings-ruby.yml
deleted file mode 100644
index c3f158e26e4..00000000000
--- a/.github/workflows/bindings-ruby.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-name: Bindings Tests (Ruby)
-
-on:
- push:
- branches:
- - master
- pull_request:
- types: [opened, synchronize, reopened]
-
-jobs:
- ubuntu-22:
- runs-on: ubuntu-22.04
- defaults:
- run:
- working-directory: bindings/ruby
- steps:
- - uses: ruby/setup-ruby@v1
- with:
- ruby-version: '3.2'
- - uses: actions/checkout@v6
- - run: rake test
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index fb115b22abb..5c4710b1663 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,77 +1,83 @@
-name: CI
+name: AMD Build & Release
+# ──────────────────────────────────────────────────────────────────────────────
+# Triggers
+# ──────────────────────────────────────────────────────────────────────────────
on:
- push:
- branches:
- - master
- tags:
- - 'v*'
- paths: ['.github/workflows/build.yml',
- '**/CMakeLists.txt',
- '**/Makefile',
- '**/*.mk',
- '**/*.cmake',
- '**/*.in',
- '**/*.h',
- '**/*.hpp',
- '**/*.c',
- '**/*.cpp',
- '**/*.cu',
- '**/*.cuh',
- '**/*.cl',
- '**/*.swift',
- '**/*.m',
- '**/*.mm',
- '**/*.metal',
- '**/*.comp',
- '**/*.java']
-
- pull_request:
- types: [opened, synchronize, reopened]
+ schedule:
+ - cron: '0 2 * * 1' # Weekly – every Monday at 02:00 UTC
workflow_dispatch:
inputs:
create_release:
- description: 'Create new release'
+ description: 'Create GitHub Release'
required: true
type: boolean
+ default: false
pre_release_tag:
- description: 'Pre-release tag name'
+ description: 'Pre-release tag name (optional, overrides auto-tag)'
required: false
type: string
run_type:
- description: 'Workflow type to run'
+ description: 'Workflow scope'
required: true
type: choice
options:
- - full-ci
- - release-only
+ - full-ci # all jobs
+ - release-only # release-producing jobs only
+ default: full-ci
+ gfx_targets:
+ description: 'ROCm GPU targets (comma-separated)'
+ required: false
+ type: string
+ default: 'gfx1151,gfx1150,gfx120X,gfx110X'
+ rocm_version:
+ description: 'ROCm version (e.g. 7.12.0)'
+ required: false
+ type: string
+ default: '7.12.0'
+ push:
+ tags:
+ - 'v*'
+ pull_request:
+ branches:
+ - master
+ - main
+ types: [opened, synchronize, reopened]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
permissions:
- contents: write # for creating release
+ contents: write
+# ──────────────────────────────────────────────────────────────────────────────
+# Shared environment
+# ──────────────────────────────────────────────────────────────────────────────
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
- ubuntu_image: "ubuntu:22.04"
+ GFX_TARGETS: ${{ github.event.inputs.gfx_targets || 'gfx1151,gfx1150,gfx120X,gfx110X' }}
+ ROCM_VERSION: ${{ github.event.inputs.rocm_version || '7.12.0' }}
+ FLEXML_URL: "https://github.com/lemonade-sdk/whisper.cpp/releases/download/deps/flexmlrt1.7.0-win.zip"
VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"
jobs:
+
+# ════════════════════════════════════════════════════════════════════════════════
+# 0. Determine release tag
+# ════════════════════════════════════════════════════════════════════════════════
determine-tag:
runs-on: ubuntu-latest
outputs:
- tag_name: ${{ steps.tag.outputs.name }}
+ tag_name: ${{ steps.tag.outputs.name }}
+ version: ${{ steps.tag.outputs.version }}
should_release: ${{ steps.tag.outputs.should_release }}
-
steps:
- - name: Checkout with full history
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
with:
fetch-depth: 0
- - name: Determine tag name
+ - name: Determine tag and version
id: tag
shell: bash
run: |
@@ -80,1481 +86,1255 @@ jobs:
CUSTOM_TAG="${{ github.event.inputs.pre_release_tag }}"
SHOULD_RELEASE="false"
- echo "Raw values:"
- echo "BUILD_NUMBER: $BUILD_NUMBER"
- echo "SHORT_HASH: $SHORT_HASH"
- echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}"
- echo "CUSTOM_TAG: $CUSTOM_TAG"
-
if [[ "${{ github.ref_type }}" == "tag" ]]; then
- echo "Using pushed tag name"
+ # Triggered by sync.yml pushing a vX.Y.Z tag — this is the primary release path
TAG_NAME="${{ github.ref_name }}"
SHOULD_RELEASE="true"
elif [[ -n "$CUSTOM_TAG" ]]; then
- echo "Using custom tag"
- TAG_NAME="${CUSTOM_TAG}"
+ TAG_NAME="$CUSTOM_TAG"
SHOULD_RELEASE="true"
elif [[ "${{ github.event.inputs.create_release }}" == "true" ]]; then
- echo "Manual release requested"
- SHOULD_RELEASE="true"
TAG_NAME="b${BUILD_NUMBER}"
- elif [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
- echo "Using master branch format"
+ SHOULD_RELEASE="true"
+ elif [[ "${{ env.BRANCH_NAME }}" == "main" || "${{ env.BRANCH_NAME }}" == "master" ]]; then
TAG_NAME="b${BUILD_NUMBER}"
SHOULD_RELEASE="false"
else
- echo "Using non-master branch format"
- SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
- TAG_NAME="${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}"
+ SAFE=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+ TAG_NAME="${SAFE}-b${BUILD_NUMBER}-${SHORT_HASH}"
SHOULD_RELEASE="false"
fi
- echo "Final tag name: $TAG_NAME"
- echo "Should release: $SHOULD_RELEASE"
- echo "name=$TAG_NAME" >> $GITHUB_OUTPUT
- echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT
-
-
- ubuntu-22:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
-
- strategy:
- fail-fast: false
- matrix:
- arch: [linux/amd64, linux/ppc64le]
-
- steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
-
- - name: Build ${{ matrix.arch }}
- run: |
- docker run --platform ${{ matrix.arch }} --rm \
- -v ${{ github.workspace }}:/workspace \
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
- set -e
- export DEBIAN_FRONTEND=noninteractive
- sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
- sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
- apt update
- apt install -y build-essential libsdl2-dev cmake git
- cmake -B build
- cmake --build build --config Release -j $(nproc)'
-
- ubuntu-22-arm64:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
-
- strategy:
- fail-fast: false
- matrix:
- arch: [linux/arm64]
-
- steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
-
- - name: Build ${{ matrix.arch }}
- run: |
- docker run --platform ${{ matrix.arch }} --rm \
- -v ${{ github.workspace }}:/workspace \
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
- set -e
- export DEBIAN_FRONTEND=noninteractive
- sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
- sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
- apt update
- apt install -y build-essential libsdl2-dev cmake git
- cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
- cmake --build build --config Release -j $(nproc)'
-
- ubuntu-22-arm-v7:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
-
- strategy:
- fail-fast: false
- matrix:
- arch: [linux/arm/v7]
-
- steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
+ # Version used in artifact filenames — keep leading 'v' to match lemonade expectations
+ # e.g. v1.8.4 → v1.8.4, b1234 → b1234
+ VERSION="${TAG_NAME}"
- - name: Build ${{ matrix.arch }}
- run: |
- docker run --platform ${{ matrix.arch }} --rm \
- -v ${{ github.workspace }}:/workspace \
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
- set -e
- export DEBIAN_FRONTEND=noninteractive
- sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
- sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
- apt update
- apt install -y build-essential libsdl2-dev cmake git
- cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
- cmake --build build --config Release -j $(nproc)'
-
- macOS-latest:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: macOS-latest
-
- strategy:
- matrix:
- destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
+ echo "name=$TAG_NAME" >> $GITHUB_OUTPUT
+ echo "version=$VERSION" >> $GITHUB_OUTPUT
+ echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT
+# ════════════════════════════════════════════════════════════════════════════════
+# 1. ROCm matrix (Linux + Windows per GFX target)
+# ════════════════════════════════════════════════════════════════════════════════
+ prepare-rocm-matrix:
+ runs-on: ubuntu-latest
+ outputs:
+ ubuntu_matrix: ${{ steps.m.outputs.ubuntu_matrix }}
+ windows_matrix: ${{ steps.m.outputs.windows_matrix }}
steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
-
- - name: ccache
- uses: hendrikmuhs/ccache-action@v1.2.16
- with:
- key: macOS-latest-swift
- evict-old-files: 1d
-
- - name: Dependencies
+ - name: Build matrix JSON
+ id: m
run: |
- brew update
- cmake --version
- brew install sdl2
+ targets="${{ env.GFX_TARGETS }}"
+ arr=$(echo "$targets" | tr ',' '\n' | sed 's/^ *//;s/ *$//' | jq -R . | jq -s .)
- - name: Build
- run: |
- sysctl -a
- cmake -B build -G Xcode \
- -DGGML_METAL_USE_BF16=ON \
- -DGGML_METAL_EMBED_LIBRARY=ON \
- -DWHISPER_BUILD_EXAMPLES=OFF \
- -DWHISPER_BUILD_TESTS=OFF \
- -DWHISPER_BUILD_SERVER=OFF \
- -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
- cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
-
-
-# freeBSD-latest:
-# runs-on: macos-13
-#
-# steps:
-# - name: Clone
-# uses: actions/checkout@v6
-#
-# - name: Build
-# uses: cross-platform-actions/action@v0.27.0
-# with:
-# operating_system: freebsd
-# version: '14.2'
-# run: |
-# sudo pkg update
-# sudo pkg install -y gmake sdl2 cmake git
-# cmake -B build
-# cmake --build build --config Release
-
- ubuntu-22-gcc:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
+ ubuntu_matrix=$(echo "$arr" | jq -c \
+ '{gfx_target: ., build: ["Release"], sdl2: ["ON"], arch: ["linux/amd64"]}')
- strategy:
- fail-fast: false
- matrix:
- build: [Debug, Release]
- arch: [linux/amd64, linux/ppc64le]
+ windows_matrix=$(echo "$arr" | jq -c \
+ '{gfx_target: ., build: ["Release"], sdl2: ["ON"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}')
- steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
-
- - name: Build ${{ matrix.arch }}
- run: |
- docker run --platform ${{ matrix.arch }} --rm \
- -v ${{ github.workspace }}:/workspace \
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
- set -e
- export DEBIAN_FRONTEND=noninteractive
- sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
- sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
- apt update
- apt install -y build-essential cmake libsdl2-dev git
- cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
- make
- ctest -L gh --output-on-failure'
-
- ubuntu-22-gcc-arm64:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
+ echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT
+ echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT
- strategy:
- fail-fast: false
- matrix:
- build: [Debug, Release]
- arch: [linux/arm64]
-
- steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
-
- - name: Build ${{ matrix.arch }}
- run: |
- docker run --platform ${{ matrix.arch }} --rm \
- -v ${{ github.workspace }}:/workspace \
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
- set -e
- export DEBIAN_FRONTEND=noninteractive
- sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
- sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
- apt update
- apt install -y build-essential cmake libsdl2-dev git
- cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
- make
- ctest -L gh --output-on-failure'
-
- ubuntu-22-gcc-arm-v7:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
+# ════════════════════════════════════════════════════════════════════════════════
+# 2. ROCm — Linux
+# ════════════════════════════════════════════════════════════════════════════════
+ linux-rocm:
runs-on: ubuntu-22.04
-
- strategy:
- fail-fast: false
- matrix:
- build: [Debug, Release]
- arch: [linux/arm/v7]
-
- steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
-
- - name: Build ${{ matrix.arch }}
- run: |
- docker run --platform ${{ matrix.arch }} --rm \
- -v ${{ github.workspace }}:/workspace \
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
- set -e
- export DEBIAN_FRONTEND=noninteractive
- sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
- sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
- apt update
- apt install -y build-essential cmake libsdl2-dev git
- cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
- make
- ctest -L gh --output-on-failure'
-
- ubuntu-22-clang:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
-
- strategy:
- fail-fast: false
- matrix:
- build: [Debug, Release]
- #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
- # TODO: arm/v7 disabled due to clang bug
- # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990
- arch: [linux/amd64, linux/arm64, linux/ppc64le]
-
- steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
-
- - name: Build ${{ matrix.arch }}
- run: |
- docker run --platform ${{ matrix.arch }} --rm \
- -v ${{ github.workspace }}:/workspace \
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
- set -e
- export DEBIAN_FRONTEND=noninteractive
- sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
- sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
- apt update
- apt install -y clang build-essential cmake libsdl2-dev git
- cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
- make
- ctest -L gh --output-on-failure'
-
- ubuntu-22-gcc-sanitized:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
-
+ needs: [determine-tag, prepare-rocm-matrix]
strategy:
+ matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.ubuntu_matrix) }}
fail-fast: false
- matrix:
- sanitizer: [ADDRESS, THREAD, UNDEFINED]
- arch: [linux/amd64]
steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
-
- - name: Build ${{ matrix.arch }}
- run: |
- docker run --platform ${{ matrix.arch }} --rm \
- -v ${{ github.workspace }}:/workspace \
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
- set -e
- export DEBIAN_FRONTEND=noninteractive
- sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
- sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
- apt update
- apt install -y build-essential cmake git
- cmake . -DCMAKE_BUILD_TYPE=Debug \
- -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON \
- -DGGML_OPENMP=OFF
- make
- ctest -L gh --output-on-failure'
-
- ubuntu-22-cmake-sycl:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
-
- strategy:
- fail-fast: false
- matrix:
- dwhisper_sycl: [ON]
- dcmake_c_compiler: [icx]
- dcmake_cxx_compiler: [icpx]
- arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
-
- continue-on-error: true
-
- steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: add oneAPI to apt
- shell: bash
- run: |
- cd /tmp
- wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
-
- - name: install oneAPI dpcpp compiler
- shell: bash
+ - name: Free disk space
run: |
- sudo apt update
- sudo apt install intel-oneapi-compiler-dpcpp-cpp git
+ sudo rm -rf /usr/local/lib/android /opt/ghc /usr/local/share/boost \
+ /usr/share/dotnet /usr/local/.ghcup /opt/hostedtoolcache/CodeQL
+ sudo docker image prune --all --force 2>/dev/null || true
- - name: install oneAPI MKL library
- shell: bash
- run: |
- sudo apt install intel-oneapi-mkl-devel git
+ - uses: actions/checkout@v4
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ - name: Install dependencies
+ run: sudo apt-get update && sudo apt-get install -y cmake ninja-build curl build-essential libsdl2-dev git patchelf
- - name: Build
- id: cmake_build
+ - name: Download ROCm tarball
run: |
- source /opt/intel/oneapi/setvars.sh
- mkdir build
- cd build
- cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
- cmake --build . --config Release -j $(nproc)
-
- ubuntu-22-cmake-sycl-fp16:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
-
- strategy:
- fail-fast: false
- matrix:
- dwhisper_sycl: [ON]
- dcmake_c_compiler: [icx]
- dcmake_cxx_compiler: [icpx]
- arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
+ source ci/resolve-rocm-version.sh linux "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}"
+ echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV
+ sudo mkdir -p /opt/rocm
+ curl -L "$ROCM_TARBALL_URL" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1
- continue-on-error: true
-
- steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: add oneAPI to apt
- shell: bash
+ - name: Set ROCm env
run: |
- cd /tmp
- wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
- sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+ echo "HIP_PATH=/opt/rocm" >> $GITHUB_ENV
+ echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV
+ echo "HIP_PLATFORM=amd" >> $GITHUB_ENV
+ echo "/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH" >> $GITHUB_PATH
- - name: install oneAPI dpcpp compiler
- shell: bash
+ - name: Find bitcode path
run: |
- sudo apt update
- sudo apt install intel-oneapi-compiler-dpcpp-cpp git
+ BITCODE_PATH=$(find /opt/rocm -type d -name bitcode -print -quit)
+ [ -z "$BITCODE_PATH" ] && { echo "::error::bitcode dir not found"; exit 1; }
+ echo "ROCM_BITCODE_PATH=$BITCODE_PATH" >> $GITHUB_ENV
- - name: install oneAPI MKL library
- shell: bash
+ - name: Configure CMake
run: |
- sudo apt install intel-oneapi-mkl-devel
-
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ source ci/map-gpu-target.sh "${{ matrix.gfx_target }}"
+ cmake -S . -B build -G Ninja \
+ -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
+ -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
+ -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm --rocm-device-lib-path=${{ env.ROCM_BITCODE_PATH }}" \
+ -DCMAKE_PREFIX_PATH=/opt/rocm \
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
+ -DGPU_TARGETS="$MAPPED_GPU_TARGET" \
+ -DGGML_HIP=ON \
+ -DWHISPER_BUILD_SERVER=ON \
+ -DWHISPER_SDL2=${{ matrix.sdl2 }}
- name: Build
- id: cmake_build
run: |
- source /opt/intel/oneapi/setvars.sh
- mkdir build
- cd build
- cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
- cmake --build . --config Release -j $(nproc)
+          exit_code=0  # the step shell runs with -e: capture the status via `||`, otherwise a failed build aborts here and the log below is never printed
+          cmake --build build --config ${{ matrix.build }} -j$(nproc) > build.log 2>&1 || exit_code=$?
+          grep -E "error:|FAILED|Linking|Built target|warning:" build.log || true
+          if [ "$exit_code" -ne 0 ]; then
+            echo "--- Last 100 lines of build log ---"
+            tail -100 build.log
+            echo "Build failed with exit code $exit_code"
+            exit "$exit_code"
+          fi
+          echo "Build succeeded."
- windows-msys2:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: windows-latest
+ - name: Verify build output
+ run: |
+ if [ ! -f build/bin/whisper-cli ]; then
+ echo "::error::whisper-cli not found - build likely truncated"
+ ls -lh build/bin/ 2>/dev/null || true
+ exit 1
+ fi
+ if [ ! -f build/bin/whisper-server ]; then
+ echo "::error::whisper-server not found - build likely truncated"
+ ls -lh build/bin/ 2>/dev/null || true
+ exit 1
+ fi
+ echo "Build output:"; ls -lh build/bin/whisper-cli build/bin/whisper-server
+
+ - name: Inspect shared library dependencies
+ run: |
+ echo "--- ldd whisper-cli ---"
+ ldd build/bin/whisper-cli || true
+ echo "--- ldd whisper-server ---"
+ ldd build/bin/whisper-server || true
+ echo "--- missing libs ---"
+ MISSING_CLI=$(ldd build/bin/whisper-cli 2>/dev/null | grep "not found" || true)
+ MISSING_SRV=$(ldd build/bin/whisper-server 2>/dev/null | grep "not found" || true)
+ if [ -z "$MISSING_CLI" ] && [ -z "$MISSING_SRV" ]; then
+ echo "All dependencies resolved."
+ else
+ [ -n "$MISSING_CLI" ] && echo "whisper-cli missing:" && echo "$MISSING_CLI"
+ [ -n "$MISSING_SRV" ] && echo "whisper-server missing:" && echo "$MISSING_SRV"
+ fi
+ echo "--- .so* files in build/bin/ ---"
+ ls -lh build/bin/*.so* 2>/dev/null || echo "(none)"
+
+      - name: Copy ROCm runtime libs
+        run: |
+          BIN="build/bin"
+          mkdir -p "$BIN"
+          # Stage the project's own shared libraries beside the executables (best-effort: a missing path is ignored).
+          cp -a build/src/libwhisper.so* "$BIN"/ 2>/dev/null || true
+          cp -a build/ggml/src/libggml.so* "$BIN"/ 2>/dev/null || true
+          cp -a build/ggml/src/libggml-base.so* "$BIN"/ 2>/dev/null || true
+          cp -a build/ggml/src/libggml-cpu.so* "$BIN"/ 2>/dev/null || true
+          cp -a build/ggml/src/ggml-hip/libggml-hip.so* "$BIN"/ 2>/dev/null || true
+          export LD_LIBRARY_PATH="$PWD/$BIN:/opt/rocm/lib:/opt/rocm/lib/llvm/lib:/opt/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}"
+
+          # Direct deps: for each ELF executable in $BIN, copy every library that ldd
+          # resolves under /opt/rocm, plus the sibling .so names found next to it.
+          for binary in "$BIN"/*; do
+            [ -f "$binary" ] && [ -x "$binary" ] && file "$binary" | grep -q ELF || continue
+            ldd "$binary" 2>/dev/null | awk '{print $3}' | grep -E "^/opt/rocm" | while read -r lib; do
+              [ -f "$lib" ] || continue
+              cp -n "$lib" "$BIN/" 2>/dev/null || true
+              # Also copy the soname symlink if it exists alongside the real file
+              dir=$(dirname "$lib")
+              base=$(basename "$lib")
+              find "$dir" -maxdepth 1 -name "${base%%.*}.so*" -exec cp -Pn {} "$BIN/" \; 2>/dev/null || true
+            done
+          done
+
+          # Transitive deps: repeat ldd over any newly copied ROCm .so to catch indirect deps
+          for pass in 1 2; do
+            for lib in "$BIN"/lib*.so*; do
+              [ -f "$lib" ] && [ ! -L "$lib" ] || continue
+              ldd "$lib" 2>/dev/null | awk '{print $3}' | grep -E "^/opt/rocm" | while read -r dep; do
+                [ -f "$dep" ] || continue
+                cp -n "$dep" "$BIN/" 2>/dev/null || true
+              done
+            done
+          done
+
+          # Kernel library data dirs (loaded at runtime by path, not via soname); `if` keeps the step's final exit status 0 when a dir is absent
+          if [ -d /opt/rocm/lib/rocblas/library ]; then mkdir -p "$BIN/rocblas"; cp -r /opt/rocm/lib/rocblas/library "$BIN/rocblas/"; fi
+          if [ -d /opt/rocm/lib/hipblaslt/library ]; then mkdir -p "$BIN/hipblaslt"; cp -r /opt/rocm/lib/hipblaslt/library "$BIN/hipblaslt/"; fi
+
+ - name: Set portable RPATH
+ run: |
+ cd build/bin
+ for f in *.so* whisper-*; do
+ [ -f "$f" ] && [ ! -L "$f" ] && file "$f" | grep -q ELF && patchelf --set-rpath '$ORIGIN' "$f" 2>/dev/null || true
+ done
+
+ - name: Package
+ run: |
+ VER="${{ needs.determine-tag.outputs.version }}"
+ ARCHIVE="whisper-${VER}-linux-rocm-${{ matrix.gfx_target }}.tar.gz"
+ STAGE="whisper-${VER}-linux-rocm-${{ matrix.gfx_target }}"
+ mkdir -p "$STAGE" && cp -r build/bin/* "$STAGE/"
+ tar -czf "$ARCHIVE" "$STAGE"
+ echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: ${{ env.ARCHIVE }}
+ path: ${{ env.ARCHIVE }}
+# ════════════════════════════════════════════════════════════════════════════════
+# 3. ROCm — Windows
+# ════════════════════════════════════════════════════════════════════════════════
+ windows-rocm:
+ runs-on: windows-2022
+ needs: [determine-tag, prepare-rocm-matrix]
strategy:
+ matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.windows_matrix) }}
fail-fast: false
- matrix:
- include:
- - { sys: UCRT64, env: ucrt-x86_64, build: Release }
- - { sys: CLANG64, env: clang-x86_64, build: Release }
steps:
- - name: Clone
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
- - name: Setup ${{ matrix.sys }}
- uses: msys2/setup-msys2@v2
- with:
- update: true
- msystem: ${{matrix.sys}}
- install: >-
- base-devel
- git
- mingw-w64-${{matrix.env}}-toolchain
- mingw-w64-${{matrix.env}}-cmake
- mingw-w64-${{matrix.env}}-SDL2
- mingw-w64-${{matrix.env}}-openblas
+ - name: Install Ninja
+ shell: powershell
+ run: choco install ninja -y
- - name: Build using CMake
- shell: msys2 {0}
+ - name: Fetch SDL2 and patch header
+ if: matrix.sdl2 == 'ON'
+ shell: powershell
run: |
- cmake -B build -DWHISPER_SDL2=ON
- cmake --build build --config ${{ matrix.build }} -j $(nproc)
+          $url = "https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip"
+          Invoke-WebRequest -Uri $url -OutFile sdl2.zip
+          7z x sdl2.zip
+          $cmake = Get-ChildItem -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1  # its folder is exported as SDL2_DIR; written as UTF-8 because >> in Windows PowerShell appends UTF-16
+          if ($cmake) { "SDL2_DIR=$($cmake.DirectoryName)" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 }
+          else { Write-Error "sdl2-config.cmake not found"; exit 1 }
+          $hdr = Get-ChildItem -Recurse -Filter "SDL_endian.h" | Select-Object -First 1
+          if ($hdr) {
+            $c = Get-Content $hdr.FullName -Raw
+            if ($c -match 'extern void _m_prefetch') {
+              $c = $c -replace 'extern void _m_prefetch\(void \*__P\);','// extern void _m_prefetch(void *__P);'
+              Set-Content $hdr.FullName $c
+            }
+          } else { Write-Error "SDL_endian.h not found"; exit 1 }
+
+      - name: Download and extract ROCm tarball
+        shell: powershell
+        run: |
+          $gfx = "${{ matrix.gfx_target }}"
+          $ver = "${{ env.ROCM_VERSION }}"
+          $base = switch ($gfx) {
+            "gfx110X" { "gfx110X-all" }
+            "gfx120X" { "gfx120X-all" }
+            "gfx1150" { "gfx1150" }
+            "gfx1151" { "gfx1151" }
+            "gfx1152" { "gfx1152" }
+            default { $gfx }
+          }
+          $url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${base}-${ver}.tar.gz"
+          Write-Host "ROCm URL: $url"
+          "DETECTED_ROCM_VERSION=$ver" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
+          curl.exe -L --fail --retry 3 --retry-delay 5 -o rocm.tar.gz $url  # --fail: an HTTP error (e.g. 404) becomes a non-zero exit instead of a saved error page
+          if ($LASTEXITCODE -ne 0) { Write-Error "curl failed with exit code $LASTEXITCODE"; exit 1 }
+          New-Item -ItemType Directory -Force -Path "C:\opt\rocm" | Out-Null
+          tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1
+
+ - name: Map GPU target
+ id: gpu
+ shell: powershell
+ run: |
+ $gfx = "${{ matrix.gfx_target }}"
+ $mapped = switch ($gfx) {
+ "gfx110X" { "gfx1100;gfx1101;gfx1102" }
+ "gfx120X" { "gfx1200;gfx1201" }
+ default { $gfx }
+ }
+ Write-Host "Mapped GPU target: $gfx -> $mapped"
+ "mapped=$mapped" | Out-File $env:GITHUB_OUTPUT -Append -Encoding utf8
+
+ - name: Configure CMake
+ shell: powershell
+ run: |
+ $env:HIP_PATH = "C:\opt\rocm"
+ $env:HIP_PLATFORM = "amd"
+ $env:PATH = "$env:HIP_PATH\bin;$env:HIP_PATH\lib\llvm\bin;$env:PATH"
+ cmake -S . -B build `
+ -G "Ninja Multi-Config" `
+ -DGPU_TARGETS="${{ steps.gpu.outputs.mapped }}" `
+ -DGGML_HIP=ON `
+ -DCMAKE_C_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang.exe" `
+ -DCMAKE_CXX_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang++.exe" `
+ -DCMAKE_HIP_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang++.exe" `
+ "-DCMAKE_C_FLAGS='-D__PRFCHWINTRIN_H'" `
+ "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'" `
+ "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm" `
+ -DCMAKE_PREFIX_PATH="$env:HIP_PATH" `
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }} `
+ -DWHISPER_BUILD_SERVER=ON `
+ -DWHISPER_SDL2=${{ matrix.sdl2 }}
- - name: Clean after building using CMake
- shell: msys2 {0}
- run: |
- rm -rf build
+      - name: Build
+        shell: powershell
+        run: |
+          cmd /c "cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS > build.log 2>&1"
+          $exit = $LASTEXITCODE  # redirection is done by cmd.exe: in Windows PowerShell, native stderr sent through 2>&1 becomes error records, which can abort the script under the runner's 'stop' error preference
+          # Show only errors and link steps - keeps log under GitHub's line limit
+          Get-Content build.log | Select-String -Pattern "error:|FAILED|Linking|Built target|warning: " | Write-Host
+          if ($exit -ne 0) {
+            Write-Host "--- Last 100 lines of build log ---"
+            Get-Content build.log -Tail 100
+            Write-Error "Build failed with exit code $exit"
+            exit $exit
+          }
+          Write-Host "Build succeeded."
+
+ - name: Copy ROCm DLLs
+ shell: powershell
+ run: |
+ $bin = "build/bin/${{ matrix.build }}"
+ $rocBin = "C:\opt\rocm\bin"
+ @("amdhip64_*.dll","amd_comgr*.dll","libhipblas.dll","rocblas.dll",
+ "rocsolver.dll","hipblaslt.dll","libhipblaslt.dll","hipblas.dll") | ForEach-Object {
+ Get-ChildItem $rocBin -Name $_ -ErrorAction SilentlyContinue |
+ ForEach-Object { Copy-Item (Join-Path $rocBin $_) (Join-Path $bin $_) }
+ }
+ $rocLib = Join-Path $rocBin "rocblas\library"
+ if (Test-Path $rocLib) { Copy-Item $rocLib -Destination (Join-Path $bin "rocblas\library") -Recurse -Force }
+ $hipLib = Join-Path $rocBin "hipblaslt\library"
+ if (Test-Path $hipLib) { Copy-Item $hipLib -Destination (Join-Path $bin "hipblaslt\library") -Recurse -Force }
- - name: Build using CMake w/ OpenBLAS
- shell: msys2 {0}
- run: |
- cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
- cmake --build build --config ${{ matrix.build }} -j $(nproc)
+ # Ship the SDL2 runtime next to the executables (SDL2-enabled builds only).
+ - name: Copy SDL2.dll
+ if: matrix.sdl2 == 'ON'
+ shell: powershell
+ run: Copy-Item -Path "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" -Destination "build/bin/${{ matrix.build }}"
+
+ - name: Verify build output
+ shell: powershell
+ run: |
+ # Fail when an expected executable is missing. The directory listing is
+ # printed BEFORE the failure is reported: Write-Error is terminating
+ # under the runner's $ErrorActionPreference = 'stop', so anything placed
+ # after it (the listing, the explicit exit) would never run.
+ $bin = "build/bin/${{ matrix.build }}"
+ foreach ($exe in @("whisper-cli.exe", "whisper-server.exe")) {
+ if (-not (Test-Path "$bin/$exe")) {
+ Get-ChildItem $bin -ErrorAction SilentlyContinue | Format-Table Name, Length | Out-String | Write-Host
+ Write-Host "::error::$exe not found in $bin - build likely truncated"
+ exit 1
+ }
+ }
+ Write-Host "Build output:"
+ Get-ChildItem $bin -Filter "*.exe" | Format-Table Name, Length
+
+ - name: Package
+ shell: powershell
+ run: |
+ # Zip the build output and publish the archive name as ARCHIVE for later steps.
+ $zipName = "whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-${{ matrix.gfx_target }}.zip"
+ Compress-Archive -Path "build/bin/${{ matrix.build }}/*" -DestinationPath $zipName -Force
+ Out-File -FilePath $env:GITHUB_ENV -InputObject "ARCHIVE=$zipName" -Append -Encoding utf8
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: ${{ env.ARCHIVE }}
+ path: ${{ env.ARCHIVE }}
- windows:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: windows-latest
+# ════════════════════════════════════════════════════════════════════════════════
+# 4. Vulkan — Linux
+# ════════════════════════════════════════════════════════════════════════════════
+ linux-vulkan:
+ runs-on: ubuntu-latest
needs: determine-tag
- strategy:
- matrix:
- build: [Release]
- arch: [Win32, x64]
- sdl2: [ON]
- include:
- - arch: Win32
- s2arc: x86
- jnaPath: win32-x86
- - arch: x64
- s2arc: x64
- jnaPath: win32-x86-64
- - sdl2: ON
- s2ver: 2.28.5
-
steps:
- - name: Clone
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
- - name: Add msbuild to PATH
- uses: microsoft/setup-msbuild@v2
-
- - name: Fetch SDL2 and set SDL2_DIR
- if: matrix.sdl2 == 'ON'
+ - name: Install dependencies
run: |
- C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
- 7z x sdl2.zip
- echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
-
- - name: Configure
- run: >
- cmake -S . -B ./build -A ${{ matrix.arch }}
- -DCMAKE_BUILD_TYPE=${{ matrix.build }}
- -DBUILD_SHARED_LIBS=ON
- -DWHISPER_SDL2=${{ matrix.sdl2 }}
+ sudo apt-get update
+ sudo apt-get install -y build-essential cmake git libsdl2-dev pkg-config libvulkan-dev vulkan-tools
+ sudo apt-get install -y glslc || sudo apt-get install -y shaderc
- - name: Build
+ - name: Check Vulkan availability
run: |
- cd ./build
- msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
-
- - name: Copy SDL2.dll
- if: matrix.sdl2 == 'ON'
- run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
-
- - name: Upload SDL2.dll
- if: matrix.sdl2 == 'ON'
- uses: actions/upload-artifact@v6
- with:
- name: ${{ matrix.s2arc }}_SDL2.dll
- path: build/bin/${{ matrix.build }}/SDL2.dll
-
- - name: Upload whisper dll
- uses: actions/upload-artifact@v6
- with:
- name: whisper_${{ matrix.arch }}.dll
- path: build/bin/${{ matrix.build }}/whisper.dll
+ # Fail early unless a GLSL compiler (glslc or glslangValidator) is on PATH.
+ # NOTE(review): the error text mentions only glslc / shaderc - confirm
+ # whether glslangValidator alone is really sufficient for the build.
+ if ! command -v glslc >/dev/null 2>&1 && ! command -v glslangValidator >/dev/null 2>&1; then
+ echo "::error::No GLSL compiler found (glslc / shaderc)"; exit 1
+ fi
- - name: Upload ggml dll
- uses: actions/upload-artifact@v6
- with:
- name: ggml_${{ matrix.arch }}.dll
- path: build/bin/${{ matrix.build }}/ggml.dll
+ - name: Configure CMake
+ run: |
+ # Release configuration with the Vulkan backend, examples and server
+ # enabled and tests disabled. Native CPU detection is off; AVX, AVX2 and
+ # FMA are on, AVX512 is off, and the compilers get -march=x86-64-v3.
+ cmake -B build \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_NATIVE=OFF \
+ -DGGML_AVX=ON \
+ -DGGML_AVX2=ON \
+ -DGGML_FMA=ON \
+ -DGGML_AVX512=OFF \
+ -DCMAKE_C_FLAGS="-march=x86-64-v3" \
+ -DCMAKE_CXX_FLAGS="-march=x86-64-v3" \
+ -DGGML_VULKAN=ON \
+ -DWHISPER_BUILD_EXAMPLES=ON \
+ -DWHISPER_BUILD_TESTS=OFF \
+ -DWHISPER_BUILD_SERVER=ON
- - name: Upload ggml base dll
- uses: actions/upload-artifact@v6
- with:
- name: ggml_base_${{ matrix.arch }}.dll
- path: build/bin/${{ matrix.build }}/ggml-base.dll
+ - name: Build
+ run: cmake --build build --config Release -j$(nproc)
- - name: Upload ggml cpu dll
- uses: actions/upload-artifact@v6
- with:
- name: ggml_cpu_${{ matrix.arch }}.dll
- path: build/bin/${{ matrix.build }}/ggml-cpu.dll
+ - name: Validate Vulkan artifacts
+ run: |
+ # Count files under build/ whose name contains "vulkan" (case-insensitive)
+ # and warn - without failing the job - when there are none.
+ vulkan_count=$(find build -type f -iname "*vulkan*" 2>/dev/null | wc -l)
+ if [ "$vulkan_count" -gt 0 ]; then
+ echo "Vulkan artifacts found: $vulkan_count file(s)"
+ else
+ echo "::warning::No Vulkan-related artifacts found"
+ fi
- - name: Pack bin artifacts
- shell: pwsh
+ - name: Package
run: |
- Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-bin-${{ matrix.arch }}.zip"
+ # Stage executables and shared libraries, then publish the tarball name
+ # as ARCHIVE for the upload step.
+ VER="${{ needs.determine-tag.outputs.version }}"
+ ARCHIVE="whisper-${VER}-linux-vulkan-x86_64.tar.gz"
+ STAGE="whisper-${VER}-linux-vulkan-x86_64"
+ mkdir -p "$STAGE"
+ cp -r build/bin/* "$STAGE/" 2>/dev/null || true
+ # Shared libraries are collected best-effort from anywhere in the build tree.
+ find build -name "*.so*" -exec cp {} "$STAGE/" \; 2>/dev/null || true
+ # The copies above swallow errors; refuse to ship an archive that lacks
+ # the main executables instead of uploading an empty package.
+ for exe in whisper-cli whisper-server; do
+ if [ ! -f "$STAGE/$exe" ]; then
+ echo "::error::$exe missing from $STAGE"
+ exit 1
+ fi
+ done
+ tar -czf "$ARCHIVE" "$STAGE"
+ echo "ARCHIVE=$ARCHIVE" >> "$GITHUB_ENV"
- - name: Upload binaries
- if: matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }}
- uses: actions/upload-artifact@v6
+ - uses: actions/upload-artifact@v4
with:
- name: whisper-bin-${{ matrix.arch }}.zip
- path: whisper-bin-${{ matrix.arch }}.zip
+ name: ${{ env.ARCHIVE }}
+ path: ${{ env.ARCHIVE }}
- windows-blas:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
+# ════════════════════════════════════════════════════════════════════════════════
+# 5. Vulkan — Windows
+# ════════════════════════════════════════════════════════════════════════════════
+ windows-vulkan:
runs-on: windows-latest
-
- strategy:
- matrix:
- build: [Release]
- arch: [Win32, x64]
- blas: [ON]
- sdl2: [ON]
- blasver: [0.3.29]
- include:
- - arch: Win32
- s2arc: x86
- blasfile: x86
- - arch: x64
- s2arc: x64
- blasfile: x64_64
- - sdl2: ON
- s2ver: 2.28.5
+ needs: determine-tag
steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: Export GitHub Actions cache environment variables
- uses: actions/github-script@v8
- with:
- script: |
- core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
- core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
+ - uses: actions/checkout@v4
- - name: Add msbuild to PATH
- uses: microsoft/setup-msbuild@v2
+ - uses: microsoft/setup-msbuild@v2
- - name: Install OpenBLAS and pkgconfiglite
- if: matrix.blas == 'ON'
+ - name: Install Vulkan SDK
+ shell: pwsh
run: |
- Invoke-WebRequest "https://github.com/OpenMathLib/OpenBLAS/releases/download/v${{matrix.blasver}}/OpenBLAS-${{matrix.blasver}}_${{matrix.blasfile}}.zip" -OutFile "OpenBLAS-${{matrix.blasver}}.zip"
- Expand-Archive "OpenBLAS-${{matrix.blasver}}.zip" -DestinationPath "OpenBLAS-${{matrix.blasver}}"
- choco install pkgconfiglite
+ # Install the Vulkan SDK and export its root as VULKAN_SDK for later steps.
+ winget install --id KhronosGroup.VulkanSDK -e --silent --accept-package-agreements --accept-source-agreements
+ # Pick the highest-named version directory so the choice is deterministic
+ # when more than one SDK version is present under C:\VulkanSDK.
+ $sdk = Get-ChildItem "C:\VulkanSDK" -Directory -ErrorAction SilentlyContinue | Sort-Object Name -Descending | Select-Object -First 1
+ if (-not $sdk) { throw "Vulkan SDK not found under C:\VulkanSDK" }
+ "VULKAN_SDK=$($sdk.FullName)" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
- - name: Fetch SDL2 and set SDL2_DIR
- if: matrix.sdl2 == 'ON'
+ - name: Fetch SDL2
+ shell: pwsh
run: |
- C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
+ C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-2.28.5/SDL2-devel-2.28.5-VC.zip
7z x sdl2.zip
- echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
-
- - name: Configure
- run: >
- cmake -S . -B ./build -A ${{ matrix.arch }}
- -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
- -DCMAKE_BUILD_TYPE=${{ matrix.build }}
- -DGGML_BLAS=${{ matrix.blas }}
- -DGGML_BLAS_VENDOR=OpenBLAS
- -DBLAS_LIBRARIES="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/lib/libopenblas.lib"
- -DBLAS_INCLUDE_DIRS="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/include"
- -DWHISPER_SDL2=${{ matrix.sdl2 }}
+ "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-2.28.5/cmake" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
- - name: Build
+ - name: Configure CMake
+ shell: pwsh
run: |
- cd ./build
- msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+ # Configure an x64 shared-library build with the Vulkan backend, the
+ # server and SDL2 enabled, passing the SDK root exported earlier.
+ # NOTE(review): -A implies a Visual Studio generator, where
+ # CMAKE_BUILD_TYPE is presumably ignored - confirm.
+ cmake -S . -B ./build -A x64 `
+ -DCMAKE_BUILD_TYPE=Release `
+ -DBUILD_SHARED_LIBS=ON `
+ -DGGML_VULKAN=ON `
+ -DWHISPER_BUILD_SERVER=ON `
+ -DWHISPER_SDL2=ON `
+ -DVULKAN_SDK="$env:VULKAN_SDK"
- - name: Copy openblas.dll
- if: matrix.blas == 'ON'
- run: copy "$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/bin/libopenblas.dll" build/bin/${{ matrix.build }}
+ - name: Build
+ run: cd ./build && msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64
- name: Copy SDL2.dll
- if: matrix.sdl2 == 'ON'
- run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
+ shell: pwsh
+ run: copy "$env:SDL2_DIR/../lib/x64/SDL2.dll" build/bin/Release
- - name: Pack bin artifacts
+ - name: Package
shell: pwsh
run: |
- Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-blas-bin-${{ matrix.arch }}.zip"
+ # Zip the Release output and publish the archive name as ARCHIVE.
+ $zipName = "whisper-${{ needs.determine-tag.outputs.version }}-windows-vulkan-x64.zip"
+ Compress-Archive -Path "build/bin/Release/*" -DestinationPath $zipName -Force
+ Out-File -FilePath $env:GITHUB_ENV -InputObject "ARCHIVE=$zipName" -Append -Encoding utf8
- - name: Upload binaries
- if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }}
- uses: actions/upload-artifact@v6
+ - uses: actions/upload-artifact@v4
with:
- name: whisper-blas-bin-${{ matrix.arch }}.zip
- path: whisper-blas-bin-${{ matrix.arch }}.zip
-
- windows-cublas:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: windows-2022
+ name: ${{ env.ARCHIVE }}
+ path: ${{ env.ARCHIVE }}
+
+# ════════════════════════════════════════════════════════════════════════════════
+# 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner)
+# ════════════════════════════════════════════════════════════════════════════════
+ windows-npu:
+ runs-on: [self-hosted, Windows, stx, rai300_400]
needs: determine-tag
- strategy:
- fail-fast: false
- matrix:
- build: [Release]
- arch: [x64]
- cublas: [ON]
- sdl2: [ON]
- cuda-toolkit: [12.4.0, 11.8.0]
- include:
- - arch: x64
- sdl2: ON
- sdl2_ver: 2.28.5
- steps:
- - name: Clone repository
- uses: actions/checkout@v6
-
- - name: Install Ninja
- id: install_ninja
- run: |
- choco install ninja
-
- - name: Install ccache
- uses: hendrikmuhs/ccache-action@v1.2.16
- with:
- key: ${{ github.job }}-${{ matrix.cuda-toolkit }}-${{ matrix.build }}
- variant: sccache
- evict-old-files: 5d
-
- - name: Install Cuda Toolkit 11.8.0
- if: ${{ matrix.cuda-toolkit == '11.8.0' }}
- run: |
- $CUDA_VERSION = ${{ matrix.cuda-toolkit }}
- $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
- $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
-
- # Components versions
- $CUDART_VER = "11.8.89"
- $NVCC_VER = "11.8.89"
- $NVRTC_VER = "11.8.89"
- $CUBLAS_VER = "11.8.1.74"
- $NVTX_VER = "11.8.86"
- $VS_VER = "11.8.86"
- $NVPROF_VER = "11.8.87"
- $CCCL_VER = "11.8.89"
-
- # Create the directory where the CUDA Toolkit will be installed
- mkdir -p $CUDA_TOOLKIT_DIR
-
- # Install unzip to extract the downloaded files
- choco install unzip -y
-
- # Download all the required components
- curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
-
- # Extract all the downloaded files to the CUDA Toolkit directory
- unzip '*.zip' -d $CUDA_TOOLKIT_DIR
-
- # Copy all the extracted files to the main CUDA Toolkit directory
- xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-
- # Visual Studio integration
- xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y
-
- # Set environment variables
- echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
- echo "CUDA_PATH_V11_8=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
-
- - name: Install Cuda Toolkit 12.4.0
- if: ${{ matrix.cuda-toolkit == '12.4.0' }}
- run: |
- $CUDA_VERSION = ${{ matrix.cuda-toolkit }}
- $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
- $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
-
- # Components versions
- $CUDART_VER = "12.4.127"
- $NVCC_VER = "12.4.131"
- $NVRTC_VER = "12.4.127"
- $CUBLAS_VER = "12.4.5.8"
- $NVTX_VER = "12.4.127"
- $PROFILER_VER = "12.4.127"
- $VS_VER = "12.4.127"
- $NVPROF_VER = "12.4.128"
- $CCCL_VER = "12.4.127"
-
- # Create the directory where the CUDA Toolkit will be installed
- mkdir -p $CUDA_TOOLKIT_DIR
-
- # Install unzip to extract the downloaded files
- choco install unzip -y
-
- # Download all the required components
- curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
- curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
-
- # Extract all the downloaded files to the CUDA Toolkit directory
- unzip -q '*.zip' -d $CUDA_TOOLKIT_DIR
-
- # Copy all the extracted files to the main CUDA Toolkit directory
- xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
- xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-
- # Visual Studio integration
- xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y
-
- # Set environment variables
- echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
- echo "CUDA_PATH_V12_2=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
-
- - name: Add msbuild to PATH
- uses: microsoft/setup-msbuild@v2
-
- - name: Install 7-Zip
- run: choco install 7zip -y
-
- - name: Fetch SDL2 and set SDL2_DIR
- if: matrix.sdl2 == 'ON'
- run: |
- Invoke-WebRequest -Uri https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.sdl2_ver }}/SDL2-devel-${{ matrix.sdl2_ver }}-VC.zip -OutFile sdl2.zip
- 7z x sdl2.zip
- echo "SDL2_DIR=${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" | Out-File -FilePath $env:GITHUB_ENV -Append
- echo "${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" > SDL2_PATH.txt
-
- - name: Install cmake
- run: choco install cmake
+ continue-on-error: true # runner may be offline; don't block release
- - name: Build Project
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: microsoft/setup-msbuild@v2
+
+ - name: Install CMake if not available
+ shell: powershell
+ run: |
+ # Install a pinned CMake from the official MSI only when the runner does
+ # not already have cmake on PATH, then expose it to this and later steps.
+ $installed = Get-Command cmake -ErrorAction SilentlyContinue
+ if (-not $installed) {
+ $ver = "3.28.1"
+ $url = "https://github.com/Kitware/CMake/releases/download/v$ver/cmake-$ver-windows-x86_64.msi"
+ Invoke-WebRequest -Uri $url -OutFile cmake.msi
+ Start-Process msiexec.exe -ArgumentList "/i cmake.msi /quiet /norestart" -Wait
+ $p = "C:\Program Files\CMake\bin"
+ $env:PATH = "$p;$env:PATH"
+ # Windows PowerShell 5.1 ">>" writes UTF-16; GITHUB_PATH must be UTF-8,
+ # so append with an explicit encoding like the other env-file writes.
+ $p | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+ cmake --version
+ if ($LASTEXITCODE -ne 0) { Write-Error "CMake install failed"; exit 1 }
+ } else { cmake --version }
+
+ - name: Download FlexML Runtime
+ shell: powershell
+ run: |
+ # Fetch the FlexML runtime archive and reject a missing or zero-byte download.
+ $zip = "flexmlrt.zip"
+ Invoke-WebRequest -Uri "${{ env.FLEXML_URL }}" -OutFile $zip
+ if (-not (Test-Path $zip)) { Write-Error "flexmlrt.zip not downloaded"; exit 1 }
+ $bytes = (Get-Item $zip).Length
+ if ($bytes -eq 0) { Write-Error "flexmlrt.zip is empty"; exit 1 }
+ $sizeMb = [math]::Round($bytes / 1MB, 2)
+ Write-Host "FlexML: $sizeMb MB downloaded"
+
+ - name: Extract FlexML Runtime
+ shell: powershell
+ run: |
+ # Unpack flexmlrt.zip with tar and fail unless a directory whose name
+ # starts with "flexmlrt" exists afterwards.
+ tar xvf flexmlrt.zip
+ if ($LASTEXITCODE -ne 0) { Write-Error "Extraction failed"; exit 1 }
+ $dirs = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" }
+ if (-not $dirs) { Write-Error "No flexmlrt directory found after extraction"; exit 1 }
+ Write-Host "Extracted: $($dirs.Name)"
+
+ - name: Setup FlexML, configure and build
shell: cmd
run: |
- call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
- cmake --version
- where cmake
- if "${{ matrix.cuda-toolkit }}" == "11.8.0" (
- set CUDA_FLAGS=-allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR
- ) else (
- set CUDA_FLAGS=
- )
- cmake -S . -B build -G "Ninja Multi-Config" ^
- -DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
- -DGGML_CUDA=${{ matrix.cublas }} ^
- -DWHISPER_SDL2=${{ matrix.sdl2 }} ^
- -DSDL2_DIR="%SDL2_DIR%" ^
- -DCMAKE_POLICY_VERSION_MINIMUM=3.5 ^
- -DCMAKE_CUDA_FLAGS="%CUDA_FLAGS%"
- set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
- cmake --build build --config ${{ matrix.build }} -j %NUMBER_OF_PROCESSORS%
-
- - name: Check sccache status after build
- run: |
- sccache --show-stats
-
- - name: Copy CUDA DLLs
- run: |
- Get-ChildItem "$env:CUDA_PATH\bin\" -Filter "*.dll" |
- Copy-Item -Destination "build/bin/${{ matrix.build }}"
-
- - name: Copy SDL2.dll
- if: matrix.sdl2 == 'ON'
- run: copy "$env:SDL2_DIR/../lib/${{ matrix.arch }}/SDL2.dll" build/bin/${{ matrix.build }}
-
- - name: Pack bin artifacts
- shell: pwsh
- run: |
- Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip"
-
- - name: Upload binaries
- if: ${{ needs.determine-tag.outputs.should_release }}
- uses: actions/upload-artifact@v6
+ REM Run setup.bat from inside flexmlrt, then configure and build with
+ REM WHISPER_VITISAI and the server enabled, stopping at the first failure.
+ REM NOTE(review): setup.bat presumably prepares the environment that the
+ REM CMake configure relies on - confirm against the FlexML package.
+ cd flexmlrt
+ call setup.bat
+ if errorlevel 1 ( echo ERROR: FlexML setup.bat failed! & exit /b 1 )
+ cd ..
+ cmake -B build -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_VITISAI=ON -DWHISPER_BUILD_SERVER=ON
+ if errorlevel 1 ( echo ERROR: CMake configure failed! & exit /b 1 )
+ cmake --build build --config Release -j
+ if errorlevel 1 ( echo ERROR: Build failed! & exit /b 1 )
+
+ - name: List build output
+ shell: powershell
+ run: |
+ # Show everything under build/bin/Release; a missing directory fails the step.
+ $outDir = "build/bin/Release"
+ if (-not (Test-Path $outDir)) { Write-Error "build/bin/Release not found"; exit 1 }
+ Get-ChildItem -Path $outDir -Recurse | Format-Table Name, Length
+
+ - name: Copy FlexML DLLs to build output
+ shell: powershell
+ run: |
+ # Copy every DLL found in flexmlrt/bin and flexmlrt/lib next to the built
+ # executables and report how many were found and copied.
+ $copied = 0
+ foreach ($dir in @("flexmlrt/bin", "flexmlrt/lib")) {
+ if (-not (Test-Path $dir)) { continue }
+ $dlls = Get-ChildItem -Path "$dir/*.dll" -ErrorAction SilentlyContinue
+ if ($dlls) {
+ Copy-Item "$dir/*.dll" "build/bin/Release/" -Force
+ $copied += $dlls.Count
+ }
+ }
+ Write-Host "FlexML DLLs copied: $copied"
+
+ - name: Package
+ shell: powershell
+ run: |
+ # Zip the Release output, verify the archive exists, report its size and
+ # publish its name as ARCHIVE for the following steps.
+ $zipName = "whisper-${{ needs.determine-tag.outputs.version }}-windows-npu-x64.zip"
+ Compress-Archive -Path "build/bin/Release/*" -DestinationPath $zipName -Force
+ if (-not (Test-Path $zipName)) { Write-Error "Package creation failed"; exit 1 }
+ $sizeMb = [math]::Round((Get-Item $zipName).Length / 1MB, 2)
+ Write-Host "Package: $zipName ($sizeMb MB)"
+ Out-File -FilePath $env:GITHUB_ENV -InputObject "ARCHIVE=$zipName" -Append -Encoding utf8
+
+ - name: Build summary
+ shell: powershell
+ run: |
+ Write-Host "NPU build complete. Artifact: $env:ARCHIVE"
+
+ - uses: actions/upload-artifact@v4
with:
- name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip
- path: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip
-
- emscripten:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
+ name: ${{ env.ARCHIVE }}
+ path: ${{ env.ARCHIVE }}
- strategy:
- matrix:
- build: [Release]
+# ════════════════════════════════════════════════════════════════════════════════
+# 7. Metal — macOS (arm64)
+# ════════════════════════════════════════════════════════════════════════════════
+ macos-metal:
+ runs-on: macos-latest
+ needs: determine-tag
steps:
- - name: Clone
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
- - name: Setup emsdk
- uses: mymindstorm/setup-emsdk@v14
+ - name: Install dependencies
+ run: brew install cmake ninja
- - name: Verify
- run: emcc -v
+ - name: Configure CMake
+ run: |
+ # Release configuration with the Metal backend, examples and server
+ # enabled and tests disabled. The install rpath is @loader_path and is
+ # applied at build time (CMAKE_BUILD_WITH_INSTALL_RPATH=ON).
+ cmake -B build \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_MACOSX_RPATH=ON \
+ -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
+ -DCMAKE_INSTALL_RPATH="@loader_path" \
+ -DGGML_METAL=ON \
+ -DWHISPER_BUILD_EXAMPLES=ON \
+ -DWHISPER_BUILD_TESTS=OFF \
+ -DWHISPER_BUILD_SERVER=ON
- name: Build
run: |
- emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
- make
+ # Run steps execute under "bash -e", so a failing build would abort the
+ # script before the redirected log is ever printed. Capture the status
+ # with "||" so the summary and log tail below still run.
+ exit_code=0
+ cmake --build build --config Release -j$(sysctl -n hw.logicalcpu) > build.log 2>&1 || exit_code=$?
+ # Keep the step output short: only errors and link/target milestones.
+ grep -E "error:|FAILED|Linking|Built target" build.log || true
+ if [ "$exit_code" -ne 0 ]; then
+ tail -100 build.log
+ exit "$exit_code"
+ fi
+ echo "Build succeeded."
- ios-xcode-build:
- runs-on: macos-latest
- needs: determine-tag
+ - name: Verify build output
+ run: |
+ if [ ! -f build/bin/whisper-cli ]; then
+ echo "::error::whisper-cli not found"
+ ls -lh build/bin/ 2>/dev/null || true
+ exit 1
+ fi
- strategy:
- matrix:
- build: [Release]
+ if [ ! -f build/bin/whisper-server ]; then
+ echo "::error::whisper-server not found"
+ ls -lh build/bin/ 2>/dev/null || true
+ exit 1
+ fi
- steps:
- - name: Checkout code
- uses: actions/checkout@v6
+ echo "--- build/bin ---"
+ ls -lh build/bin/
- - name: Configure
- run: |
- cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
- mkdir models/ggml-base.en-encoder.mlmodelc
+ echo "--- macOS dylibs produced by build ---"
+ find build -name "*.dylib" -print | sort
- - name: Build
- id: cmake_build
- run: |
- sysctl -a
- mkdir build
- cd build
- cmake -G Xcode .. \
- -DGGML_METAL_USE_BF16=ON \
- -DGGML_METAL_EMBED_LIBRARY=ON \
- -DWHISPER_BUILD_EXAMPLES=OFF \
- -DWHISPER_BUILD_TESTS=OFF \
- -DWHISPER_BUILD_SERVER=OFF \
- -DCMAKE_SYSTEM_NAME=iOS \
- -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
- -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
- cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+ echo "--- whisper-server dependencies before packaging ---"
+ otool -L build/bin/whisper-server
- - name: xcodebuild for swift package
- id: xcodebuild
+ - name: Package
run: |
- ./build-xcframework.sh
+ set -euo pipefail
- - name: Build objc example
- run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO FRAMEWORK_FOLDER_PATH=./build-ios build
+ VER="${{ needs.determine-tag.outputs.version }}"
+ ARCHIVE="whisper-${VER}-darwin-metal-arm64.tar.gz"
+ STAGE="whisper-${VER}-darwin-metal-arm64"
- - name: Build swiftui example
- run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
+ rm -rf "$STAGE" "$ARCHIVE"
+ mkdir -p "$STAGE"
- - name: Pack artifacts
- id: pack_artifacts
- run: |
- zip --symlinks -r whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip build-apple/whisper.xcframework
+ cp -R build/bin/* "$STAGE/" 2>/dev/null || true
- - name: Upload artifacts
- if: ${{ needs.determine-tag.outputs.should_release }}
- uses: actions/upload-artifact@v6
- with:
- path: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip
- name: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip
+ # whisper-server depends on libwhisper / ggml dylibs that CMake may
+ # leave under build/src and build/ggml/src rather than build/bin.
+ # Package all produced dylibs next to the executables so @loader_path
+ # can resolve them on downstream machines and GitHub macOS runners.
+ while IFS= read -r lib; do
+ cp -P "$lib" "$STAGE/"
+ done < <(find build -name "*.dylib" -print | sort)
- android:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
+ # Make dylib lookup portable inside the extracted archive.
+ for target in "$STAGE"/whisper-* "$STAGE"/*.dylib; do
+ [ -e "$target" ] || continue
- steps:
- - name: Clone
- uses: actions/checkout@v6
- with:
- path: whisper
+ install_name_tool -add_rpath "@loader_path" "$target" 2>/dev/null || true
- - name: Install Java
- uses: actions/setup-java@v5
- with:
- distribution: zulu
- java-version: 21
+ if [ "${target##*.}" = "dylib" ] && [ ! -L "$target" ]; then
+ install_name_tool -id "@rpath/$(basename "$target")" "$target" 2>/dev/null || true
+ fi
- - name: Setup Android SDK
- uses: android-actions/setup-android@v3
+ while IFS= read -r dep; do
+ case "$dep" in
+ "$PWD"/build/*|/Users/runner/work/whisper.cpp-rocm/*)
+ install_name_tool -change "$dep" "@rpath/$(basename "$dep")" "$target" 2>/dev/null || true
+ ;;
+ esac
+ done < <(otool -L "$target" 2>/dev/null | awk 'NR > 1 {print $1}')
+ done
- - name: Build
- run: |
- cd whisper/examples/whisper.android
- ./gradlew assembleRelease --no-daemon
+ echo "--- packaged files ---"
+ find "$STAGE" -maxdepth 1 -type f -o -type l | sort
- - name: Build with external ggml
- run: |
- export PATH_TO_GGML=$PWD/ggml
- cd whisper/examples/whisper.android
- ./gradlew assembleRelease --no-daemon
+ echo "--- whisper-server dependencies after packaging ---"
+ otool -L "$STAGE/whisper-server"
+ otool -l "$STAGE/whisper-server" | grep -A2 LC_RPATH || true
- android_java:
- runs-on: ubuntu-22.04
+ # Reject any dependency that still points into this checkout. $PWD covers
+ # forks and renamed repositories; the original literal is kept as well.
+ # The otool output is captured first so "grep -q" exiting early cannot
+ # turn a match into a pipeline failure under "set -o pipefail".
+ server_deps="$(otool -L "$STAGE/whisper-server")"
+ if grep -qF -e "$PWD/" -e "/Users/runner/work/whisper.cpp-rocm" <<< "$server_deps"; then
+ echo "::error::whisper-server still references non-portable build paths"
+ exit 1
+ fi
- steps:
- - name: Clone
- uses: actions/checkout@v6
+ if ! find "$STAGE" -maxdepth 1 \( -type f -o -type l \) -name "libwhisper*.dylib" | grep -q .; then
+ echo "::error::packaged archive is missing libwhisper dylib"
+ exit 1
+ fi
- - name: set up JDK 11
- uses: actions/setup-java@v5
- with:
- java-version: '11'
- distribution: 'temurin'
- cache: gradle
+ # Run the packaged binary WITHOUT DYLD_LIBRARY_PATH: the point of this
+ # smoke test is to prove the dylibs resolve through @loader_path alone,
+ # and a library search path override would mask a broken rpath.
+ set +e
+ "$STAGE/whisper-server" --help > whisper-server-smoke.log 2>&1
+ smoke_status=$?
+ set -e
- - name: Setup Android SDK
- uses: android-actions/setup-android@v3
- with:
- cmdline-tools-version: 9.0
+ cat whisper-server-smoke.log
- - name: Build
- run: |
- cd examples/whisper.android.java
- chmod +x ./gradlew
- ./gradlew assembleRelease
+ if grep -q "Library not loaded" whisper-server-smoke.log; then
+ echo "::error::whisper-server has unresolved dylib dependencies"
+ exit 1
+ fi
- bindings-java:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- needs: ['windows']
- runs-on: windows-latest
- steps:
- - uses: actions/checkout@v6
+ echo "whisper-server smoke command exited with status ${smoke_status}"
- - name: Install Java
- uses: actions/setup-java@v5
- with:
- distribution: zulu
- java-version: 20
+ tar -czf "$ARCHIVE" "$STAGE"
+ echo "ARCHIVE=$ARCHIVE" >> "$GITHUB_ENV"
- - name: Download Whisper Windows lib
- uses: actions/download-artifact@v7
+ - uses: actions/upload-artifact@v4
with:
- name: whisper_x64.dll
+ name: ${{ env.ARCHIVE }}
+ path: ${{ env.ARCHIVE }}
- - name: Download GGML Windows lib
- uses: actions/download-artifact@v7
- with:
- name: ggml_x64.dll
+# ════════════════════════════════════════════════════════════════════════════════
+# 8. CPU — Linux
+# ════════════════════════════════════════════════════════════════════════════════
+ linux-cpu:
+ runs-on: ubuntu-latest
+ needs: determine-tag
- - name: Download GGML Base Windows lib
- uses: actions/download-artifact@v7
- with:
- name: ggml_base_x64.dll
+ steps:
+ - uses: actions/checkout@v4
- - name: Download GGML CPU Windows lib
- uses: actions/download-artifact@v7
- with:
- name: ggml_cpu_x64.dll
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y build-essential cmake git libsdl2-dev pkg-config
+ echo "cmake $(cmake --version | head -1)"
+ echo "gcc $(gcc --version | head -1)"
+
+ - name: Configure CMake
+ run: |
+ cmake -B build \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DGGML_NATIVE=OFF \
+ -DGGML_AVX=ON \
+ -DGGML_AVX2=ON \
+ -DGGML_FMA=ON \
+ -DGGML_AVX512=OFF \
+ -DCMAKE_C_FLAGS="-march=x86-64-v3" \
+ -DCMAKE_CXX_FLAGS="-march=x86-64-v3" \
+ -DWHISPER_BUILD_EXAMPLES=ON \
+ -DWHISPER_BUILD_TESTS=OFF \
+ -DWHISPER_BUILD_SERVER=ON
- - name: Download SDL2.dll
- uses: actions/download-artifact@v7
- with:
- name: x64_SDL2.dll
+ - name: Build
+ run: cmake --build build --config Release -j$(nproc)
- - name: List downloaded files
- shell: pwsh
+ - name: List build output
run: |
- Get-ChildItem -Path "." -Recurse -Filter "*.dll"
+ find build/bin -type f | sort
+ find build/bin -type f -executable | while read f; do ls -lh "$f"; done
- - name: Move DLL to correct location
- shell: pwsh
+ - name: Package
run: |
- New-Item -Path "build\bin\Release" -ItemType Directory -Force
-
- Copy-Item -Path "whisper.dll" -Destination "build\bin\Release\whisper.dll" -Force
- Write-Host "Copied whisper.dll to build\bin\Release\whisper.dll directory"
+ VER="${{ needs.determine-tag.outputs.version }}"
+ ARCHIVE="whisper-${VER}-linux-cpu-x86_64.tar.gz"
+ STAGE="whisper-${VER}-linux-cpu-x86_64"
+ mkdir -p "$STAGE"
+ cp -r build/bin/* "$STAGE/" 2>/dev/null || true
+ find build -name "*.so*" -exec cp {} "$STAGE/" \; 2>/dev/null || true
+ printf "whisper.cpp CPU build for Linux\nDate: %s\nArch: %s\n" \
+ "$(date -u +"%Y-%m-%d %H:%M:%S UTC")" "$(uname -m)" > "$STAGE/README.txt"
+ tar -czf "$ARCHIVE" "$STAGE"
+ echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV
- Copy-Item -Path "ggml.dll" -Destination "build\bin\Release\ggml.dll" -Force
- Write-Host "Copied ggml.dll to build\bin\Release\ggml.dll directory"
+ - uses: actions/upload-artifact@v4
+ with:
+ name: ${{ env.ARCHIVE }}
+ path: ${{ env.ARCHIVE }}
- Copy-Item -Path "ggml-base.dll" -Destination "build\bin\Release\ggml-base.dll" -Force
- Write-Host "Copied ggml-base.dll to build\bin\Release\ggml-base.dll directory"
+# ════════════════════════════════════════════════════════════════════════════════
+# 8. CPU — Windows
+# ════════════════════════════════════════════════════════════════════════════════
+ windows-cpu:
+ runs-on: windows-latest
+ needs: determine-tag
- Copy-Item -Path "ggml-cpu.dll" -Destination "build\bin\Release\ggml-cpu.dll" -Force
- Write-Host "Copied ggml-cpu.dll to build\bin\Release\ggml-cpu.dll directory"
+ steps:
+ - uses: actions/checkout@v4
- Copy-Item -Path "SDL2.dll" -Destination "build\bin\Release\SDL2.dll" -Force
- Write-Host "Copied SDL2.dll to build\bin\Release\SDL2.dll directory"
+ - uses: microsoft/setup-msbuild@v2
- - name: List build release files
+ - name: Fetch SDL2
shell: pwsh
run: |
- Get-ChildItem -Path "build\Release" -Recurse -Filter "*.dll"
-
- - name: Build
- run: |
- models\download-ggml-model.cmd tiny.en models/
- cd bindings/java
- chmod +x ./gradlew
- ./gradlew build --info
+ C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-2.28.5/SDL2-devel-2.28.5-VC.zip
+ 7z x sdl2.zip
+ "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-2.28.5/cmake" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
- - name: Pack jar artifacts
+ - name: Configure CMake
shell: pwsh
run: |
- Compress-Archive -Path "bindings/java/build/libs/whispercpp-*.jar" -DestinationPath "whispercpp.jar.zip"
+ cmake -S . -B ./build -A x64 `
+ -DCMAKE_BUILD_TYPE=Release `
+ -DGGML_NATIVE=OFF `
+ -DGGML_AVX=ON `
+ -DGGML_AVX2=ON `
+ -DGGML_FMA=ON `
+ -DGGML_AVX512=OFF `
+ -DBUILD_SHARED_LIBS=ON `
+ -DWHISPER_BUILD_SERVER=ON `
+ -DWHISPER_SDL2=ON
- - name: Upload jar
- uses: actions/upload-artifact@v6
- with:
- name: whispercpp.jar.zip
- path: whispercpp.jar.zip
-
-# - name: Publish package
-# if: ${{ github.ref == 'refs/heads/master' }}
-# uses: gradle/gradle-build-action@v2.4.2
-# with:
-# arguments: publish
-# build-root-directory: bindings/java
-# env:
-# MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
-# MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
-# PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
-# PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
-
- quantize:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
- runs-on: ubuntu-22.04
+ - name: Build
+ run: cd ./build && msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64
- steps:
- - name: Clone
- uses: actions/checkout@v6
+ - name: Copy SDL2.dll
+ shell: pwsh
+ run: copy "$env:SDL2_DIR/../lib/x64/SDL2.dll" build/bin/Release
- - name: Test quantize
+ - name: Package
+ shell: pwsh
run: |
- ./models/download-ggml-model.sh tiny.en
- cmake -B build
- cmake --build build --config Release
- ./build/bin/whisper-quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
-
- release:
- if: ${{ github.event.inputs.create_release == 'true' || github.event.inputs.pre_release_tag != '' || startsWith(github.ref, 'refs/tags/v') }}
-
- runs-on: ubuntu-latest
-
- needs:
- - determine-tag
- - ios-xcode-build
- - windows
- - windows-blas
- - windows-cublas
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
- with:
- fetch-depth: 0
+ $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-cpu-x64.zip"
+ Compress-Archive -Path "build/bin/Release/*" -DestinationPath $a -Force
+ "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
- - name: ccache
- uses: hendrikmuhs/ccache-action@v1.2.16
+ - uses: actions/upload-artifact@v4
with:
- key: release
- evict-old-files: 1d
+ name: ${{ env.ARCHIVE }}
+ path: ${{ env.ARCHIVE }}
+
+# ════════════════════════════════════════════════════════════════════════════════
+# Test jobs — shared setup notes (the release job itself is section 16 below)
+# ════════════════════════════════════════════════════════════════════════════════
+# Shared model download step (reused across all test jobs via inline steps)
+# Models: ggml-tiny.bin from HuggingFace ggerganov/whisper.cpp
+# ggml-tiny-encoder-vitisai.rai from amd/whisper-tiny-onnx-npu
+# ════════════════════════════════════════════════════════════════════════════════
+
+# ════════════════════════════════════════════════════════════════════════════════
+# 9. Test — CPU Windows (GitHub-hosted, no GPU needed)
+# ════════════════════════════════════════════════════════════════════════════════
+ test-cpu-windows:
+ runs-on: windows-latest
+ needs: [determine-tag, windows-cpu]
+ if: needs.windows-cpu.result == 'success'
+ continue-on-error: true
+ steps:
+ - uses: actions/checkout@v4
- # Downloads all the artifacts from the previous jobs
- - name: Download artifacts
- id: download-artifact
- uses: actions/download-artifact@v7
+ - name: Download artifact
+ uses: actions/download-artifact@v4
with:
+ name: whisper-${{ needs.determine-tag.outputs.version }}-windows-cpu-x64.zip
path: ./artifact
- - name: Move artifacts
- id: move_artifacts
- run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
-
- - name: Create release
- id: create_release
- uses: ggml-org/action-create-release@v1
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- with:
- tag_name: ${{ needs.determine-tag.outputs.tag_name }}
- prerelease: ${{ github.event.inputs.pre_release_tag != '' }}
- draft: true
-
- - name: Upload release
- id: upload_release
- uses: actions/github-script@v3
- with:
- github-token: ${{secrets.GITHUB_TOKEN}}
- script: |
- const path = require('path');
- const fs = require('fs');
- const release_id = '${{ steps.create_release.outputs.id }}';
- for (let file of await fs.readdirSync('./artifact/release')) {
- if (path.extname(file) === '.zip') {
- console.log('uploadReleaseAsset', file);
- await github.repos.uploadReleaseAsset({
- owner: context.repo.owner,
- repo: context.repo.repo,
- release_id: release_id,
- name: file,
- data: await fs.readFileSync(`./artifact/release/${file}`)
- });
- }
- }
-
- coreml-base-en:
- if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
- github.event.inputs.create_release == 'true' ||
- github.event.inputs.pre_release_tag != '' ||
- startsWith(github.ref, 'refs/tags/v') }}
- runs-on: macos-latest
- needs: determine-tag
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v6
-
- - name: Set environment variables
- id: set_vars
+ - name: Extract
+ shell: pwsh
run: |
- echo "MODEL_NAME=base.en" >> $GITHUB_ENV
- echo "GEN_MODEL_NAME=whisper-${{ needs.determine-tag.outputs.tag_name }}-ggml-base.en-encoder.mlmodelc" >> $GITHUB_ENV
+ New-Item -ItemType Directory -Force -Path bin | Out-Null
+ Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force
- - name: Download model
+ - name: Download tiny model
+ shell: pwsh
run: |
- ./models/download-ggml-model.sh ${{ env.MODEL_NAME }}
+ New-Item -ItemType Directory -Force -Path models | Out-Null
+ Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin"
- - name: Generate CoreML model
+ - name: Run and verify
+ shell: pwsh
run: |
- python3.11 -m venv venv
- source venv/bin/activate
- pip install ane_transformers openai-whisper coremltools
- ./models/generate-coreml-model.sh ${{ env.MODEL_NAME }}
+ .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result
+ $text = Get-Content "jfk-result.txt" -Raw
+ Write-Host $text
+ if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" -ForegroundColor Green }
+ else { Write-Error "FAIL: expected words not found"; exit 1 }
- vad:
- if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
- github.event.inputs.run_type == 'full-ci' }}
+# ════════════════════════════════════════════════════════════════════════════════
+# 10. Test — CPU Linux (GitHub-hosted, no GPU needed)
+# ════════════════════════════════════════════════════════════════════════════════
+ test-cpu-linux:
runs-on: ubuntu-latest
-
+ needs: [determine-tag, linux-cpu]
+ if: needs.linux-cpu.result == 'success'
+ continue-on-error: true
steps:
- - name: Checkout
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
- - name: Build
- shell: bash
+ - name: Download artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: whisper-${{ needs.determine-tag.outputs.version }}-linux-cpu-x86_64.tar.gz
+ path: ./artifact
+
+ - name: Extract
run: |
- cmake -B build
- cmake --build build --config Release
+ mkdir -p bin
+ tar -xzf artifact/*.tar.gz --strip-components=1 -C bin
+ chmod +x bin/whisper-cli
- - name: Test
- shell: bash
+ - name: Download tiny model
run: |
- ctest -R ^test-vad$ --test-dir build --output-on-failure -VV
+ mkdir -p models
+ curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
-# TODO: simplify the following workflows using a matrix
- ggml-ci-x64-cpu-low-perf:
- runs-on: ubuntu-22.04
+ - name: Run and verify
+ run: |
+ LD_LIBRARY_PATH=$(pwd)/bin:$LD_LIBRARY_PATH ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result
+ cat jfk-result.txt
+ grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; }
+# ════════════════════════════════════════════════════════════════════════════════
+# 11. Test — Vulkan Windows (stx-halo, has Vulkan driver)
+# ════════════════════════════════════════════════════════════════════════════════
+ test-vulkan-windows:
+ runs-on: [self-hosted, Windows, stx-halo]
+ needs: [determine-tag, windows-vulkan]
+ if: needs.windows-vulkan.result == 'success'
+ continue-on-error: true
steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
- - name: ccache
- uses: ggml-org/ccache-action@v1.2.16
+ - name: Download artifact
+ uses: actions/download-artifact@v4
with:
- key: ggml-ci-x64-cpu-low-perf
- evict-old-files: 1d
+ name: whisper-${{ needs.determine-tag.outputs.version }}-windows-vulkan-x64.zip
+ path: ./artifact
- - name: Dependencies
- id: depends
+ - name: Extract
+ shell: powershell
run: |
- sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ New-Item -ItemType Directory -Force -Path bin | Out-Null
+ Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force
- - name: Test
- id: ggml-ci
+ - name: Download tiny model
+ shell: powershell
run: |
- LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+ New-Item -ItemType Directory -Force -Path models | Out-Null
+ Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin"
- ggml-ci-arm64-cpu-low-perf:
- runs-on: ubuntu-22.04-arm
+ - name: Run and verify
+ shell: powershell
+ run: |
+ .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result
+ $text = Get-Content "jfk-result.txt" -Raw
+ Write-Host $text
+ if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" }
+ else { Write-Error "FAIL: expected words not found"; exit 1 }
+# ════════════════════════════════════════════════════════════════════════════════
+# 12. Test — Vulkan Linux (stx-halo, has Vulkan driver)
+# ════════════════════════════════════════════════════════════════════════════════
+ test-vulkan-linux:
+ runs-on: [self-hosted, Linux, stx-halo]
+ needs: [determine-tag, linux-vulkan]
+ if: needs.linux-vulkan.result == 'success'
+ continue-on-error: true
steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
- - name: ccache
- uses: ggml-org/ccache-action@v1.2.16
+ - name: Download artifact
+ uses: actions/download-artifact@v4
with:
- key: ggml-ci-arm64-cpu-low-perf
- evict-old-files: 1d
+ name: whisper-${{ needs.determine-tag.outputs.version }}-linux-vulkan-x86_64.tar.gz
+ path: ./artifact
- - name: Dependencies
- id: depends
+ - name: Extract
run: |
- sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ mkdir -p bin
+ tar -xzf artifact/*.tar.gz --strip-components=1 -C bin
+ chmod +x bin/whisper-cli
- - name: Test
- id: ggml-ci
+ - name: Download tiny model
run: |
- LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+ mkdir -p models
+ curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
- ggml-ci-x64-cpu-high-perf:
- runs-on: ubuntu-22.04
+ - name: Run and verify
+ run: |
+ LD_LIBRARY_PATH=$(pwd)/bin:$LD_LIBRARY_PATH ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result
+ cat jfk-result.txt
+ grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; }
+# ════════════════════════════════════════════════════════════════════════════════
+# 13. Test — ROCm Windows (stx-halo, gfx1151)
+# ════════════════════════════════════════════════════════════════════════════════
+ test-rocm-windows:
+ runs-on: [self-hosted, Windows, stx-halo]
+ needs: [determine-tag, windows-rocm]
+ if: needs.windows-rocm.result == 'success'
+ continue-on-error: true
steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
- - name: ccache
- uses: ggml-org/ccache-action@v1.2.16
+ - name: Download ROCm artifact (gfx1151 - stx-halo GPU target)
+ uses: actions/download-artifact@v4
with:
- key: ggml-ci-x64-cpu-high-perf
- evict-old-files: 1d
+ name: whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-gfx1151.zip
+ path: ./artifact
- - name: Dependencies
- id: depends
+ - name: Extract
+ shell: powershell
run: |
- sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ New-Item -ItemType Directory -Force -Path bin | Out-Null
+ Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force
- - name: Test
- id: ggml-ci
+ - name: Download tiny model
+ shell: powershell
run: |
- LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt
+ New-Item -ItemType Directory -Force -Path models | Out-Null
+ Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin"
- ggml-ci-arm64-cpu-high-perf:
- runs-on: ubuntu-22.04-arm
+ - name: Run and verify
+ shell: powershell
+ run: |
+ .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result
+ $text = Get-Content "jfk-result.txt" -Raw
+ Write-Host $text
+ if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" }
+ else { Write-Error "FAIL: expected words not found"; exit 1 }
+# ════════════════════════════════════════════════════════════════════════════════
+# 14. Test — ROCm Linux (stx-halo, gfx1151)
+# ════════════════════════════════════════════════════════════════════════════════
+ test-rocm-linux:
+ runs-on: [self-hosted, Linux, stx-halo]
+ needs: [determine-tag, linux-rocm]
+ if: needs.linux-rocm.result == 'success'
+ continue-on-error: true
steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
- - name: ccache
- uses: ggml-org/ccache-action@v1.2.16
+ - name: Download ROCm artifact (gfx1151)
+ uses: actions/download-artifact@v4
with:
- key: ggml-ci-arm64-cpu-high-perf
- evict-old-files: 1d
+ name: whisper-${{ needs.determine-tag.outputs.version }}-linux-rocm-gfx1151.tar.gz
+ path: ./artifact
- - name: Dependencies
- id: depends
+ - name: Extract
run: |
- sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ mkdir -p bin
+ tar -xzf artifact/*.tar.gz --strip-components=1 -C bin
+ chmod +x bin/whisper-cli
- - name: Test
- id: ggml-ci
+ - name: Download tiny model
run: |
- LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+ mkdir -p models
+ curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
- ggml-ci-arm64-cpu-high-perf-sve:
- runs-on: ubuntu-22.04-arm
+ - name: Run and verify
+ run: |
+ LD_LIBRARY_PATH=$(pwd)/bin:$LD_LIBRARY_PATH ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result
+ cat jfk-result.txt
+ grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; }
+# ════════════════════════════════════════════════════════════════════════════════
+# 15. Test — NPU Windows (rai300_400 runner, needs FlexML + .rai model)
+# ════════════════════════════════════════════════════════════════════════════════
+ test-npu-windows:
+ runs-on: [self-hosted, Windows, stx, rai300_400]
+ needs: [determine-tag, windows-npu]
+ if: needs.windows-npu.result == 'success'
+ continue-on-error: true
steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
- - name: ccache
- uses: ggml-org/ccache-action@v1.2.16
+ - name: Download NPU artifact
+ uses: actions/download-artifact@v4
with:
- key: ggml-ci-arm64-cpu-high-perf-sve
- evict-old-files: 1d
+ name: whisper-${{ needs.determine-tag.outputs.version }}-windows-npu-x64.zip
+ path: ./artifact
- - name: Dependencies
- id: depends
+ - name: Extract
+ shell: powershell
run: |
- sudo apt-get update
- sudo apt-get install build-essential libcurl4-openssl-dev
+ New-Item -ItemType Directory -Force -Path bin | Out-Null
+ Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force
- - name: Test
- id: ggml-ci
+ - name: Download FlexML Runtime and setup environment
+ shell: powershell
run: |
- LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
-
- ggml-ci-x64-nvidia-cuda:
- runs-on: [self-hosted, Linux, mnt-root, NVIDIA]
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ Invoke-WebRequest -Uri "${{ env.FLEXML_URL }}" -OutFile flexmlrt.zip
+ tar xvf flexmlrt.zip
+ Remove-Item flexmlrt.zip
- - name: Test
- id: ggml-ci
+ - name: Setup FlexML environment
+ shell: cmd
run: |
- nvidia-smi
- GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
-
- ggml-ci-x64-nvidia-vulkan-cm:
- runs-on: [self-hosted, Linux, mnt-root, NVIDIA]
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ cd flexmlrt
+ call setup.bat
+ if errorlevel 1 ( echo ERROR: FlexML setup failed! & exit /b 1 )
- - name: Test
- id: ggml-ci
+ - name: Copy FlexML DLLs to bin
+ shell: powershell
run: |
- vulkaninfo --summary
- GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
-
- ggml-ci-x64-nvidia-vulkan-cm2:
- runs-on: [self-hosted, Linux, mnt-root, NVIDIA, COOPMAT2]
-
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ $flexml = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1
+ foreach ($sub in @("bin","lib")) {
+ $path = Join-Path $flexml.FullName $sub
+ if (Test-Path $path) {
+ Get-ChildItem "$path\*.dll" -ErrorAction SilentlyContinue |
+ ForEach-Object { Copy-Item $_.FullName "bin\" -Force }
+ }
+ }
- - name: Test
- id: ggml-ci
+ - name: Download models (ggml weights + .rai NPU encoder)
+ shell: powershell
run: |
- vulkaninfo --summary
- GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
-
- #ggml-ci-x64-cpu-amx:
- # runs-on: [self-hosted, Linux, X64, CPU, AMX]
+ New-Item -ItemType Directory -Force -Path models | Out-Null
+ Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin"
+ Invoke-WebRequest -Uri "https://huggingface.co/amd/whisper-tiny-onnx-npu/resolve/main/ggml-tiny-encoder-vitisai.rai" -OutFile "models\ggml-tiny-encoder-vitisai.rai"
+ Write-Host "Models:"
+ Get-ChildItem models | Format-Table Name, Length
- # steps:
- # - name: Clone
- # id: checkout
- # uses: actions/checkout@v6
-
- # - name: Test
- # id: ggml-ci
- # run: |
- # bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
+ - name: Run and verify (NPU encoder + CPU decoder)
+ shell: powershell
+ run: |
+ .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result
+ $text = Get-Content "jfk-result.txt" -Raw
+ Write-Host $text
+ if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS: transcription correct" -ForegroundColor Green }
+ else { Write-Error "FAIL: expected words not found"; exit 1 }
- ggml-ci-mac-metal:
- runs-on: [self-hosted, macOS, ARM64]
+# ════════════════════════════════════════════════════════════════════════════════
+# 16. Publish GitHub Release (only after all tests pass or are skipped)
+# ════════════════════════════════════════════════════════════════════════════════
+ release:
+ # Publishes the GitHub release. always() lets this job be evaluated even when
+ # upstream jobs failed or were skipped; it then runs only if determine-tag
+ # requested a release and every test job, plus the macos-metal build, reported
+ # 'success' or 'skipped'.
+ # A test job is also skipped when its build job did not succeed (each test's own
+ # `if:` requires the build result to be 'success'), so this gate can pass without
+ # that platform's archive having been produced.
+ # NOTE(review): every test job sets `continue-on-error: true`; confirm that
+ # needs.<job>.result still reports 'failure' for such jobs. If it reports
+ # 'success', a failing test would not block the release.
+ if: |
+ always() &&
+ needs.determine-tag.outputs.should_release == 'true' &&
+ (needs.test-cpu-windows.result == 'success' || needs.test-cpu-windows.result == 'skipped') &&
+ (needs.test-cpu-linux.result == 'success' || needs.test-cpu-linux.result == 'skipped') &&
+ (needs.test-vulkan-windows.result == 'success' || needs.test-vulkan-windows.result == 'skipped') &&
+ (needs.test-vulkan-linux.result == 'success' || needs.test-vulkan-linux.result == 'skipped') &&
+ (needs.test-rocm-windows.result == 'success' || needs.test-rocm-windows.result == 'skipped') &&
+ (needs.test-rocm-linux.result == 'success' || needs.test-rocm-linux.result == 'skipped') &&
+ (needs.test-npu-windows.result == 'success' || needs.test-npu-windows.result == 'skipped') &&
+ (needs.macos-metal.result == 'success' || needs.macos-metal.result == 'skipped')
+ runs-on: ubuntu-latest
+ # Build jobs are listed as well as test jobs so their artifacts exist before
+ # the download step of this job runs.
+ needs:
+ - determine-tag
+ - linux-rocm
+ - windows-rocm
+ - linux-vulkan
+ - windows-vulkan
+ - windows-npu
+ - macos-metal
+ - linux-cpu
+ - windows-cpu
+ - test-cpu-windows
+ - test-cpu-linux
+ - test-vulkan-windows
+ - test-vulkan-linux
+ - test-rocm-windows
+ - test-rocm-linux
+ - test-npu-windows
steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
- - name: Test
- id: ggml-ci
+ - name: Download all artifacts
+ uses: actions/download-artifact@v4
+ with:
+ path: ./artifacts
+
+ - name: Flatten artifacts into release/
run: |
- GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp
+ mkdir -p release
+ find ./artifacts -mindepth 2 \( -name '*.zip' -o -name '*.tar.gz' \) -exec mv {} release/ \;
+ echo "Release assets:"
+ ls -lh release/
- ggml-ci-mac-vulkan:
- runs-on: [self-hosted, macOS, ARM64]
+ # Removes a previously published release and its tag so the release can be
+ # recreated under the same tag name by the next step.
+ - name: Delete existing release/tag if present
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: |
+ TAG="${{ needs.determine-tag.outputs.tag_name }}"
+ # Gate on the exit status of `gh api`, not on whether it printed anything:
+ # on a failed lookup (e.g. 404) gh can still write the error body to stdout,
+ # so a non-empty capture does not prove that the release exists.
+ if RELEASE_ID=$(gh api repos/${{ github.repository }}/releases/tags/"$TAG" --jq '.id' 2>/dev/null); then
+ echo "Deleting existing release $RELEASE_ID for tag $TAG"
+ gh api -X DELETE repos/${{ github.repository }}/releases/"$RELEASE_ID"
+ fi
+ # Best-effort: the tag may not exist yet, which is not an error.
+ git push --delete origin "refs/tags/$TAG" 2>/dev/null || true
- steps:
- - name: Clone
- id: checkout
- uses: actions/checkout@v6
+ - name: Create release
+ id: create_release
+ uses: ggml-org/action-create-release@v1
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ with:
+ tag_name: ${{ needs.determine-tag.outputs.tag_name }}
+ release_name: "whisper.cpp ${{ needs.determine-tag.outputs.tag_name }} - AMD Builds"
+ prerelease: ${{ github.event.inputs.pre_release_tag != '' }}
+ draft: false
+ body: |
+ ## AMD whisper.cpp ${{ needs.determine-tag.outputs.tag_name }}
+
+ AMD-based pre-built binaries of [whisper.cpp ${{ needs.determine-tag.outputs.tag_name }}](https://github.com/ggerganov/whisper.cpp/releases/tag/${{ needs.determine-tag.outputs.tag_name }}) with full hardware acceleration across ROCm GPU (iGPU and dGPU), NPU (RyzenAI), and CPU — for Linux and Windows.
+ All ROCm runtime libraries (ROCm ${{ env.ROCM_VERSION }}) are bundled. No drivers or separate installs required — download, extract, and run.
+
+ ### Packages
+
+ | Target | Linux | Windows |
+ |---|---|---|
+ | ROCm gfx1151 (Ryzen AI MAX+ Pro 395) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx1151.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx1151.zip` |
+ | ROCm gfx1150 (Ryzen AI 300) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx1150.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx1150.zip` |
+ | ROCm gfx120X (RDNA4 dGPU) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx120X.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx120X.zip` |
+ | ROCm gfx110X (RDNA3 dGPU & iGPU) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx110X.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx110X.zip` |
+ | Vulkan (cross-vendor) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-vulkan-x86_64.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-vulkan-x64.zip` |
+ | NPU (RyzenAI) | — | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-npu-x64.zip` |
+ | Metal (Apple Silicon) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-darwin-metal-arm64.tar.gz` | — |
+ | CPU only | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-cpu-x86_64.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-cpu-x64.zip` |
+
+ - name: Upload release assets
+ uses: actions/github-script@v7
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ const fs = require('fs');
+ const path = require('path');
+ const id = '${{ steps.create_release.outputs.id }}';
+ for (const file of fs.readdirSync('./release')) {
+ if (!file.endsWith('.zip') && !file.endsWith('.tar.gz')) continue;
+ console.log('Uploading:', file);
+ await github.rest.repos.uploadReleaseAsset({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ release_id: id,
+ name: file,
+ data: fs.readFileSync(`./release/${file}`),
+ });
+ }
- - name: Test
- id: ggml-ci
+ - name: Update README download links
run: |
- vulkaninfo --summary
- GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp
+ TAG="${{ needs.determine-tag.outputs.tag_name }}"
+ # Replace the placeholder tag in all download URLs with the actual release tag
+ # The pattern rewrites both the tag path segment after /releases/download/ and
+ # the version that follows "whisper-" up to the next "-".
+ # NOTE(review): [^-]* stops at the first "-", so an existing version that itself
+ # contains "-" would only be partly replaced — confirm tags never contain "-".
+ sed -i "s|/releases/download/[^/]*/whisper-[^-]*-|/releases/download/${TAG}/whisper-${TAG}-|g" README.md
+ git config user.name "github-actions[bot]"
+ git config user.email "github-actions[bot]@users.noreply.github.com"
+ git add README.md
+ # Commit only when sed actually changed README.md (the staged diff is non-empty).
+ git diff --cached --quiet || git commit -m "docs: update download links to ${TAG}"
+ # NOTE(review): presumably the checkout is on a branch and the token may write
+ # repository contents; a detached (tag) checkout would make this push fail — confirm.
+ git push
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
deleted file mode 100644
index 6c0de0ece70..00000000000
--- a/.github/workflows/docker.yml
+++ /dev/null
@@ -1,77 +0,0 @@
-name: Publish Docker image
-
-on:
- pull_request:
- push:
- branches:
- - master
-
-jobs:
- push_to_registry:
- name: Push Docker image to Docker Hub
- if: github.event.pull_request.draft == false
-
- runs-on: ubuntu-22.04
- env:
- COMMIT_SHA: ${{ github.sha }}
- strategy:
- fail-fast: false
- matrix:
- config:
- - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
- - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
- - { tag: "main-intel", dockerfile: ".devops/main-intel.Dockerfile", platform: "linux/amd64" }
- - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
- - { tag: "main-vulkan", dockerfile: ".devops/main-vulkan.Dockerfile", platform: "linux/amd64" }
-
- steps:
- - name: Check out the repo
- uses: actions/checkout@v6
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
- with:
- image: tonistiigi/binfmt:qemu-v7.0.0-28
-
- - name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v3
-
- - name: Log in to Docker Hub
- uses: docker/login-action@v3
- with:
- registry: ghcr.io
- username: ${{ github.repository_owner }}
- password: ${{ secrets.GITHUB_TOKEN }}
-
- - name: Free up disk space
- run: |
- sudo apt-get remove -y '^dotnet-.*' '^llvm-.*' '^mysql-.*' '^postgresql-.*'
- sudo apt-get autoremove -y
- sudo apt-get autoclean
-
- sudo rm -rf /usr/share/dotnet
- sudo rm -rf /usr/local/lib/android
- sudo rm -rf /opt/ghc
- sudo rm -rf /opt/hostedtoolcache/CodeQL
-
- docker system prune -af
-
- df -h
-
- - name: Generate tags
- id: tags
- run: |
- TAGS="ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
- if [ "${{ github.event_name }}" == "push" ]; then
- TAGS="$TAGS,ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
- fi
- echo "tags=$TAGS" >> $GITHUB_OUTPUT
-
- - name: Build and push Docker image (tagged)
- uses: docker/build-push-action@v6
- with:
- context: .
- push: ${{ github.event_name == 'push' }}
- platforms: ${{ matrix.config.platform }}
- tags: ${{ steps.tags.outputs.tags }}
- file: ${{ matrix.config.dockerfile }}
diff --git a/.github/workflows/examples-wasm.yml b/.github/workflows/examples-wasm.yml
deleted file mode 100644
index 927438cdad8..00000000000
--- a/.github/workflows/examples-wasm.yml
+++ /dev/null
@@ -1,97 +0,0 @@
-name: Examples WASM
-on:
- push:
- branches: ["master"]
-
- workflow_dispatch:
-
-permissions:
- contents: read
- pages: write
- id-token: write
-
-concurrency:
- group: "pages"
- cancel-in-progress: false
-
-jobs:
- deploy-wasm-github-pages:
- environment:
- name: github-pages
- url: ${{ steps.deployment.outputs.page_url }}
- runs-on: ubuntu-latest
- steps:
- - name: Checkout
- uses: actions/checkout@v6
-
- - name: Setup Pages
- uses: actions/configure-pages@v5
-
- - name: Setup emsdk
- uses: mymindstorm/setup-emsdk@v14
-
- - name: Build WASM Examples
- # Enable for real build later in whisper.cpp
- run: |
- mkdir -p build-em && cd build-em
- emcmake cmake .. -DCMAKE_BUILD_TYPE=Release
- make -j
-
- - name: Create staging directory
- run: mkdir -p staging
-
- - name: Create .nojekyll file in staging directory
- run: touch staging/.nojekyll
-
- - name: Copy application files
- run: |
- build_dir=build-em/bin
-
- ls ${build_dir}
-
- # command.wasm
- target_dir=staging/command.wasm
- mkdir -p ${target_dir}
- cp ${build_dir}/command.wasm/{index.html,command.js,helpers.js} ${target_dir}
- cp ${build_dir}/libcommand.js ${target_dir}
-
- # bench.wasm
- target_dir=staging/bench.wasm
- mkdir -p ${target_dir}
- cp ${build_dir}/bench.wasm/{index.html,bench.js,helpers.js} ${target_dir}
- cp ${build_dir}/libbench.js ${target_dir}
-
- # stream.wasm
- target_dir=staging/stream.wasm
- mkdir -p ${target_dir}
- cp ${build_dir}/stream.wasm/{index.html,stream.js,helpers.js} ${target_dir}
- cp ${build_dir}/libstream.js ${target_dir}
-
- # wchess.wasm
- target_dir=staging/wchess.wasm
- mkdir -p ${target_dir}
- cp -r ${build_dir}/wchess.wasm/{index.html,css,img,js} ${target_dir}
- cp ${build_dir}/wchess.wasm.js ${target_dir}
-
- # whisper.wasm (this will be the main example page)
- target_dir=staging
- mkdir -p ${target_dir}
- cp ${build_dir}/whisper.wasm/{index.html,main.js,helpers.js} ${target_dir}
- cp ${build_dir}/libmain.js ${target_dir}
-
- # Copy Cross-Origin Isolation service worker
- cp -v examples/coi-serviceworker.js staging/
-
- - name: List files in staging directory (for debugging)
- run: |
- echo "Files in staging directory:"
- find staging -type f | sort
-
- - name: Upload artifact
- uses: actions/upload-pages-artifact@v4
- with:
- path: ./staging
-
- - name: Deploy to GitHub Pages
- id: deployment
- uses: actions/deploy-pages@v4
diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
deleted file mode 100644
index 1c9ade5a300..00000000000
--- a/.github/workflows/examples.yml
+++ /dev/null
@@ -1,48 +0,0 @@
-name: Examples Tests
-on:
- push:
- paths:
- - examples/addon.node/**
- - whisper.h
- pull_request:
- paths:
- - examples/addon.node/**
- - whisper.h
-
-jobs:
- addon_node-ubuntu-22:
- runs-on: ubuntu-22.04
- strategy:
- matrix:
- node-version: [ 16.x, 18.x ]
- steps:
- - name: Clone
- uses: actions/checkout@v6
-
- - name: Dependencies
- run: |
- sudo apt-get update
- sudo apt-get install build-essential git
- sudo apt-get install cmake
- sudo apt-get install libsdl2-dev
-
- - name: Use Node.js ${{ matrix.node-version }}
- uses: actions/setup-node@v6
- with:
- node-version: ${{ matrix.node-version }}
- cache: 'npm'
-
- - name: Install package.json dependencies
- working-directory: ./examples/addon.node
- run: npm install
-
- - name: Compile addon.node
- run: npx cmake-js compile -T addon.node -B Release
-
- - name: Download test model
- run: |
- bash ./models/download-ggml-model.sh base.en
- - name: Test
- run: |
- cd examples/addon.node
- npm run test
diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml
new file mode 100644
index 00000000000..47693822a04
--- /dev/null
+++ b/.github/workflows/sync.yml
@@ -0,0 +1,146 @@
+name: Sync Upstream & Auto-Release
+
+# Detects new upstream whisper.cpp releases. Triggered manually (workflow_dispatch);
+# no `schedule:` trigger is configured in this file. When a new release is found:
+#   - clean merge → pushes main + creates tag vX.Y.Z → triggers build.yml
+#   - conflict    → opens a PR for manual resolution, does NOT tag
+
+on:  # manual trigger only
+  workflow_dispatch:
+    inputs:
+      upstream_tag:  # read by the "Detect upstream release" step
+        description: 'Force a specific upstream tag (e.g. v1.8.5). Leave blank to auto-detect latest.'
+        required: false
+        type: string
+      dry_run:  # when true, the push-main and release-tag steps are skipped
+        description: 'Dry run — merge locally but do not push or tag'
+        required: false
+        type: boolean
+        default: false
+
+permissions:
+  contents: write  # git push of main, sync/* branches and release tags
+  pull-requests: write  # gh pr create on the conflict path
+
+jobs:
+  sync-and-tag:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout (full history + tags)
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Configure git identity
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+      - name: Add upstream remote
+        run: git remote add upstream https://github.com/ggerganov/whisper.cpp || true
+
+      # ── Detect which upstream release to target ──────────────────────────
+      - name: Detect upstream release
+        id: upstream
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          INPUT_TAG: ${{ github.event.inputs.upstream_tag }}
+        run: |
+          if [[ -n "$INPUT_TAG" ]]; then
+            UPSTREAM_TAG="$INPUT_TAG"
+            echo "Using manually specified tag: $UPSTREAM_TAG"
+          else
+            UPSTREAM_TAG=$(gh api repos/ggerganov/whisper.cpp/releases/latest --jq '.tag_name')
+            echo "Latest upstream release: $UPSTREAM_TAG"
+          fi
+          # The input comes in via env, and later steps splice this tag into shell: allow ref-safe characters only.
+          [[ "$UPSTREAM_TAG" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]] || { echo "::error::Invalid upstream tag '$UPSTREAM_TAG'"; exit 1; }
+
+          # Strip leading 'v' for use in artifact filenames
+          VERSION="${UPSTREAM_TAG#v}"
+          echo "tag=$UPSTREAM_TAG" >> "$GITHUB_OUTPUT"
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+
+      # ── Check if we already have a release for this upstream version ──────
+      - name: Check if already released
+        id: check
+        run: |
+          git fetch --tags
+          # Our tags match the upstream tag exactly (e.g. v1.8.4)
+          EXISTING=$(git tag -l "${{ steps.upstream.outputs.tag }}" | head -1)
+          if [[ -n "$EXISTING" ]]; then
+            echo "already_released=true" >> "$GITHUB_OUTPUT"
+            echo "::notice::Already have release $EXISTING — nothing to do."
+          else
+            echo "already_released=false" >> "$GITHUB_OUTPUT"
+            echo "New upstream release detected: ${{ steps.upstream.outputs.tag }}"
+          fi
+
+      # ── Merge upstream tag into main ─────────────────────────────────────
+      - name: Fetch upstream tags
+        if: steps.check.outputs.already_released == 'false'
+        run: git fetch upstream --tags
+
+      - name: Attempt merge
+        if: steps.check.outputs.already_released == 'false'
+        run: |
+          git merge "${{ steps.upstream.outputs.tag }}" --no-edit || echo "CONFLICT=true" >> "$GITHUB_ENV"
+
+      # ── Conflict path: open PR, do NOT tag (skipped on dry runs) ─────────
+      - name: Open conflict PR
+        if: steps.check.outputs.already_released == 'false' && env.CONFLICT == 'true' && github.event.inputs.dry_run != 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          TAG="${{ steps.upstream.outputs.tag }}"
+          BRANCH="sync/$TAG"
+          CONFLICT_FILES=$(git diff --name-only --diff-filter=U | paste -sd, - | sed 's/,/, /g')  # "a, b, c"
+          git merge --abort
+
+          git checkout -b "$BRANCH"
+          # Resolve conflicts by preferring upstream (theirs) so the branch is pushable
+          git merge "$TAG" --no-edit --strategy-option=theirs || true
+          git add -A
+          git commit -m "chore: merge upstream $TAG (auto-resolved via theirs)" --allow-empty
+          git push origin "$BRANCH"
+
+          printf '%s\n' \
+            "## Upstream sync: ${TAG}" "" \
+            "Conflicts were detected during automatic merge. Files affected:" "" \
+            "    ${CONFLICT_FILES}" "" \
+            "This PR was auto-resolved using upstream (theirs) as a baseline." \
+            "Please review the diff carefully before merging." "" \
+            "Once merged, manually create the release tag on main to trigger the build:" "" \
+            "    git tag ${TAG}" \
+            "    git push origin ${TAG}" > /tmp/pr-body.md
+
+          gh pr create \
+            --title "Sync upstream ${TAG} - conflict resolution needed" \
+            --body-file /tmp/pr-body.md \
+            --base main \
+            --head "$BRANCH"
+
+          echo "::warning::Merge conflict detected - PR opened for manual resolution. Release build NOT triggered."
+
+      # ── Clean merge path: push main + tag → triggers build.yml ───────────
+      - name: Push merged main
+        if: steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true' && github.event.inputs.dry_run != 'true'
+        run: git push origin HEAD:main
+
+      - name: Create and push release tag
+        if: steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true' && github.event.inputs.dry_run != 'true'
+        run: |
+          TAG="${{ steps.upstream.outputs.tag }}"
+          # `git fetch upstream --tags` already created a local $TAG; -f re-points it at the merged HEAD instead of failing.
+          git tag -f "$TAG" -m "AMD builds for upstream $TAG"
+          git push origin "$TAG"
+          echo "::notice::Pushed tag $TAG — build.yml will now run and publish the release."
+
+      - name: Dry-run summary
+        if: github.event.inputs.dry_run == 'true' && steps.check.outputs.already_released == 'false'
+        run: |
+          if [[ "${CONFLICT:-}" == "true" ]]; then echo "DRY RUN — merge hit conflicts. Would have opened a sync PR instead of tagging."; exit 0; fi
+          echo "DRY RUN — merge was clean. Would have pushed main and tagged ${{ steps.upstream.outputs.tag }}."
+          echo "Re-run with dry_run=false to publish."
diff --git a/.github/workflows/test-whisper.yml b/.github/workflows/test-whisper.yml
new file mode 100644
index 00000000000..3a9d2047ff3
--- /dev/null
+++ b/.github/workflows/test-whisper.yml
@@ -0,0 +1,282 @@
+name: Test whisper-cli
+
+# Downloads a published release artifact and runs whisper-cli against jfk.wav.
+# Tests on real self-hosted GPU hardware (stx-halo runners).
+# Trigger manually after a release, or let it run automatically via workflow_dispatch
+# from build.yml once artifacts are published.
+
+on:  # manual trigger only
+  workflow_dispatch:
+    inputs:
+      release_tag:  # surfaced to every job as env.RELEASE_TAG
+        description: 'Release tag to test (e.g. v1.8.4) or "latest"'
+        required: false
+        default: 'latest'
+        type: string
+      gfx_target:  # surfaced to every job as env.GFX_TARGET
+        description: 'ROCm GPU target to test'
+        required: false
+        default: 'gfx1151'
+        type: string
+
+env:  # the `||` fallbacks apply when the corresponding input is empty
+  RELEASE_TAG: ${{ github.event.inputs.release_tag || 'latest' }}
+  GFX_TARGET: ${{ github.event.inputs.gfx_target || 'gfx1151' }}
+
+jobs:
+
+  # ---------------------------------------------------------------------------
+  # Resolve release tag (latest or specific)
+  # ---------------------------------------------------------------------------
+  prepare:
+    runs-on: ubuntu-latest
+    outputs:
+      release_tag: ${{ steps.resolve.outputs.release_tag }}
+    steps:
+      - name: Resolve release tag
+        id: resolve
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if [ "$RELEASE_TAG" = "latest" ]; then
+            TAG=$(curl -fsS -H "Authorization: token $GITHUB_TOKEN" \
+              "https://api.github.com/repos/$GITHUB_REPOSITORY/releases/latest" | jq -r '.tag_name // empty')
+            # No published release (or a failed request) leaves TAG empty; stop here instead of emitting "null".
+            [ -n "$TAG" ] || { echo "Error: could not resolve the latest release of $GITHUB_REPOSITORY"; exit 1; }
+            echo "Resolved latest release: $TAG"
+          else
+            TAG="$RELEASE_TAG"
+            STATUS=$(curl -s -o /dev/null -w "%{http_code}" -H "Authorization: token $GITHUB_TOKEN" \
+              "https://api.github.com/repos/$GITHUB_REPOSITORY/releases/tags/$TAG")
+            if [ "$STATUS" != "200" ]; then
+              echo "Error: Release $TAG not found (HTTP $STATUS)"
+              exit 1
+            fi
+            echo "Using specified release: $TAG"
+          fi
+          echo "release_tag=$TAG" >> "$GITHUB_OUTPUT"
+
+  # ---------------------------------------------------------------------------
+  # Test ROCm artifact on Windows (self-hosted stx-halo GPU runner)
+  # ---------------------------------------------------------------------------
+  test-windows-rocm:
+    runs-on: [self-hosted, Windows, stx-halo]
+    needs: prepare
+
+    steps:
+      - name: Checkout (for samples/jfk.wav and models/ scripts)
+        uses: actions/checkout@v4
+
+      - name: Download ROCm Windows artifact
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        shell: pwsh
+        run: |
+          $tag = "${{ needs.prepare.outputs.release_tag }}"
+          $target = $env:GFX_TARGET
+          # strip leading 'v' for artifact filename (e.g. v1.8.4 -> 1.8.4)
+          $ver = $tag.TrimStart('v')
+          $asset = "whisper-${ver}-windows-rocm-${target}.zip"
+          $repo = "${{ github.repository }}"
+
+          Write-Host "Downloading: $asset from release $tag"
+          $headers = @{ "Authorization" = "token $env:GITHUB_TOKEN" }
+          $release = Invoke-RestMethod -Uri "https://api.github.com/repos/$repo/releases/tags/$tag" -Headers $headers
+          $found = $release.assets | Where-Object { $_.name -eq $asset }
+
+          if (-not $found) {
+            Write-Host "Available assets:"
+            $release.assets | ForEach-Object { Write-Host "  $($_.name)" }
+            # Write-Error goes last: the runner's $ErrorActionPreference = 'stop' makes it terminate the script.
+            Write-Error "Asset '$asset' not found in release '$tag'"
+            exit 1
+          }
+
+          Write-Host "Found: $($found.name) ($([math]::Round($found.size/1MB,2)) MB)"
+          Invoke-WebRequest -Uri $found.browser_download_url -OutFile $asset -Headers $headers
+
+          Write-Host "Extracting..."
+          Expand-Archive -Path $asset -DestinationPath whisper-bin -Force
+          Write-Host "Binaries:"
+          Get-ChildItem whisper-bin -Filter "*.exe" | Format-Table Name, Length
+
+      - name: Download tiny model
+        shell: pwsh
+        run: |
+          New-Item -ItemType Directory -Force -Path models | Out-Null
+          $url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
+          Write-Host "Downloading ggml-tiny.bin..."
+          Invoke-WebRequest -Uri $url -OutFile "models\ggml-tiny.bin"
+          $mb = [math]::Round((Get-Item "models\ggml-tiny.bin").Length/1MB,2)
+          Write-Host "Downloaded: $mb MB"
+
+      - name: Run transcription
+        shell: pwsh
+        run: |
+          $cli = "whisper-bin\whisper-cli.exe"
+          if (-not (Test-Path $cli)) {
+            Get-ChildItem -Recurse whisper-bin | Format-Table Name, Length | Out-Host  # listing first, see Write-Error below
+            Write-Error "whisper-cli.exe not found. Contents of whisper-bin are listed above."
+            exit 1
+          }
+          Write-Host "Running whisper-cli against samples\jfk.wav ..."
+          & $cli -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result
+          Write-Host "--- Transcription output ---"
+          Get-Content jfk-result.txt
+
+      - name: Verify transcription
+        shell: pwsh
+        run: |
+          $text = Get-Content "jfk-result.txt" -Raw -ErrorAction SilentlyContinue
+          if (-not $text) { Write-Error "jfk-result.txt is empty or missing"; exit 1 }
+          if ($text -match "country|ask not|nation|kennedy") {
+            Write-Host "PASS: transcription contains expected words" -ForegroundColor Green
+          } else {
+            Write-Host $text  # show the transcript before Write-Error stops the script
+            Write-Error "FAIL: expected words not found in transcription"
+            exit 1
+          }
+
+  # ---------------------------------------------------------------------------
+  # Test ROCm artifact on Linux (self-hosted stx-halo GPU runner)
+  # ---------------------------------------------------------------------------
+  test-linux-rocm:
+    runs-on: [self-hosted, Linux, stx-halo]
+    needs: prepare
+
+    steps:
+      - name: Checkout (for samples/jfk.wav)
+        uses: actions/checkout@v4
+
+      - name: Install jq if needed
+        run: |
+          if ! command -v jq &>/dev/null; then
+            mkdir -p ~/bin
+            curl -fL -o ~/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64
+            chmod +x ~/bin/jq
+            echo "$HOME/bin" >> "$GITHUB_PATH"
+          fi
+
+      - name: Download ROCm Linux artifact
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          TAG="${{ needs.prepare.outputs.release_tag }}"
+          TARGET="$GFX_TARGET"  # workflow-level env; the raw input is not pasted into the script
+          VER="${TAG#v}"
+          ASSET="whisper-${VER}-linux-rocm-${TARGET}.tar.gz"
+          REPO="${{ github.repository }}"
+
+          echo "Downloading: $ASSET from release $TAG"
+
+          RELEASE=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
+            "https://api.github.com/repos/$REPO/releases/tags/$TAG")
+
+          URL=$(echo "$RELEASE" | jq -r ".assets[] | select(.name == \"$ASSET\") | .browser_download_url")
+
+          if [ -z "$URL" ] || [ "$URL" = "null" ]; then
+            echo "Asset '$ASSET' not found in release '$TAG'"
+            echo "Available assets:"
+            echo "$RELEASE" | jq -r '.assets[].name'
+            exit 1
+          fi
+
+          SIZE=$(echo "$RELEASE" | jq -r ".assets[] | select(.name == \"$ASSET\") | .size")
+          echo "Found: $ASSET ($SIZE bytes)"
+
+          # -f: fail on an HTTP error instead of saving the error body as the tarball
+          curl -fL -H "Authorization: token $GITHUB_TOKEN" -o "$ASSET" "$URL"
+
+          echo "Extracting..."
+          mkdir -p whisper-bin
+          tar -xzf "$ASSET" --strip-components=1 -C whisper-bin
+          chmod +x whisper-bin/whisper-cli
+          ls -lh whisper-bin/whisper-cli
+
+      - name: Download tiny model
+        run: |
+          mkdir -p models
+          curl -fL -o models/ggml-tiny.bin \
+            "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
+          echo "Downloaded: $(du -h models/ggml-tiny.bin | cut -f1)"
+
+      - name: Set library path
+        run: echo "LD_LIBRARY_PATH=$(pwd)/whisper-bin${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
+
+      - name: Run transcription
+        run: |
+          echo "Running whisper-cli against samples/jfk.wav ..."
+          ./whisper-bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result
+          echo "--- Transcription output ---"
+          cat jfk-result.txt
+
+      - name: Verify transcription
+        run: |
+          if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then
+            echo "PASS: transcription contains expected words"
+          else
+            echo "FAIL: expected words not found in transcription"
+            cat jfk-result.txt
+            exit 1
+          fi
+
+  # ---------------------------------------------------------------------------
+  # Test CPU artifact on Linux (GitHub-hosted runner - no GPU needed)
+  # ---------------------------------------------------------------------------
+  test-linux-cpu:
+    runs-on: ubuntu-latest
+    needs: prepare
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Download CPU Linux artifact
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          TAG="${{ needs.prepare.outputs.release_tag }}"
+          VER="${TAG#v}"
+          ASSET="whisper-${VER}-linux-cpu-x86_64.tar.gz"
+          REPO="${{ github.repository }}"
+
+          echo "Downloading: $ASSET from release $TAG"
+
+          RELEASE=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
+            "https://api.github.com/repos/$REPO/releases/tags/$TAG")
+
+          URL=$(echo "$RELEASE" | jq -r ".assets[] | select(.name == \"$ASSET\") | .browser_download_url")
+
+          if [ -z "$URL" ] || [ "$URL" = "null" ]; then
+            echo "Asset '$ASSET' not found. Available:"
+            echo "$RELEASE" | jq -r '.assets[].name'
+            exit 1
+          fi
+
+          curl -fL -H "Authorization: token $GITHUB_TOKEN" -o "$ASSET" "$URL"  # -f: an HTTP error must not be saved as the tarball
+          mkdir -p whisper-bin
+          tar -xzf "$ASSET" --strip-components=1 -C whisper-bin
+          chmod +x whisper-bin/whisper-cli
+          ls -lh whisper-bin/whisper-cli
+
+      - name: Download tiny model
+        run: |
+          mkdir -p models
+          curl -fL -o models/ggml-tiny.bin \
+            "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
+
+      - name: Run transcription
+        run: |
+          ./whisper-bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result
+          echo "--- Transcription output ---"
+          cat jfk-result.txt
+
+      - name: Verify transcription
+        run: |
+          if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then
+            echo "PASS: transcription contains expected words"
+          else
+            echo "FAIL: expected words not found"
+            cat jfk-result.txt
+            exit 1
+          fi
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a0f74041321..d4dc318056a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -91,6 +91,7 @@ endif()
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
+option(WHISPER_VITISAI "whisper: support for AMD Vitis AI" OFF)
# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
diff --git a/README.md b/README.md
index 474a1301da7..ef7dd801b24 100644
--- a/README.md
+++ b/README.md
@@ -1,862 +1,283 @@
-# whisper.cpp
+# whisper.cpp-rocm
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Pre-built releases of **[whisper.cpp](https://github.com/ggerganov/whisper.cpp)** with full AMD hardware acceleration — **ROCm™ GPU**, **Vulkan GPU**, **RyzenAI NPU**, and optimised **CPU** builds — for Windows and Linux.
+
+Releases track upstream whisper.cpp exactly: every time upstream publishes a new version, our automated pipeline syncs, builds all backends, and publishes a matching release within 24 hours. No manual steps. No lag.
+
+> [!IMPORTANT]
+> **No ROCm installation required.** All ROCm and Vulkan runtime libraries are bundled inside every release archive. Download, extract, and run.
+
+> [!NOTE]
+> This project is maintained by the [Lemonade SDK](https://github.com/lemonade-sdk/lemonade) team. Our primary focus is seamless integration with Lemonade and similar AMD-optimised AI applications. We welcome collaborations and contributions that advance AMD whisper.cpp support.
-
-
-[](https://github.com/ggml-org/whisper.cpp/actions)
-[](https://opensource.org/licenses/MIT)
-[](https://conan.io/center/whisper-cpp)
-[](https://www.npmjs.com/package/whisper.cpp/)
-
-Stable: [v1.8.1](https://github.com/ggml-org/whisper.cpp/releases/tag/v1.8.1) / [Roadmap](https://github.com/orgs/ggml-org/projects/4/)
-
-High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
-
-- Plain C/C++ implementation without dependencies
-- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
-- AVX intrinsics support for x86 architectures
-- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics)
-- Mixed F16 / F32 precision
-- [Integer quantization support](#quantization)
-- Zero memory allocations at runtime
-- [Vulkan support](#vulkan-gpu-support)
-- Support for CPU-only inference
-- [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
-- [OpenVINO Support](#openvino-support)
-- [Ascend NPU Support](#ascend-npu-support)
-- [Moore Threads GPU Support](#moore-threads-gpu-support)
-- [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
-- [Voice Activity Detection (VAD)](#voice-activity-detection-vad)
-
-Supported platforms:
-
-- [x] Mac OS (Intel and Arm)
-- [x] [iOS](examples/whisper.objc)
-- [x] [Android](examples/whisper.android)
-- [x] [Java](bindings/java/README.md)
-- [x] Linux / [FreeBSD](https://github.com/ggml-org/whisper.cpp/issues/56#issuecomment-1350920264)
-- [x] [WebAssembly](examples/whisper.wasm)
-- [x] Windows ([MSVC](https://github.com/ggml-org/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggml-org/whisper.cpp/issues/168))
-- [x] [Raspberry Pi](https://github.com/ggml-org/whisper.cpp/discussions/166)
-- [x] [Docker](https://github.com/ggml-org/whisper.cpp/pkgs/container/whisper.cpp)
-
-The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
-The rest of the code is part of the [`ggml`](https://github.com/ggml-org/ggml) machine learning library.
-
-Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
-As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
-
-https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
-
-You can also easily make your own offline voice assistant application: [command](examples/command)
-
-https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
-
-On Apple Silicon, the inference runs fully on the GPU via Metal:
-
-https://github.com/ggml-org/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
-
-## Quick start
+---
-First clone the repository:
+## 🎯 Supported Devices
-```bash
-git clone https://github.com/ggml-org/whisper.cpp.git
-```
+### ROCm GPU
-Navigate into the directory:
+| Architecture | Devices |
+|---|---|
+| **gfx1151** — RDNA3.5 APU | Ryzen AI MAX+ Pro 395 (Strix Halo) |
+| **gfx1150** — RDNA3.5 APU | Ryzen AI 300 series (Strix Point) |
+| **gfx120X** — RDNA4 dGPU | Radeon RX 9070 XT / 9070 / 9060 XT / 9060 |
+| **gfx110X** — RDNA3 dGPU & iGPU | RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT, RX 7600 XT/7600; iGPU Radeon 780M / 760M / 740M |
-```
-cd whisper.cpp
-```
+### Vulkan GPU
-Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
+Any GPU with a Vulkan 1.3-capable driver — AMD, NVIDIA, Intel. Covers iGPUs on all platforms where a Vulkan driver is present.
-```bash
-sh ./models/download-ggml-model.sh base.en
-```
+### NPU — RyzenAI
-Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this:
+| Device | OS | Requirement |
+|---|---|---|
+| Ryzen AI 300 series (Strix Point / Strix Halo) | Windows only | NPU driver ≥ `.280` |
-```bash
-# build the project
-cmake -B build
-cmake --build build -j --config Release
+### CPU
-# transcribe an audio file
-./build/bin/whisper-cli -f samples/jfk.wav
-```
+Optimised CPU-only builds for x86-64. Windows and Linux. No GPU required.
---
-For a quick demo, simply run `make base.en`.
+## 📦 Downloads
-The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
+All builds are self-contained — no separate driver or runtime installation needed (except the NPU driver for the NPU build).
-For detailed usage instructions, run: `./build/bin/whisper-cli -h`
+### ROCm — GPU Accelerated
-Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
-For example, you can use `ffmpeg` like this:
+| GPU Target | Linux | Windows |
+|---|---|---|
+| **gfx1151** (Ryzen AI MAX+ Pro 395) | [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx1151.tar.gz) | [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx1151.zip) |
+| **gfx1150** (Ryzen AI 300) | [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx1150.tar.gz) | [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx1150.zip) |
+| **gfx120X** (RDNA4 dGPU) | [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx120X.tar.gz) | [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx120X.zip) |
+| **gfx110X** (RDNA3 dGPU & iGPU) | [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx110X.tar.gz) | [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx110X.zip) |
-```bash
-ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
-```
-
-## More audio samples
-
-If you want some extra audio samples to play with, simply run:
-
-```
-make -j samples
-```
+### Vulkan — Cross-Vendor GPU
-This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
+| Linux | Windows |
+|---|---|
+| [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-vulkan-x86_64.tar.gz) | [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-vulkan-x64.zip) |
-You can download and run the other models as follows:
+### NPU — RyzenAI (Windows only)
-```
-make -j tiny.en
-make -j tiny
-make -j base.en
-make -j base
-make -j small.en
-make -j small
-make -j medium.en
-make -j medium
-make -j large-v1
-make -j large-v2
-make -j large-v3
-make -j large-v3-turbo
-```
+| Windows |
+|---|
+| [-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-npu-x64.zip) |
-## Memory usage
+> Requires NPU driver ≥ `.280` and a pre-compiled `.rai` encoder model from [AMD's Hugging Face collection](https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models). Place the `.rai` file alongside your `ggml-*.bin` model — whisper-cli picks it up automatically.
-| Model | Disk | Mem |
-| ------ | ------- | ------- |
-| tiny | 75 MiB | ~273 MB |
-| base | 142 MiB | ~388 MB |
-| small | 466 MiB | ~852 MB |
-| medium | 1.5 GiB | ~2.1 GB |
-| large | 2.9 GiB | ~3.9 GB |
+### macOS — Metal GPU
-## POWER VSX Intrinsics
+| macOS (Apple Silicon) |
+|---|
+| [-lightgrey?logo=apple&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-darwin-metal-arm64.tar.gz) |
-`whisper.cpp` supports POWER architectures and includes code which
-significantly speeds operation on Linux running on POWER9/10, making it
-capable of faster-than-realtime transcription on underclocked Raptor
-Talos II. Ensure you have a BLAS package installed, and replace the
-standard cmake setup with:
+### CPU — No GPU Required
-```bash
-# build with GGML_BLAS defined
-cmake -B build -DGGML_BLAS=1
-cmake --build build -j --config Release
-./build/bin/whisper-cli [ .. etc .. ]
-```
+| Linux | Windows |
+|---|---|
+| [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-cpu-x86_64.tar.gz) | [](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-cpu-x64.zip) |
-## Quantization
+---
-`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
-Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently.
+## 🧪 Quick Smoketest
-Here are the steps for creating and using a quantized model:
+### 1. Get a model
```bash
-# quantize a model with Q5_0 method
-cmake -B build
-cmake --build build -j --config Release
-./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
-
-# run the examples as usual, specifying the quantized model file
-./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
-```
-
-## Core ML support
-
-On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant
-speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`:
-
-- Install Python dependencies needed for the creation of the Core ML model:
-
- ```bash
- pip install ane_transformers
- pip install openai-whisper
- pip install coremltools
- ```
-
- - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
- - Python 3.11 is recommended.
- - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
- - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
- - To create an environment, use: `conda create -n py311-whisper python=3.11 -y`
- - To activate the environment, use: `conda activate py311-whisper`
-
-- Generate a Core ML model. For example, to generate a `base.en` model, use:
-
- ```bash
- ./models/generate-coreml-model.sh base.en
- ```
-
- This will generate the folder `models/ggml-base.en-encoder.mlmodelc`
-
-- Build `whisper.cpp` with Core ML support:
-
- ```bash
- # using CMake
- cmake -B build -DWHISPER_COREML=1
- cmake --build build -j --config Release
- ```
-
-- Run the examples as usual. For example:
-
- ```text
- $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
-
- ...
-
- whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc'
- whisper_init_state: first run on a device may take a while ...
- whisper_init_state: Core ML model loaded
-
- system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 |
-
- ...
- ```
-
- The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format.
- Next runs are faster.
-
-For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggml-org/whisper.cpp/pull/566).
-
-## OpenVINO support
-
-On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed
-on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete).
-
-This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`:
-
-- First, setup python virtual env. and install python dependencies. Python 3.10 is recommended.
-
- Windows:
-
- ```powershell
- cd models
- python -m venv openvino_conv_env
- openvino_conv_env\Scripts\activate
- python -m pip install --upgrade pip
- pip install -r requirements-openvino.txt
- ```
-
- Linux and macOS:
-
- ```bash
- cd models
- python3 -m venv openvino_conv_env
- source openvino_conv_env/bin/activate
- python -m pip install --upgrade pip
- pip install -r requirements-openvino.txt
- ```
-
-- Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use:
-
- ```
- python convert-whisper-to-openvino.py --model base.en
- ```
-
- This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
- is the default location that the OpenVINO extension will search at runtime.
-
-- Build `whisper.cpp` with OpenVINO support:
-
- Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2024.6.0](https://github.com/openvinotoolkit/openvino/releases/tag/2024.6.0). Ready to use Binaries of the required libraries can be found in the [OpenVino Archives](https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/)
-
- After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example:
-
- Linux:
-
- ```bash
- source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
- ```
-
- Windows (cmd):
-
- ```powershell
- C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
- ```
-
- And then build the project using cmake:
-
- ```bash
- cmake -B build -DWHISPER_OPENVINO=1
- cmake --build build -j --config Release
- ```
-
-- Run the examples as usual. For example:
-
- ```text
- $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
-
- ...
-
- whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml'
- whisper_ctx_init_openvino_encoder: first run on a device may take a while ...
- whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache
- whisper_ctx_init_openvino_encoder: OpenVINO model loaded
-
- system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 |
-
- ...
- ```
-
- The first time run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
- cached for the next run.
-
-For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggml-org/whisper.cpp/pull/1037).
-
-## NVIDIA GPU support
-
-With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
-First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
-
-Now build `whisper.cpp` with CUDA support:
-
-```
-cmake -B build -DGGML_CUDA=1
-cmake --build build -j --config Release
-```
-
-or for newer NVIDIA GPU's (RTX 5000 series):
-```
-cmake -B build -DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="86"
-cmake --build build -j --config Release
-```
-
-## Vulkan GPU support
-Cross-vendor solution which allows you to accelerate workload on your GPU.
-First, make sure your graphics card driver provides support for Vulkan API.
-
-Now build `whisper.cpp` with Vulkan support:
-```
-cmake -B build -DGGML_VULKAN=1
-cmake --build build -j --config Release
-```
-
-## BLAS CPU support via OpenBLAS
-
-Encoder processing can be accelerated on the CPU via OpenBLAS.
-First, make sure you have installed `openblas`: https://www.openblas.net/
-
-Now build `whisper.cpp` with OpenBLAS support:
-
-```
-cmake -B build -DGGML_BLAS=1
-cmake --build build -j --config Release
-```
-
-## Ascend NPU support
-
-Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
-
-First, check if your Ascend NPU device is supported:
-
-**Verified devices**
-| Ascend NPU | Status |
-|:-----------------------------:|:-------:|
-| Atlas 300T A2 | Support |
-| Atlas 300I Duo | Support |
-
-Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community) . The lasted version of CANN is recommanded.
-
-Now build `whisper.cpp` with CANN support:
-
-```
-cmake -B build -DGGML_CANN=1
-cmake --build build -j --config Release
-```
-
-Run the inference examples as usual, for example:
+# Download the tiny.en model (~75 MB) for a fast smoke test
+./models/download-ggml-model.sh tiny.en
+# Or grab any ggml-*.bin from https://huggingface.co/ggerganov/whisper.cpp
```
-./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
-```
-
-*Notes:*
-
-- If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag.
-- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
-## Moore Threads GPU support
-
-With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
-First, make sure you have installed `MUSA SDK rc4.2.0`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=4.2.0
-
-Now build `whisper.cpp` with MUSA support:
-
-```
-cmake -B build -DGGML_MUSA=1
-cmake --build build -j --config Release
-```
-
-or specify the architecture for your Moore Threads GPU. For example, if you have a MTT S80 GPU, you can specify the architecture as follows:
-
-```
-cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
-cmake --build build -j --config Release
-```
-
-## FFmpeg support (Linux only)
-
-If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
-
-First, you need to install required libraries:
+### 2. Transcribe the bundled sample
```bash
-# Debian/Ubuntu
-sudo apt install libavcodec-dev libavformat-dev libavutil-dev
+# Linux
+./whisper-cli -m models/ggml-tiny.en.bin -f samples/jfk.wav
-# RHEL/Fedora
-sudo dnf install libavcodec-free-devel libavformat-free-devel libavutil-free-devel
+# Windows
+whisper-cli.exe -m models\ggml-tiny.en.bin -f samples\jfk.wav
```
-Then you can build the project as follows:
-
-```bash
-cmake -B build -D WHISPER_FFMPEG=yes
-cmake --build build
-```
+Expected: a transcription of the JFK "Ask not what your country can do for you" excerpt.
-Run the following example to confirm it's working:
+### 3. Verify GPU is active (ROCm)
```bash
-# Convert an audio file to Opus format
-ffmpeg -i samples/jfk.wav jfk.opus
-
-# Transcribe the audio file
-./build/bin/whisper-cli --model models/ggml-base.en.bin --file jfk.opus
-```
-
-## Docker
-
-### Prerequisites
-
-- Docker must be installed and running on your system.
-- Create a folder to store big models & intermediate files (ex. /whisper/models)
-
-### Images
-
-We have multiple Docker images available for this project:
-
-1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
-2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
-3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
-4. `ghcr.io/ggml-org/whisper.cpp:main-vulkan`: Same as `main` but compiled with Vulkan support. (platforms: `linux/amd64`)
-
-### Usage
-
-```shell
-# download model and persist it in a local folder
-docker run -it --rm \
- -v path/to/models:/models \
- whisper.cpp:main "./models/download-ggml-model.sh base /models"
-
-# transcribe an audio file
-docker run -it --rm \
- -v path/to/models:/models \
- -v path/to/audios:/audios \
- whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
-
-# transcribe an audio file in samples folder
-docker run -it --rm \
- -v path/to/models:/models \
- whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
-
-# run the web server
-docker run -it --rm -p "8080:8080" \
- -v path/to/models:/models \
- whisper.cpp:main "whisper-server --host 127.0.0.1 -m /models/ggml-base.bin"
-
-# run the bench too on the small.en model using 4 threads
-docker run -it --rm \
- -v path/to/models:/models \
- whisper.cpp:main "whisper-bench -m /models/ggml-small.en.bin -t 4"
+# At startup whisper-cli prints the backend in use — look for:
+# ggml_hip: using device ...
+./whisper-cli -m models/ggml-tiny.en.bin -f samples/jfk.wav 2>&1 | grep -i "hip\|rocm\|device"
```
-## Installing with Conan
-
-You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
+### 4. Verify NPU is active (VitisAI)
```
-conan install --requires="whisper-cpp/[*]" --build=missing
+# Place the .rai encoder alongside the .bin model, then run normally.
+# Look for this line in stdout:
+# whisper_vitisai_encode: Vitis AI model inference completed.
+whisper-cli.exe -m models\ggml-tiny.en.bin -f samples\jfk.wav
```
-For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/).
-
-## Limitations
-
-- Inference only
-
-## Real-time audio input example
-
-This is a naive example of performing real-time inference on audio from your microphone.
-The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
-More info is available in [issue #10](https://github.com/ggml-org/whisper.cpp/issues/10).
-You will need to have [sdl2](https://wiki.libsdl.org/SDL2/Installation) installed for it to work properly.
+### 5. Verify portability (Linux ROCm)
```bash
-cmake -B build -DWHISPER_SDL2=ON
-cmake --build build -j --config Release
-./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
+# ROCm runtime libs are bundled — RPATH should point to $ORIGIN (same dir as binary)
+readelf -d whisper-cli | grep -E 'RPATH|RUNPATH' # -> $ORIGIN (modern linkers emit RUNPATH)
+ldd whisper-cli | grep "not found" # -> (empty — all deps resolved locally)
```
-https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
+---
-## Confidence color-coding
+## 🔄 Release Cadence
-Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
-to highlight words with high or low confidence:
+Releases are fully automated and mirror upstream whisper.cpp releases with no manual steps:
-```bash
-./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
```
-
-
-
-## Controlling the length of the generated text segments (experimental)
-
-For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
-
-```text
-$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
-
-whisper_model_load: loading model from './models/ggml-base.en.bin'
-...
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
-
-main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
-
-[00:00:00.000 --> 00:00:00.850] And so my
-[00:00:00.850 --> 00:00:01.590] fellow
-[00:00:01.590 --> 00:00:04.140] Americans, ask
-[00:00:04.140 --> 00:00:05.660] not what your
-[00:00:05.660 --> 00:00:06.840] country can do
-[00:00:06.840 --> 00:00:08.430] for you, ask
-[00:00:08.430 --> 00:00:09.440] what you can do
-[00:00:09.440 --> 00:00:10.020] for your
-[00:00:10.020 --> 00:00:11.000] country.
+upstream whisper.cpp releases vX.Y.Z
+ |
+ v (detected within 24 h by daily sync job)
+ sync.yml merges upstream into main, pushes tag vX.Y.Z
+ |
+ v (tag push triggers build pipeline)
+ build.yml builds all backend/OS combinations in parallel
+ |
+ v
+ GitHub Release: "whisper.cpp vX.Y.Z — AMD Builds"
+ with up to 14 artifacts across all backends and OS targets
```
-## Word-level timestamp (experimental)
-
-The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
-
-```text
-$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
-
-whisper_model_load: loading model from './models/ggml-base.en.bin'
-...
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
-
-main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
-
-[00:00:00.000 --> 00:00:00.320]
-[00:00:00.320 --> 00:00:00.370] And
-[00:00:00.370 --> 00:00:00.690] so
-[00:00:00.690 --> 00:00:00.850] my
-[00:00:00.850 --> 00:00:01.590] fellow
-[00:00:01.590 --> 00:00:02.850] Americans
-[00:00:02.850 --> 00:00:03.300] ,
-[00:00:03.300 --> 00:00:04.140] ask
-[00:00:04.140 --> 00:00:04.990] not
-[00:00:04.990 --> 00:00:05.410] what
-[00:00:05.410 --> 00:00:05.660] your
-[00:00:05.660 --> 00:00:06.260] country
-[00:00:06.260 --> 00:00:06.600] can
-[00:00:06.600 --> 00:00:06.840] do
-[00:00:06.840 --> 00:00:07.010] for
-[00:00:07.010 --> 00:00:08.170] you
-[00:00:08.170 --> 00:00:08.190] ,
-[00:00:08.190 --> 00:00:08.430] ask
-[00:00:08.430 --> 00:00:08.910] what
-[00:00:08.910 --> 00:00:09.040] you
-[00:00:09.040 --> 00:00:09.320] can
-[00:00:09.320 --> 00:00:09.440] do
-[00:00:09.440 --> 00:00:09.760] for
-[00:00:09.760 --> 00:00:10.020] your
-[00:00:10.020 --> 00:00:10.510] country
-[00:00:10.510 --> 00:00:11.000] .
-```
+**Every release ships up to 14 artifacts:**
-## Speaker segmentation via tinydiarize (experimental)
-
-More information about this approach is available here: https://github.com/ggml-org/whisper.cpp/pull/1058
-
-Sample usage:
-
-```py
-# download a tinydiarize compatible model
-./models/download-ggml-model.sh small.en-tdrz
-
-# run as usual, adding the "-tdrz" command-line argument
-./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
-...
-main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
-...
-[00:00:00.000 --> 00:00:03.800] Okay Houston, we've had a problem here. [SPEAKER_TURN]
-[00:00:03.800 --> 00:00:06.200] This is Houston. Say again please. [SPEAKER_TURN]
-[00:00:06.200 --> 00:00:08.260] Uh Houston we've had a problem.
-[00:00:08.260 --> 00:00:11.320] We've had a main beam up on a volt. [SPEAKER_TURN]
-[00:00:11.320 --> 00:00:13.820] Roger main beam interval. [SPEAKER_TURN]
-[00:00:13.820 --> 00:00:15.100] Uh uh [SPEAKER_TURN]
-[00:00:15.100 --> 00:00:18.020] So okay stand, by thirteen we're looking at it. [SPEAKER_TURN]
-[00:00:18.020 --> 00:00:25.740] Okay uh right now uh Houston the uh voltage is uh is looking good um.
-[00:00:27.620 --> 00:00:29.940] And we had a a pretty large bank or so.
```
-
-## Karaoke-style movie generation (experimental)
-
-The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the
-currently pronounced word is highlighted. Use the `-owts` argument and run the generated bash script.
-This requires to have `ffmpeg` installed.
-
-Here are a few _"typical"_ examples:
-
-```bash
-./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
-source ./samples/jfk.wav.wts
-ffplay ./samples/jfk.wav.mp4
+whisper-{version}-linux-rocm-gfx1151.tar.gz
+whisper-{version}-linux-rocm-gfx1150.tar.gz
+whisper-{version}-linux-rocm-gfx120X.tar.gz
+whisper-{version}-linux-rocm-gfx110X.tar.gz
+whisper-{version}-windows-rocm-gfx1151.zip
+whisper-{version}-windows-rocm-gfx1150.zip
+whisper-{version}-windows-rocm-gfx120X.zip
+whisper-{version}-windows-rocm-gfx110X.zip
+whisper-{version}-linux-vulkan-x86_64.tar.gz
+whisper-{version}-windows-vulkan-x64.zip
+whisper-{version}-windows-npu-x64.zip (may be absent if NPU runner offline)
+whisper-{version}-linux-cpu-x86_64.tar.gz
+whisper-{version}-windows-cpu-x64.zip
+whisper-{version}-darwin-metal-arm64.tar.gz
```
-https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4
+> [!TIP]
+> **Linux APU out of VRAM despite free memory (gfx1150 / gfx1151)?**
+> Add `ttm.pages_limit=12582912` to your kernel command line (e.g. in GRUB), run `update-grub`, and reboot.
+> See the [TheRock FAQ](https://github.com/ROCm/TheRock/blob/main/docs/faq.md#gfx1151-strix-halo-specific-questions) for details.
---
-```bash
-./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
-source ./samples/mm0.wav.wts
-ffplay ./samples/mm0.wav.mp4
-```
+## 🖥️ Local Builds (Windows)
-https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4
+Reproduce any CI build locally using the bundled PowerShell script. It produces artifacts identical to those that CI publishes.
----
-
-```bash
-./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
-source ./samples/gb0.wav.wts
-ffplay ./samples/gb0.wav.mp4
-```
+```powershell
+# Prerequisites: CMake, VS Build Tools 2022, 7-Zip, internet access
-https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4
+# CPU only (~2 min, no GPU needed)
+.\scripts\local-build.ps1 -Backend cpu
----
+# Vulkan — requires Vulkan SDK from https://vulkan.lunarg.com
+.\scripts\local-build.ps1 -Backend vulkan
-## Video comparison of different models
+# ROCm for RDNA3 iGPU — downloads ROCm tarball (~2-4 GB, cached after first run)
+.\scripts\local-build.ps1 -Backend rocm -GfxTarget gfx1151
-Use the [scripts/bench-wts.sh](https://github.com/ggml-org/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
+# NPU — requires RyzenAI hardware + NPU driver >= .280
+.\scripts\local-build.ps1 -Backend npu
-```bash
-./scripts/bench-wts.sh samples/jfk.wav
-ffplay ./samples/jfk.wav.all.mp4
+# All backends, version-stamped artifacts placed in .\dist\
+.\scripts\local-build.ps1 -Backend all -Version 1.8.4
```
-https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4
-
---
-## Benchmarks
-
-In order to have an objective comparison of the performance of the inference across different system configurations,
-use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
-took to execute it. The results are summarized in the following Github issue:
-
-[Benchmark results](https://github.com/ggml-org/whisper.cpp/issues/89)
+## 📦 Dependencies
-Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py).
+### Bundled in every release (no installation needed)
-You can run it with the following command, by default it will run against any standard model in the models folder.
+| Backend | What is included |
+|---|---|
+| ROCm | `amdhip64`, `rocblas`, `hipblaslt` + library data, LLVM runtime, all system deps; RPATH=`$ORIGIN` on Linux |
+| Vulkan | SPIR-V shaders embedded at build time; links against system Vulkan loader |
+| Metal | Uses macOS system Metal framework; no extra bundling needed |
+| NPU | FlexML Runtime DLLs (`flexmlrt/bin` + `flexmlrt/lib`) |
+| CPU | SDL2.dll included on Windows |
-```bash
-python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
-```
+### Build-time only
-It is written in python with the intention of being easy to modify and extend for your benchmarking use case.
-
-It outputs a csv file with the results of the benchmarking.
-
-## `ggml` format
-
-The original models are converted to a custom binary format. This allows to pack everything needed into a single file:
-
-- model parameters
-- mel filters
-- vocabulary
-- weights
-
-You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
-or manually from here:
-
-- https://huggingface.co/ggerganov/whisper.cpp
-
-For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
-
-## [Bindings](https://github.com/ggml-org/whisper.cpp/discussions/categories/bindings)
-
-- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggml-org/whisper.cpp/discussions/310)
-- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggml-org/whisper.cpp/discussions/309)
- - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
-- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggml-org/whisper.cpp/discussions/312)
-- [x] Java:
- - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
-- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggml-org/whisper.cpp/discussions/507)
-- [x] Objective-C / Swift: [ggml-org/whisper.spm](https://github.com/ggml-org/whisper.spm) | [#313](https://github.com/ggml-org/whisper.cpp/discussions/313)
- - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
-- [x] .NET: | [#422](https://github.com/ggml-org/whisper.cpp/discussions/422)
- - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
- - [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
-- [x] Python: | [#9](https://github.com/ggml-org/whisper.cpp/issues/9)
- - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
- - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
- - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
- - [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11)
-- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
-- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
-
-## XCFramework
-The XCFramework is a precompiled version of the library for iOS, visionOS, tvOS,
-and macOS. It can be used in Swift projects without the need to compile the
-library from source. For example, the v1.7.5 version of the XCFramework can be
-used as follows:
-
-```swift
-// swift-tools-version: 5.10
-// The swift-tools-version declares the minimum version of Swift required to build this package.
-
-import PackageDescription
-
-let package = Package(
- name: "Whisper",
- targets: [
- .executableTarget(
- name: "Whisper",
- dependencies: [
- "WhisperFramework"
- ]),
- .binaryTarget(
- name: "WhisperFramework",
- url: "https://github.com/ggml-org/whisper.cpp/releases/download/v1.7.5/whisper-v1.7.5-xcframework.zip",
- checksum: "c7faeb328620d6012e130f3d705c51a6ea6c995605f2df50f6e1ad68c59c6c4a"
- )
- ]
-)
-```
+| Tool | Purpose |
+|---|---|
+| [whisper.cpp](https://github.com/ggerganov/whisper.cpp) | Upstream source |
+| [ROCm / TheRock](https://github.com/ROCm/TheRock) | HIP compiler + GPU runtime (tarball, not installed globally) |
+| [FlexML Runtime](https://github.com/lemonade-sdk/whisper.cpp/releases/tag/deps) | VitisAI NPU inference |
+| [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) | GLSL to SPIR-V shader compilation |
+| [CMake >= 3.21](https://cmake.org/) | Build system |
+| [Ninja](https://ninja-build.org/) | Fast build backend (ROCm builds) |
+| [VS Build Tools 2022](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2022) | Windows MSVC toolchain |
-## Voice Activity Detection (VAD)
-Support for Voice Activity Detection (VAD) can be enabled using the `--vad`
-argument to `whisper-cli`. In addition to this option a VAD model is also
-required.
-
-The way this works is that first the audio samples are passed through
-the VAD model which will detect speech segments. Using this information,
-only the speech segments that are detected are extracted from the original audio
-input and passed to whisper for processing. This reduces the amount of audio
-data that needs to be processed by whisper and can significantly speed up the
-transcription process.
-
-The following VAD models are currently supported:
-
-### Silero-VAD
-[Silero-vad](https://github.com/snakers4/silero-vad) is a lightweight VAD model
-written in Python that is fast and accurate.
-
-Models can be downloaded by running the following command on Linux or MacOS:
-```console
-$ ./models/download-vad-model.sh silero-v6.2.0
-Downloading ggml model silero-v6.2.0 from 'https://huggingface.co/ggml-org/whisper-vad' ...
-ggml-silero-v6.2.0.bin 100%[==============================================>] 864.35K --.-KB/s in 0.04s
-Done! Model 'silero-v6.2.0' saved in '/path/models/ggml-silero-v6.2.0.bin'
-You can now use it like this:
+---
- $ ./build/bin/whisper-cli -vm /path/models/ggml-silero-v6.2.0.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin
+## 🏗️ Repository Structure
```
-And the following command on Windows:
-```console
-> .\models\download-vad-model.cmd silero-v6.2.0
-Downloading vad model silero-v6.2.0...
-Done! Model silero-v6.2.0 saved in C:\Users\danie\work\ai\whisper.cpp\ggml-silero-v6.2.0.bin
-You can now use it like this:
-
-C:\path\build\bin\Release\whisper-cli.exe -vm C:\path\ggml-silero-v6.2.0.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav
-
+whisper.cpp-rocm/
+├── .github/
+│ └── workflows/
+│ ├── build.yml # All AMD backends — builds + publishes releases
+│ └── sync.yml # Daily upstream sync + auto-tagging
+├── ci/
+│ ├── resolve-rocm-version.sh # Resolves AMD tarball URL for a given ROCm version
+│ └── map-gpu-target.sh # Maps gfx110X/gfx120X shorthands to specific arch lists
+├── src/
+│ └── vitisai/
+│ ├── whisper-vitisai-encoder.h # VitisAI NPU encoder C interface
+│ └── whisper-vitisai-encoder.cpp # FlexML runtime integration
+├── scripts/
+│ └── local-build.ps1 # Local Windows build script (mirrors CI jobs exactly)
+├── ggml/ # GGML library (all GPU backends live here)
+├── src/ # whisper.cpp source (VitisAI hooks added)
+└── CMakeLists.txt # Adds -DWHISPER_VITISAI option
```
-To see a list of all available models, run the above commands without any
-arguments.
+---
-This model can be also be converted manually to ggml using the following command:
-```console
-$ python3 -m venv venv && source venv/bin/activate
-$ (venv) pip install silero-vad
-$ (venv) $ python models/convert-silero-vad-to-ggml.py --output models/silero.bin
-Saving GGML Silero-VAD model to models/silero-v6.2.0-ggml.bin
-```
-And it can then be used with whisper as follows:
-```console
-$ ./build/bin/whisper-cli \
- --file ./samples/jfk.wav \
- --model ./models/ggml-base.en.bin \
- --vad \
- --vad-model ./models/silero-v6.2.0-ggml.bin
-```
+## 📄 License
+
+This project is licensed under the MIT License — see [LICENSE](LICENSE) for details.
-### VAD Options
-
-* --vad-threshold: Threshold probability for speech detection. A probability
-for a speech segment/frame above this threshold will be considered as speech.
-
-* --vad-min-speech-duration-ms: Minimum speech duration in milliseconds. Speech
-segments shorter than this value will be discarded to filter out brief noise or
-false positives.
-
-* --vad-min-silence-duration-ms: Minimum silence duration in milliseconds. Silence
-periods must be at least this long to end a speech segment. Shorter silence
-periods will be ignored and included as part of the speech.
-
-* --vad-max-speech-duration-s: Maximum speech duration in seconds. Speech segments
-longer than this will be automatically split into multiple segments at silence
-points exceeding 98ms to prevent excessively long segments.
-
-* --vad-speech-pad-ms: Speech padding in milliseconds. Adds this amount of padding
-before and after each detected speech segment to avoid cutting off speech edges.
-
-* --vad-samples-overlap: Amount of audio to extend from each speech segment into
-the next one, in seconds (e.g., 0.10 = 100ms overlap). This ensures speech isn't
-cut off abruptly between segments when they're concatenated together.
-
-## Examples
-
-There are various examples of using the library for different projects in the [examples](examples) folder.
-Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
-
-| Example | Web | Description |
-| --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
-| [whisper-cli](examples/cli) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
-| [whisper-bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
-| [whisper-stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
-| [whisper-command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
-| [whisper-server](examples/server) | | HTTP transcription server with OAI-like API |
-| [whisper-talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
-| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
-| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
-| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
-| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
-| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
-| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggml-org/whisper.cpp/issues/185) |
-| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
-| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
-
-## [Discussions](https://github.com/ggml-org/whisper.cpp/discussions)
-
-If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic.
-You can use the [Show and tell](https://github.com/ggml-org/whisper.cpp/discussions/categories/show-and-tell) category
-to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the
-[Frequently asked questions (#126)](https://github.com/ggml-org/whisper.cpp/discussions/126) discussion.
+whisper.cpp is copyright Georgi Gerganov and contributors — [ggerganov/whisper.cpp](https://github.com/ggerganov/whisper.cpp).
+ROCm is copyright Advanced Micro Devices, Inc.
+VitisAI encoder copyright 2025 Advanced Micro Devices, Inc.
diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c
index 6e38ead6321..9f326c47a5b 100644
--- a/bindings/ruby/ext/ruby_whisper_context.c
+++ b/bindings/ruby/ext/ruby_whisper_context.c
@@ -308,7 +308,7 @@ check_memory_view(rb_memory_view_t *memview)
rb_warn("currently only format \"f\" is supported for MemoryView, but given: %s", memview->format);
return false;
}
- if (memview->format != NULL && memview->ndim != 1) {
+ if (memview->ndim != 1) {
rb_warn("currently only 1 dimensional MemoryView is supported, but given: %zd", memview->ndim);
return false;
}
diff --git a/ci/map-gpu-target.sh b/ci/map-gpu-target.sh
new file mode 100755
index 00000000000..1e7de7c9fcf
--- /dev/null
+++ b/ci/map-gpu-target.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Map a GFX target shorthand to specific GPU architectures for CMake.
+#
+# Usage:
+#   source ci/map-gpu-target.sh GFX_TARGET
+#
+# Arguments:
+#   GFX_TARGET - GPU target: a family shorthand (gfx110X, gfx120X) or a
+#                specific architecture (e.g. gfx1151, gfx1150), which is
+#                passed through unchanged
+#
+# Outputs (exported):
+#   MAPPED_GPU_TARGET - Semicolon-separated list of GPU architectures
+
+gfx_target="$1"
+
+if [ -z "$gfx_target" ]; then
+    # Diagnostics go to stderr so they never mix with captured stdout.
+    echo "Usage: source ci/map-gpu-target.sh GFX_TARGET" >&2
+    # `return` only works when sourced; fall back to `exit` when executed.
+    return 1 2>/dev/null || exit 1
+fi
+
+case "$gfx_target" in
+    gfx110X) MAPPED_GPU_TARGET="gfx1100;gfx1101;gfx1102" ;;
+    gfx120X) MAPPED_GPU_TARGET="gfx1200;gfx1201" ;;
+    *)       MAPPED_GPU_TARGET="$gfx_target" ;;
+esac
+
+export MAPPED_GPU_TARGET
+echo "Mapped GPU target: $gfx_target -> $MAPPED_GPU_TARGET"
diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh
new file mode 100755
index 00000000000..cf3bccbe778
--- /dev/null
+++ b/ci/resolve-rocm-version.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# Resolve the ROCm tarball URL for a given platform and version.
+#
+# Uses AMD's official repo tarball distribution:
+# https://repo.amd.com/rocm/tarball/therock-dist-{platform}-{gfx_target}-{version}.tar.gz
+#
+# Usage:
+# source ci/resolve-rocm-version.sh
+#
+# Arguments:
+# platform - "linux" or "windows"
+# gfx_target - GPU target (defaults to gfx1151 if not specified or is a group target)
+# rocm_version - Specific version (e.g. 7.12.0, 7.2.1) - required, no "latest" auto-detection
+#
+# Outputs (exported):
+# ROCM_RESOLVED_VERSION - The resolved version string
+# ROCM_TARBALL_URL - The full URL to download
+
+platform="$1"
+gfx_target="$2"
+rocm_version="$3"
+
+if [ -z "$platform" ] || [ -z "$gfx_target" ] || [ -z "$rocm_version" ]; then
+ echo "Usage: source ci/resolve-rocm-version.sh "
+ return 1 2>/dev/null || exit 1
+fi
+
+# Validate that a specific version was provided (no "latest" auto-detection)
+if [ "$rocm_version" = "latest" ]; then
+ echo "ERROR: 'latest' auto-detection is not supported."
+ echo "Please specify a concrete ROCm version (e.g., 7.12.0, 7.2.1)."
+ echo "Available versions: https://repo.amd.com/rocm/tarball/"
+ return 1 2>/dev/null || exit 1
+fi
+
+# Validate version format (should be X.Y.Z or X.Y.ZaNNNNNNNN pattern)
+if ! echo "$rocm_version" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
+ echo "ERROR: Invalid ROCm version format: '$rocm_version'"
+ echo "Expected format: X.Y.Z (e.g., 7.12.0) or X.Y.ZaNNNNNNNN (e.g., 7.11.0a20251205)"
+ return 1 2>/dev/null || exit 1
+fi
+
+# Exact tarball names published at repo.amd.com/rocm/tarball/ for 7.12.0:
+# linux: gfx110X-all, gfx120X-all, gfx1150, gfx1151, gfx1152
+# windows: gfx110X-all, gfx120X-all, gfx1150, gfx1151, gfx1152
+case "$gfx_target" in
+ gfx110X) tarball_target="gfx110X-all" ;;
+ gfx120X) tarball_target="gfx120X-all" ;;
+ gfx1150) tarball_target="gfx1150" ;;
+ gfx1151) tarball_target="gfx1151" ;;
+ gfx1152) tarball_target="gfx1152" ;;
+ *) tarball_target="$gfx_target" ;;
+esac
+
+# Construct the AMD official repo URL
+ROCM_TARBALL_URL="https://repo.amd.com/rocm/tarball/therock-dist-${platform}-${tarball_target}-${rocm_version}.tar.gz"
+
+export ROCM_RESOLVED_VERSION="$rocm_version"
+echo "ROCm version: $ROCM_RESOLVED_VERSION"
+echo "ROCm URL: $ROCM_TARBALL_URL"
diff --git a/ci/run.sh b/ci/run.sh
index cbe28442e16..9f6d73d9c04 100644
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -79,6 +79,13 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then
fi
CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
+
+    # Set HIP environment if not already set; HIP_PATH follows ROCM_PATH, and an empty LD_LIBRARY_PATH adds no trailing ':' (which would mean the current directory)
+    export HIP_PLATFORM=${HIP_PLATFORM:-amd}
+    export ROCM_PATH=${ROCM_PATH:-/opt/rocm}
+    export HIP_PATH=${HIP_PATH:-${ROCM_PATH}}
+    export LD_LIBRARY_PATH=${ROCM_PATH}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_HIP_COMPILER=${ROCM_PATH}/lib/llvm/bin/clang++"
fi
if [ ! -z ${GG_BUILD_SYCL} ]; then
@@ -223,7 +230,7 @@ function gg_run_ctest {
gg_check_build_requirements
(time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
- (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
+ (time make -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
diff --git a/scripts/local-build.ps1 b/scripts/local-build.ps1
new file mode 100644
index 00000000000..db90dd5a747
--- /dev/null
+++ b/scripts/local-build.ps1
@@ -0,0 +1,395 @@
+<#
+.SYNOPSIS
+ Local build script for whisper-cpp-amd. Mirrors the GitHub Actions build.yml jobs for Windows.
+
+.DESCRIPTION
+ Builds one or more AMD backends locally, producing the same zip artifacts that CI publishes.
+
+.PARAMETER Backend
+ Which backend to build: cpu, vulkan, rocm, npu, all. Default: cpu
+
+.PARAMETER GfxTarget
+ ROCm GPU target. Default: gfx1151
+ Common: gfx1151, gfx1150, gfx1100, gfx1200
+
+.PARAMETER RocmVersion
+ ROCm version to download. Default: 7.12.0
+
+.PARAMETER OutputDir
+ Directory for final zip artifacts. Default: .\dist
+
+.PARAMETER BuildDir
+ CMake build directory prefix. Default: .\build-local
+
+.PARAMETER Version
+ Version string used in artifact filenames. Default: local
+
+.EXAMPLE
+ .\scripts\local-build.ps1 -Backend cpu
+ .\scripts\local-build.ps1 -Backend vulkan
+ .\scripts\local-build.ps1 -Backend rocm -GfxTarget gfx1151
+ .\scripts\local-build.ps1 -Backend npu
+ .\scripts\local-build.ps1 -Backend all -Version 1.8.4
+#>
+
+# Script parameters; each one is described in the comment-based help above.
+param(
+ [ValidateSet("cpu","vulkan","rocm","npu","all")]
+ [string]$Backend = "cpu",
+ [string]$GfxTarget = "gfx1151",
+ [string]$RocmVersion = "7.12.0",
+ [string]$OutputDir = ".\dist",
+ [string]$BuildDir = ".\build-local",
+ [string]$Version = "local"
+)
+
+# Strict mode turns use of uninitialised variables and missing properties into errors.
+Set-StrictMode -Version Latest
+# Make cmdlet errors terminating so a failed step throws instead of being skipped over.
+$ErrorActionPreference = "Stop"
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+# Print a cyan banner (blank line, rule, message, rule) announcing a build phase.
+function Write-Step([string]$msg) {
+    $rule = "================================================"
+    Write-Host ""
+    foreach ($line in @($rule, " $msg", $rule)) {
+        Write-Host $line -ForegroundColor Cyan
+    }
+}
+
+# Status-line helpers. They write to the host, so they never add anything to
+# the output (return value) of the function that calls them.
+function Write-Ok([string]$msg) {
+    Write-Host " [OK] $msg" -ForegroundColor Green
+}
+
+function Write-Info([string]$msg) {
+    Write-Host " --> $msg" -ForegroundColor Yellow
+}
+
+function Write-Fail([string]$msg) {
+    Write-Host " [X] $msg" -ForegroundColor Red
+}
+
+# Verify that $cmd can be resolved by Get-Command; throw if it cannot.
+function Require-Command([string]$cmd) {
+    $resolved = Get-Command $cmd -ErrorAction SilentlyContinue
+    if ($resolved) {
+        Write-Ok "$cmd found"
+        return
+    }
+
+    Write-Fail "$cmd not found in PATH"
+    throw "Missing requirement: $cmd"
+}
+
+# Download and unpack the SDL2 development package (unless SDL2-$Ver already
+# exists in the current directory) and return the directory that contains
+# sdl2-config.cmake, suitable for -DSDL2_DIR.
+#   Ver - SDL2 release to fetch (default 2.28.5).
+# Throws when the archive does not unpack to SDL2-$Ver or when
+# sdl2-config.cmake is missing from it.
+function Download-SDL2 {
+    param([string]$Ver = "2.28.5")
+    $sdlDir = "SDL2-$Ver"
+    if (Test-Path $sdlDir) {
+        Write-Info "SDL2 already extracted at $sdlDir"
+    } else {
+        Write-Info "Downloading SDL2 $Ver ..."
+        $url = "https://github.com/libsdl-org/SDL/releases/download/release-$Ver/SDL2-devel-$Ver-VC.zip"
+        Invoke-WebRequest -Uri $url -OutFile "sdl2.zip"
+        # Use the built-in extractor: no dependency on 7z being installed, and a
+        # failed extraction raises an error instead of being silently ignored.
+        Expand-Archive -Path "sdl2.zip" -DestinationPath "." -Force
+        Remove-Item sdl2.zip
+        if (-not (Test-Path $sdlDir)) {
+            throw "SDL2 archive did not unpack to the expected directory $sdlDir"
+        }
+
+        # Patch SDL_endian.h (needed for AMD clang compatibility).
+        # Search only the SDL2 tree so no unrelated copy in the repo is touched.
+        $hdr = Get-ChildItem -Path $sdlDir -Recurse -Filter "SDL_endian.h" | Select-Object -First 1
+        if ($hdr) {
+            $content = Get-Content $hdr.FullName -Raw
+            if ($content -match 'extern void _m_prefetch') {
+                $patched = $content -replace 'extern void _m_prefetch\(void \*__P\);', '// extern void _m_prefetch(void *__P);'
+                Set-Content -Path $hdr.FullName -Value $patched -NoNewline
+                Write-Ok "Patched SDL_endian.h"
+            }
+        }
+    }
+
+    # Look inside this SDL2 tree only; a repo-wide search could return the
+    # config file of another SDL2 version or of a stale build directory.
+    $cmake = Get-ChildItem -Path $sdlDir -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1
+    if (-not $cmake) { throw "sdl2-config.cmake not found after SDL2 extraction" }
+    return $cmake.DirectoryName
+}
+
+# Zip everything under $BinPath into $OutputDir\$Name.zip (overwriting an
+# existing archive) and return the path of the zip file.
+function Package-Build {
+    param([string]$Name, [string]$BinPath)
+
+    $null = New-Item -ItemType Directory -Force -Path $OutputDir
+    $zip = Join-Path $OutputDir "$Name.zip"
+
+    Write-Info "Creating $zip ..."
+    Compress-Archive -Path "$BinPath\*" -DestinationPath $zip -Force
+
+    $sizeBytes = (Get-Item $zip).Length
+    $mb = [math]::Round($sizeBytes / 1MB, 2)
+    Write-Ok "Created $zip ($mb MB)"
+
+    $zip
+}
+
+# Configure the repository into $Dir with the given CMake arguments, then build
+# the $Config configuration. Throws if either cmake invocation exits non-zero.
+# Note: $Arch is accepted but not used by the body.
+function Run-MSBuild {
+    param([string]$Dir, [string[]]$ConfigArgs, [string]$Config = "Release", [string]$Arch = "x64")
+
+    Write-Info "CMake configure ..."
+    & cmake -S . -B $Dir @ConfigArgs
+    if ($LASTEXITCODE -ne 0) {
+        throw "CMake configure failed (exit $LASTEXITCODE)"
+    }
+
+    Write-Info "MSBuild $Config ..."
+    $jobs = $env:NUMBER_OF_PROCESSORS
+    & cmake --build $Dir --config $Config -j $jobs
+    if ($LASTEXITCODE -ne 0) {
+        throw "Build failed (exit $LASTEXITCODE)"
+    }
+}
+
+# ── Preflight ─────────────────────────────────────────────────────────────────
+
+# The script uses relative paths throughout, so it must start in the repo root.
+$atRepoRoot = (Test-Path "CMakeLists.txt") -and (Test-Path "src\whisper.cpp")
+if (-not $atRepoRoot) {
+    Write-Fail "Run this script from the whisper-cpp-amd repo root."
+    exit 1
+}
+
+Require-Command cmake
+$null = New-Item -ItemType Directory -Force -Path $OutputDir
+
+# ── Build functions ───────────────────────────────────────────────────────────
+
+# Build the CPU backend, ship SDL2.dll next to the binaries and package the
+# Release output as whisper-$Version-windows-cpu-x64.zip.
+function Build-CPU {
+    Write-Step "CPU - Windows x64"
+    Require-Command msbuild
+
+    $SDL2_DIR = Download-SDL2
+    $dir = "$BuildDir-cpu"
+    $binOut = "$dir\bin\Release"
+
+    $cmakeArgs = @(
+        "-A", "x64",
+        "-DCMAKE_BUILD_TYPE=Release",
+        "-DBUILD_SHARED_LIBS=ON",
+        "-DWHISPER_SDL2=ON",
+        "-DSDL2_DIR=$SDL2_DIR"
+    )
+    Run-MSBuild $dir $cmakeArgs
+
+    # Copy the SDL2 runtime DLL, when one is present in the extracted package.
+    $sdl2dll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1
+    if ($sdl2dll) {
+        Copy-Item $sdl2dll.FullName "$binOut\" -Force
+    }
+
+    $zip = Package-Build "whisper-$Version-windows-cpu-x64" $binOut
+    Write-Ok "CPU build done. Artifact: $zip"
+}
+
+# Build the Vulkan backend and package the Release output as
+# whisper-$Version-windows-vulkan-x64.zip.
+# The SDK comes from $env:VULKAN_SDK, or else from the newest directory under
+# C:\VulkanSDK. Throws when no SDK can be located or the build fails.
+function Build-Vulkan {
+    Write-Step "Vulkan - Windows x64"
+    Require-Command msbuild
+
+    # Locate Vulkan SDK
+    $VULKAN_SDK = $env:VULKAN_SDK
+    if (-not $VULKAN_SDK) {
+        # SDK directories are named after their version (e.g. 1.3.280.0), so
+        # compare them as [version]: a plain string sort ranks 1.3.9 above
+        # 1.3.280. Names that do not parse as a version sort last.
+        $byVersion = {
+            $parsed = $null
+            if ([version]::TryParse($_.Name, [ref]$parsed)) { $parsed } else { [version]"0.0" }
+        }
+        $sdkDir = Get-ChildItem "C:\VulkanSDK" -Directory -ErrorAction SilentlyContinue |
+            Sort-Object -Property $byVersion, Name -Descending |
+            Select-Object -First 1
+        if (-not $sdkDir) {
+            Write-Fail "Vulkan SDK not found. Install from https://vulkan.lunarg.com/sdk/home"
+            throw "Missing Vulkan SDK"
+        }
+        $VULKAN_SDK = $sdkDir.FullName
+        # CMake's FindVulkan module looks at the VULKAN_SDK environment
+        # variable, so publish the auto-detected location there as well.
+        $env:VULKAN_SDK = $VULKAN_SDK
+    }
+    Write-Ok "Vulkan SDK: $VULKAN_SDK"
+
+    $SDL2_DIR = Download-SDL2
+    $dir = "$BuildDir-vulkan"
+
+    Run-MSBuild $dir @(
+        "-A", "x64",
+        "-DCMAKE_BUILD_TYPE=Release",
+        "-DBUILD_SHARED_LIBS=ON",
+        "-DGGML_VULKAN=ON",
+        "-DWHISPER_SDL2=ON",
+        "-DSDL2_DIR=$SDL2_DIR",
+        "-DVULKAN_SDK=$VULKAN_SDK"
+    )
+
+    # Copy the SDL2 runtime DLL, when one is present in the extracted package.
+    $sdl2dll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1
+    if ($sdl2dll) { Copy-Item $sdl2dll.FullName "$dir\bin\Release\" -Force }
+
+    $zip = Package-Build "whisper-$Version-windows-vulkan-x64" "$dir\bin\Release"
+    Write-Ok "Vulkan build done. Artifact: $zip"
+}
+
+# Build the ROCm (HIP) backend with the AMD clang toolchain and package it as
+# whisper-$Version-windows-rocm-$GfxTarget.zip.
+# Steps: fetch the ROCm tarball into C:\opt\rocm unless amdclang.exe is already
+# there, configure with Ninja Multi-Config, build Release, copy the ROCm runtime
+# DLLs and kernel libraries next to the binaries, then zip the output directory.
+# Throws when ninja is missing or when extraction, configure or build fails.
+function Build-ROCm {
+    Write-Step "ROCm - Windows x64 (target: $GfxTarget)"
+    Require-Command ninja
+
+    # ── Download ROCm tarball ──────────────────────────────────────────────
+    # NOTE: an existing install is reused whatever target it was fetched for.
+    $rocmRoot = "C:\opt\rocm"
+    if (-not (Test-Path "$rocmRoot\bin\amdclang.exe")) {
+        Write-Info "Downloading ROCm $RocmVersion for $GfxTarget (2-4 GB, takes a few minutes) ..."
+
+        # Tarballs are published per GPU family (see ci/resolve-rocm-version.sh):
+        # gfx110X-all, gfx120X-all, gfx1150, gfx1151, gfx1152. Pick the one that
+        # actually covers the requested target; a gfx1151 tarball does not
+        # serve the gfx110X / gfx120X families or gfx1150.
+        $tarballTarget = switch ($GfxTarget) {
+            { $_ -in @("gfx110X", "gfx1100", "gfx1101", "gfx1102") } { "gfx110X-all" }
+            { $_ -in @("gfx120X", "gfx1200", "gfx1201") }            { "gfx120X-all" }
+            default                                                  { $GfxTarget }
+        }
+        $tarballUrl = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${tarballTarget}-${RocmVersion}.tar.gz"
+        Write-Info "URL: $tarballUrl"
+
+        Invoke-WebRequest -Uri $tarballUrl -OutFile rocm.tar.gz
+        New-Item -ItemType Directory -Force -Path $rocmRoot | Out-Null
+        & tar -xzf rocm.tar.gz -C $rocmRoot --strip-components=1
+        if ($LASTEXITCODE -ne 0) { throw "ROCm extraction failed" }
+        Remove-Item rocm.tar.gz
+        Write-Ok "ROCm extracted to $rocmRoot"
+    } else {
+        Write-Ok "ROCm already present at $rocmRoot"
+    }
+
+    # ── Map GFX target (mirrors map-gpu-target.sh) ─────────────────────────
+    $mappedTarget = switch ($GfxTarget) {
+        "gfx110X" { "gfx1100;gfx1101;gfx1102" }
+        "gfx120X" { "gfx1200;gfx1201" }
+        default   { $GfxTarget }
+    }
+    Write-Info "GPU target: $GfxTarget -> $mappedTarget"
+
+    $SDL2_DIR = Download-SDL2
+
+    # ── Set ROCm env ──────────────────────────────────────────────────────
+    $env:HIP_PATH = $rocmRoot
+    $env:HIP_PLATFORM = "amd"
+    $env:PATH = "$rocmRoot\bin;$rocmRoot\lib\llvm\bin;$env:PATH"
+
+    # ── Configure ─────────────────────────────────────────────────────────
+    $dir = "$BuildDir-rocm-$GfxTarget"
+    Write-Info "CMake configure (Ninja Multi-Config) ..."
+    # Arguments are collected in an array and splatted, which avoids fragile
+    # backtick line continuations.
+    $cmakeArgs = @(
+        "-S", ".",
+        "-B", $dir,
+        "-G", "Ninja Multi-Config",
+        "-DGPU_TARGETS=$mappedTarget",
+        "-DGGML_HIP=ON",
+        "-DCMAKE_C_COMPILER=$rocmRoot/lib/llvm/bin/amdclang.exe",
+        "-DCMAKE_CXX_COMPILER=$rocmRoot/lib/llvm/bin/amdclang++.exe",
+        "-DCMAKE_HIP_COMPILER=$rocmRoot/lib/llvm/bin/amdclang++.exe",
+        "-DCMAKE_C_FLAGS=-D__PRFCHWINTRIN_H",
+        "-DCMAKE_CXX_FLAGS=-D__PRFCHWINTRIN_H",
+        "-DCMAKE_HIP_FLAGS=--rocm-path=$rocmRoot",
+        "-DCMAKE_PREFIX_PATH=$rocmRoot",
+        "-DCMAKE_BUILD_TYPE=Release",
+        "-DBUILD_SHARED_LIBS=ON",
+        "-DWHISPER_SDL2=ON",
+        "-DSDL2_DIR=$SDL2_DIR"
+    )
+    & cmake @cmakeArgs
+    if ($LASTEXITCODE -ne 0) { throw "CMake configure failed" }
+
+    Write-Info "Building ..."
+    & cmake --build $dir --config Release -j $env:NUMBER_OF_PROCESSORS
+    if ($LASTEXITCODE -ne 0) { throw "Build failed" }
+
+    # ── Copy ROCm DLLs ────────────────────────────────────────────────────
+    $binOut = "$dir\bin\Release"
+    $rocBin = "$rocmRoot\bin"
+    Write-Info "Copying ROCm DLLs ..."
+    $dllPatterns = @("amdhip64_*.dll", "amd_comgr*.dll", "libhipblas.dll", "rocblas.dll",
+                     "rocsolver.dll", "hipblaslt.dll", "libhipblaslt.dll", "hipblas.dll")
+    foreach ($pattern in $dllPatterns) {
+        # Patterns that match nothing are skipped silently.
+        Get-ChildItem -Path $rocBin -Filter $pattern -ErrorAction SilentlyContinue |
+            ForEach-Object { Copy-Item $_.FullName -Destination $binOut -Force }
+    }
+
+    # Kernel libraries live in NAME\library. Create the parent directory and
+    # copy into it, so a rebuild refreshes NAME\library instead of nesting a
+    # second NAME\library\library inside the existing one.
+    foreach ($libName in @("rocblas", "hipblaslt")) {
+        $libSource = Join-Path $rocBin "$libName\library"
+        if (Test-Path $libSource) {
+            $libParent = Join-Path $binOut $libName
+            New-Item -ItemType Directory -Force -Path $libParent | Out-Null
+            Copy-Item $libSource -Destination $libParent -Recurse -Force
+        }
+    }
+
+    $sdl2dll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1
+    if ($sdl2dll) { Copy-Item $sdl2dll.FullName $binOut -Force }
+
+    $zip = Package-Build "whisper-$Version-windows-rocm-$GfxTarget" $binOut
+    Write-Ok "ROCm build done. Artifact: $zip"
+}
+
+# Build the NPU (VitisAI / RyzenAI) backend and package the Release output as
+# whisper-$Version-windows-npu-x64.zip.
+# Steps: fetch the FlexML runtime unless a flexmlrt* directory already exists,
+# run its setup.bat and import the resulting environment, configure and build
+# with -DWHISPER_VITISAI=ON, copy the FlexML DLLs next to the binaries, zip.
+# Throws when the download, setup.bat, configure or build step fails.
+function Build-NPU {
+    Write-Step "NPU (VitisAI / RyzenAI) - Windows x64"
+    Require-Command msbuild
+
+    # ── FlexML Runtime ────────────────────────────────────────────────────
+    $flexmlDir = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1
+    if (-not $flexmlDir) {
+        Write-Info "Downloading FlexML Runtime ..."
+        $url = "https://github.com/lemonade-sdk/whisper.cpp/releases/download/deps/flexmlrt1.7.0-win.zip"
+        Invoke-WebRequest -Uri $url -OutFile flexmlrt.zip
+        if (-not (Test-Path "flexmlrt.zip") -or (Get-Item "flexmlrt.zip").Length -eq 0) {
+            throw "flexmlrt.zip download failed or is empty"
+        }
+        $mb = [math]::Round((Get-Item "flexmlrt.zip").Length / 1MB, 2)
+        Write-Ok "Downloaded FlexML: $mb MB"
+
+        & tar xvf flexmlrt.zip
+        if ($LASTEXITCODE -ne 0) { throw "FlexML extraction failed" }
+        Remove-Item flexmlrt.zip
+
+        $flexmlDir = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1
+        if (-not $flexmlDir) { throw "No flexmlrt directory found after extraction" }
+    }
+    Write-Ok "FlexML Runtime: $($flexmlDir.FullName)"
+
+    # ── Run setup.bat via a temporary cmd script ───────────────────────────
+    # cmd /c with && is not reliable from PowerShell; use a temp .bat file instead.
+    # setup.bat runs in a child cmd.exe, so anything it sets disappears when that
+    # process exits. The wrapper therefore prints a marker followed by `set`
+    # output, and the variables listed after the marker are copied into this
+    # process so the cmake calls below can see them.
+    # A random name is used directly: GetTempFileName() would also create a
+    # .tmp file that nobody deletes.
+    $tempBat = Join-Path ([System.IO.Path]::GetTempPath()) ([System.IO.Path]::GetRandomFileName() + ".bat")
+    $setupPath = Join-Path $flexmlDir.FullName "setup.bat"
+    $batLines = @(
+        "@echo off",
+        "call `"$setupPath`"",
+        "if errorlevel 1 exit /b 1",
+        "echo FLEXML_OK",
+        "set"
+    )
+    Set-Content -Path $tempBat -Value ($batLines -join "`r`n")
+    Write-Info "Running FlexML setup.bat ..."
+    # Normalise every output record to a string and always get an array back.
+    $setupOut = @(& cmd /c $tempBat 2>&1 | ForEach-Object { "$_" })
+    $setupExit = $LASTEXITCODE
+    Remove-Item $tempBat -ErrorAction SilentlyContinue
+
+    # Find the marker line explicitly. Applying -notmatch to the whole array
+    # would act as a filter and report failure as soon as setup.bat printed
+    # any line other than the marker.
+    $markerIndex = -1
+    for ($i = 0; $i -lt $setupOut.Count; $i++) {
+        if ($setupOut[$i].Trim() -eq "FLEXML_OK") { $markerIndex = $i; break }
+    }
+
+    if ($setupExit -ne 0 -or $markerIndex -lt 0) {
+        Write-Fail "FlexML setup.bat failed. Output:"
+        $setupOut | ForEach-Object { Write-Host " $_" }
+        throw "FlexML setup failed. Ensure NPU drivers (>= .280) are installed."
+    }
+
+    # Import NAME=VALUE lines printed by `set` after the marker.
+    $setupOut | Select-Object -Skip ($markerIndex + 1) | ForEach-Object {
+        if ($_ -match '^([^=]+)=(.*)$') {
+            [Environment]::SetEnvironmentVariable($Matches[1], $Matches[2], "Process")
+        }
+    }
+    Write-Ok "FlexML environment configured"
+
+    # ── CMake configure + build ───────────────────────────────────────────
+    $dir = "$BuildDir-npu"
+    Write-Info "CMake configure with -DWHISPER_VITISAI=ON ..."
+    & cmake -B $dir -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_VITISAI=ON
+    if ($LASTEXITCODE -ne 0) { throw "CMake configure failed" }
+
+    Write-Info "Building ..."
+    & cmake --build $dir --config Release -j $env:NUMBER_OF_PROCESSORS
+    if ($LASTEXITCODE -ne 0) { throw "Build failed" }
+
+    # ── List output ───────────────────────────────────────────────────────
+    $binOut = "$dir\bin\Release"
+    if (Test-Path $binOut) {
+        Write-Info "Build output:"
+        Get-ChildItem $binOut | Format-Table Name, Length -AutoSize
+    } else {
+        throw "Expected output directory $binOut not found"
+    }
+
+    # ── Copy FlexML DLLs ─────────────────────────────────────────────────
+    Write-Info "Copying FlexML DLLs ..."
+    $copied = 0
+    foreach ($sub in @("bin", "lib")) {
+        $subPath = Join-Path $flexmlDir.FullName $sub
+        if (Test-Path $subPath) {
+            # @(...) guarantees an array, so .Count is valid under strict mode
+            # even when exactly one DLL matches.
+            $dlls = @(Get-ChildItem "$subPath\*.dll" -ErrorAction SilentlyContinue)
+            if ($dlls.Count -gt 0) {
+                Copy-Item $dlls.FullName $binOut -Force
+                $copied += $dlls.Count
+            }
+        }
+    }
+    Write-Ok "Copied $copied FlexML DLLs"
+
+    $zip = Package-Build "whisper-$Version-windows-npu-x64" $binOut
+    Write-Ok "NPU build done. Artifact: $zip"
+    Write-Info "To run: place the .rai encoder model next to your ggml-*.bin and run whisper-cli.exe normally."
+}
+
+# ── Main dispatch ─────────────────────────────────────────────────────────────
+
+# Resolve the list of backends to build, then run each builder in turn.
+# A failure in one backend is recorded and does not stop the remaining ones.
+$targets = @($Backend)
+if ($Backend -eq "all") {
+    $targets = @("cpu","vulkan","rocm","npu")
+}
+$results = [ordered]@{}
+
+# Backend name -> builder to invoke.
+$builders = @{
+    "cpu"    = { Build-CPU }
+    "vulkan" = { Build-Vulkan }
+    "rocm"   = { Build-ROCm }
+    "npu"    = { Build-NPU }
+}
+
+foreach ($t in $targets) {
+    try {
+        if ($builders.ContainsKey($t)) {
+            & $builders[$t]
+        }
+        $results[$t] = "[OK] PASSED"
+    } catch {
+        Write-Fail "[$t] failed: $_"
+        $results[$t] = "[FAIL] $_"
+    }
+}
+
+# ── Summary ───────────────────────────────────────────────────────────────────
+
+# Print one coloured line per backend, list the produced archives, and make
+# the exit code reflect the overall outcome.
+Write-Step "Build Summary"
+foreach ($t in $targets) {
+    $color = if ($results[$t].StartsWith("[OK]")) { "Green" } else { "Red" }
+    Write-Host " $t : $($results[$t])" -ForegroundColor $color
+}
+
+Write-Host ""
+if (Test-Path $OutputDir) {
+    # Resolve-Path throws on a missing directory, so only call it once the
+    # directory is known to exist.
+    Write-Host "Artifacts in: $(Resolve-Path $OutputDir)" -ForegroundColor Cyan
+    Get-ChildItem $OutputDir -Filter "*.zip" | ForEach-Object {
+        $mb = [math]::Round($_.Length / 1MB, 2)
+        Write-Host " $($_.Name) ($mb MB)"
+    }
+}
+
+# Failures are caught per backend above, so without this the script would
+# report success to the caller even when a build failed.
+$failed = @($targets | Where-Object { -not $results[$_].StartsWith("[OK]") })
+if ($failed.Count -gt 0) {
+    exit 1
+}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 095a2791de5..fe10876eaf7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -48,6 +48,10 @@ if (WHISPER_OPENVINO)
find_package(OpenVINO REQUIRED COMPONENTS Runtime)
endif()
+# The Vitis AI encoder links against flexmlrt::flexmlrt, which comes from the
+# FlexmlRT package; REQUIRED makes configuration stop when it is not found.
+if (WHISPER_VITISAI)
+ find_package(FlexmlRT REQUIRED)
+endif()
+
#
# libraries
#
@@ -101,6 +105,30 @@ if (WHISPER_OPENVINO)
set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
endif()
+if (WHISPER_VITISAI)
+    set(TARGET whisper.vitisai)
+
+    # Vitis AI encoder sources, compiled as an object library
+    add_library(${TARGET} OBJECT
+        vitisai/whisper-vitisai-encoder.h
+        vitisai/whisper-vitisai-encoder.cpp
+        )
+
+    target_include_directories(${TARGET} PUBLIC
+        .
+        )
+
+    set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)
+    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_VITISAI)
+
+    # FlexML headers use C++17 features (structured bindings etc.). Request the
+    # standard as a compile feature so each toolchain gets its own flag, not only MSVC.
+    target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+    target_link_libraries(${TARGET} PRIVATE ggml flexmlrt::flexmlrt)
+    set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
+endif()
+
# whisper
add_library(whisper
@@ -137,6 +165,10 @@ if (WHISPER_OPENVINO)
target_link_libraries(whisper PRIVATE whisper.openvino)
endif()
+# Link the Vitis AI encoder target into the whisper library.
+if (WHISPER_VITISAI)
+ target_link_libraries(whisper PRIVATE whisper.vitisai)
+endif()
+
if (WHISPER_MKL)
target_link_libraries(whisper PRIVATE MKL::MKL)
endif()
diff --git a/src/vitisai/whisper-vitisai-encoder.cpp b/src/vitisai/whisper-vitisai-encoder.cpp
new file mode 100644
index 00000000000..a6d20a88c9a
--- /dev/null
+++ b/src/vitisai/whisper-vitisai-encoder.cpp
@@ -0,0 +1,204 @@
+// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved.
+#include "vitisai/whisper-vitisai-encoder.h"
+#include "FlexMLClient.h"
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#include
+#include
+#ifdef _WIN32
+ #include
+#else
+ #include
+ #include
+ #include
+#endif
+#include
+#include
+
+// State owned by one Vitis AI encoder instance.
+struct whisper_vitisai_context {
+    std::string model_path; // model path handed to the FlexML runtime
+    std::shared_ptr runner; // FlexML runner, created in whisper_vitisai_init()
+    // Memory-mapped .rai file. These stay nullptr / 0 when the model is not a
+    // .rai artifact, so whisper_vitisai_free() never unmaps an indeterminate pointer.
+    uint8_t * fbs_buffer      = nullptr;
+    size_t    fbs_buffer_size = 0;
+};
+
+// Map the .rai file at `path` read-only into memory (MapViewOfFile on Windows,
+// mmap elsewhere).
+// On success: stores the mapping address in *buffer and its length in *size and
+// returns true; release the mapping with unmap_rai_file().
+// On failure: logs to stderr, leaves *buffer == nullptr and *size == 0, and
+// returns false.
+bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size) {
+    *buffer = nullptr;
+    *size   = 0;
+#ifdef _WIN32
+    // Open the file
+    HANDLE hFile = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (hFile == INVALID_HANDLE_VALUE) {
+        std::fprintf(stderr, "%s: %d: Failed to open rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+
+    // Get the file size
+    LARGE_INTEGER fileSize;
+    if (!GetFileSizeEx(hFile, &fileSize)) {
+        CloseHandle(hFile);
+        std::fprintf(stderr, "%s: %d: Failed to get file size for rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+
+    // Create a file mapping object. A maximum size of 0/0 means "the current
+    // size of the file"; passing the 64-bit size as the low DWORD alone would
+    // truncate it for files of 4 GiB or more.
+    HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (hMapping == NULL) {
+        CloseHandle(hFile);
+        std::fprintf(stderr, "%s: %d: Failed to create file mapping for rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+
+    // Map the whole file (0 bytes requested = map to the end of the mapping)
+    void * view = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
+
+    // A mapped view holds its own reference to the mapping object and the
+    // file, so both handles can be closed right away instead of being leaked.
+    CloseHandle(hMapping);
+    CloseHandle(hFile);
+
+    if (view == NULL) {
+        std::fprintf(stderr, "%s: %d: Failed to map rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+    *buffer = (uint8_t *) view;
+    *size   = (size_t) fileSize.QuadPart;
+    return true;
+#else
+    // Open the file
+    FILE * fd = fopen(path, "rb");
+    if (!fd) {
+        std::fprintf(stderr, "%s: %d: Failed to open rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+
+    // Get the file size
+    struct stat st;
+    if (fstat(fileno(fd), &st) == -1) {
+        fclose(fd);
+        std::fprintf(stderr, "%s: %d: Failed to get file size for rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+
+    // Mmap the file
+    void * addr = mmap(nullptr, st.st_size, PROT_READ, MAP_SHARED, fileno(fd), 0);
+
+    // The mapping stays valid after the descriptor is closed, so the stream is
+    // released on every path instead of being leaked on success.
+    fclose(fd);
+
+    if (addr == MAP_FAILED) {
+        // *buffer is left as nullptr rather than MAP_FAILED
+        std::fprintf(stderr, "%s: %d: Failed to mmap rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+    *buffer = (uint8_t *) addr;
+    *size   = st.st_size;
+    return true;
+#endif // _WIN32
+}
+
+// Release a mapping obtained from map_rai_file().
+void unmap_rai_file(uint8_t * buffer, size_t size) {
+#if defined(_WIN32)
+    (void) size; // UnmapViewOfFile takes only the base address
+    UnmapViewOfFile(buffer);
+#else
+    munmap(buffer, size);
+#endif
+}
+
+// Create a Vitis AI encoder context for the model at `path_model`.
+// The path is replaced by the OVERRIDE_VITISAI_MODEL_PATH environment variable
+// when that is set and non-empty. A path ending in ".rai" is memory-mapped and
+// handed to the runtime as an in-memory buffer.
+// Returns nullptr when path_model is null, the .rai file cannot be mapped, or
+// the runner cannot be created; nothing is left allocated or mapped in that case.
+struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model) {
+    if (!path_model) {
+        std::fprintf(stderr, "%s: path_model is null\n", __func__);
+        return nullptr;
+    }
+
+    auto * ctx = new whisper_vitisai_context;
+    ctx->model_path = path_model;
+    // Always start from a known state: whisper_vitisai_free() checks fbs_buffer
+    // even for models that never map a file.
+    ctx->fbs_buffer      = nullptr;
+    ctx->fbs_buffer_size = 0;
+
+    // Override the model path with the environment variable if it is set
+    if (const char * env_model_path = std::getenv("OVERRIDE_VITISAI_MODEL_PATH")) {
+        if (env_model_path[0] != '\0') {
+            ctx->model_path = env_model_path;
+        }
+    }
+
+    // Step 1: Set up the model
+    flexmlrt::client::Options options;
+    options.modelPath   = ctx->model_path;
+    options.deviceName  = "stx";
+    options.debug       = false;
+    options.executeMode = 2;
+    options.extOptions["ai_analyzer_profiling"] = true; // Enable AIA profiling
+    options.extOptions["enable_preemption"]     = true;
+
+    // Treat the model as a rai file only when the path ends in ".rai"; a
+    // substring search would also match ".rai" inside a directory name.
+    static const char rai_ext[] = ".rai";
+    const size_t      rai_len   = sizeof(rai_ext) - 1;
+    const std::string & mp      = ctx->model_path;
+    const bool is_rai = mp.size() >= rai_len && mp.compare(mp.size() - rai_len, rai_len, rai_ext) == 0;
+
+    if (is_rai) {
+        // mmap rai file for both Linux and Windows and pass the buffer to the options
+        if (map_rai_file(ctx->model_path.c_str(), &ctx->fbs_buffer, &ctx->fbs_buffer_size)) {
+            options.extOptions["fbs_buffer"]      = ctx->fbs_buffer;
+            options.extOptions["fbs_buffer_size"] = ctx->fbs_buffer_size;
+            options.subgraphName                  = "vaiml_par_0";
+            options.extOptions["cache_dir"]       = std::string(".");
+        } else {
+            std::fprintf(stderr, "%s: Failed to mmap rai file '%s'\n", __func__, ctx->model_path.c_str());
+            delete ctx;
+            return nullptr;
+        }
+    }
+
+    try {
+        ctx->runner = std::make_shared(options);
+
+        if (!ctx->runner->good()) {
+            throw std::runtime_error("Runner creation ran into an error");
+        }
+    } catch (const std::exception & e) {
+        std::fprintf(stderr, "%s: Exception during Vitis AI runner creation: %s\n", __func__, e.what());
+        // Drop the runner before the buffer it was given, then release the
+        // mapping; a plain delete would leak the mapped file.
+        ctx->runner.reset();
+        if (ctx->fbs_buffer) {
+            unmap_rai_file(ctx->fbs_buffer, ctx->fbs_buffer_size);
+        }
+        delete ctx;
+        return nullptr;
+    }
+    return ctx;
+}
+
+// Destroy a context created by whisper_vitisai_init(). Passing nullptr is a no-op.
+void whisper_vitisai_free(struct whisper_vitisai_context * ctx) {
+    if (!ctx) {
+        return;
+    }
+
+    std::fprintf(stderr, "%s: releasing Vitis AI encoder context for model '%s'\n", __func__, ctx->model_path.c_str());
+
+    // Release the runner first: it was handed the mapped buffer at creation
+    // and may still reference it, so the mapping has to outlive the runner.
+    ctx->runner.reset();
+
+    if (ctx->fbs_buffer) {
+        unmap_rai_file(ctx->fbs_buffer, ctx->fbs_buffer_size);
+        ctx->fbs_buffer      = nullptr;
+        ctx->fbs_buffer_size = 0;
+    }
+    delete ctx;
+}
+
+// Run the Vitis AI encoder: `mel` supplies the input data and the result is
+// written to the memory behind `out`. Both tensors must be 2-dimensional.
+// Returns 1 on success and 0 on any failure (null argument, wrong rank, the
+// model exposing no input/output tensor, or an exception from the runtime);
+// every failure is logged to stderr.
+int whisper_vitisai_encode(struct whisper_vitisai_context * ctx, struct ggml_tensor * mel, struct ggml_tensor * out) {
+    if (!ctx || !mel || !out) {
+        std::fprintf(stderr, "%s: ctx/mel/out must not be null\n", __func__);
+        return 0;
+    }
+
+    if (ggml_n_dims(mel) != 2) {
+        std::fprintf(stderr, "%s: mel tensor expected to have 2 dims, got %d\n", __func__, ggml_n_dims(mel));
+        return 0;
+    }
+
+    if (ggml_n_dims(out) != 2) {
+        std::fprintf(stderr, "%s: out tensor expected to have 2 dims, got %d\n", __func__, ggml_n_dims(out));
+        return 0;
+    }
+
+    // Hold our own reference to the runner for the duration of the call
+    auto model = ctx->runner;
+
+    // Every runtime call sits inside the try block so that no exception can
+    // escape through this C-linkage entry point.
+    try {
+        // Get tensors as CPU tensors (hwTensor = false)
+        auto input_tensors  = model->getIOTensors("input", false);
+        auto output_tensors = model->getIOTensors("output", false);
+
+        // Indexing [0] on an empty container is undefined behaviour
+        if (input_tensors.empty() || output_tensors.empty()) {
+            std::fprintf(stderr, "%s: model exposes no input or no output tensor\n", __func__);
+            return 0;
+        }
+
+        // TODO: add assert checks for tensor shapes
+
+        // Point the runtime tensors at the ggml buffers (no copy is made here)
+        input_tensors[0].data  = mel->data;
+        output_tensors[0].data = out->data;
+
+        model->forward(input_tensors, output_tensors);
+        std::fprintf(stdout, "%s: Vitis AI model inference completed.\n", __func__);
+    } catch (const std::exception & e) {
+        std::fprintf(stderr, "%s: Exception during model inference: %s\n", __func__, e.what());
+        return 0;
+    }
+
+    return 1;
+}
diff --git a/src/vitisai/whisper-vitisai-encoder.h b/src/vitisai/whisper-vitisai-encoder.h
new file mode 100644
index 00000000000..05dc812be88
--- /dev/null
+++ b/src/vitisai/whisper-vitisai-encoder.h
@@ -0,0 +1,32 @@
+// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved.
+
+#pragma once
+
+#include
+#include
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct whisper_vitisai_context;
+
+// Create an encoder context for the model at path_model; NULL on failure.
+struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model);
+// Destroy a context returned by whisper_vitisai_init(); NULL is ignored.
+void whisper_vitisai_free(struct whisper_vitisai_context * ctx);
+
+// Function to mmap rai file for Linux and MapViewOfFile for Windows
+// (returns true on success and fills in *buffer and *size)
+bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size);
+// Function to unmap rai file for Linux and UnmapViewOfFile for Windows
+void unmap_rai_file(uint8_t * buffer, size_t size);
+
+struct ggml_tensor;
+
+// Run the encoder on mel and write the result to out; 1 on success, 0 on failure.
+int whisper_vitisai_encode(
+        struct whisper_vitisai_context * ctx,
+        struct ggml_tensor * mel,
+        struct ggml_tensor * out);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/whisper.cpp b/src/whisper.cpp
index 2f356da0f06..a038a5959ea 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -14,6 +14,10 @@
#include "openvino/whisper-openvino-encoder.h"
#endif
+#ifdef WHISPER_USE_VITISAI
+#include "vitisai/whisper-vitisai-encoder.h"
+#endif
+
#include
#include
#include
@@ -903,6 +907,10 @@ struct whisper_state {
whisper_openvino_context * ctx_openvino = nullptr;
#endif
+#ifdef WHISPER_USE_VITISAI
+ whisper_vitisai_context * ctx_vitisai = nullptr;
+#endif
+
// [EXPERIMENTAL] token-level timestamps data
int64_t t_beg = 0;
int64_t t_last = 0;
@@ -1970,7 +1978,13 @@ static bool whisper_encode_external(const whisper_state & wstate) {
const bool use_openvino = wstate.ctx_openvino != nullptr;
#endif
- return use_coreml || use_openvino;
+#ifndef WHISPER_USE_VITISAI
+ const bool use_vitisai = false;
+#else
+ const bool use_vitisai = wstate.ctx_vitisai != nullptr;
+#endif
+
+ return use_coreml || use_openvino || use_vitisai;
}
static struct ggml_cgraph * whisper_build_graph_conv(
@@ -2411,6 +2425,8 @@ static bool whisper_encode_internal(
#if defined(WHISPER_USE_COREML)
whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data);
+#elif defined(WHISPER_USE_VITISAI)
+        if (!whisper_vitisai_encode(wstate.ctx_vitisai, mel, wstate.embd_enc)) { return false; } // 0 = encoder failed (already logged); embd_enc is not usable
#elif defined(WHISPER_USE_OPENVINO)
whisper_openvino_encode(wstate.ctx_openvino, mel, wstate.embd_enc);
#endif
@@ -3346,6 +3362,20 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
}
#endif
+#ifdef WHISPER_USE_VITISAI
+// Derive the Vitis AI encoder artifact path from the ggml model path:
+// strip the file extension (if there is one) and append "-encoder-vitisai.rai"
+static std::string whisper_get_vitisai_path_encoder_cache(std::string path_bin) {
+    const auto pos_dot = path_bin.rfind('.');
+    const auto pos_sep = path_bin.find_last_of("/\\");
+
+    // Only a dot inside the final path component marks an extension. A dot in a
+    // directory name ("models.v1/ggml-base") or a leading "./" must not cut
+    // the path short.
+    const bool has_ext = pos_dot != std::string::npos &&
+                         (pos_sep == std::string::npos || pos_dot > pos_sep);
+    if (has_ext) {
+        path_bin = path_bin.substr(0, pos_dot);
+    }
+
+    path_bin += "-encoder-vitisai.rai";
+
+    return path_bin;
+}
+#endif
+
#ifdef WHISPER_USE_OPENVINO
// replace .bin with-encoder-openvino.xml
static std::string whisper_openvino_get_path_encoder(std::string path_bin) {
@@ -3455,6 +3485,19 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
}
#endif
+#ifdef WHISPER_USE_VITISAI
+ const auto path_vitisai = whisper_get_vitisai_path_encoder_cache(ctx->path_model);
+
+ state->ctx_vitisai = whisper_vitisai_init(path_vitisai.c_str());
+ if (!state->ctx_vitisai) {
+ WHISPER_LOG_ERROR("%s: failed to load Vitis AI model from '%s'\n", __func__, path_vitisai.c_str());
+ whisper_free_state(state);
+ return nullptr;
+ } else {
+ WHISPER_LOG_INFO("%s: Vitis AI model loaded\n", __func__);
+ }
+#endif
+
state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx);
state->batch = whisper_batch_init(ctx->model.hparams.n_text_ctx, WHISPER_MAX_DECODERS);
@@ -3821,6 +3864,13 @@ void whisper_free_state(struct whisper_state * state) {
}
#endif
+#ifdef WHISPER_USE_VITISAI
+ if (state->ctx_vitisai != nullptr) {
+ whisper_vitisai_free(state->ctx_vitisai);
+ state->ctx_vitisai = nullptr;
+ }
+#endif
+
whisper_batch_free(state->batch);
ggml_backend_sched_free(state->sched_conv.sched);
@@ -4312,11 +4362,20 @@ static int whisper_has_openvino(void) {
#endif
}
+// 1 when the library was compiled with WHISPER_USE_VITISAI, otherwise 0
+static int whisper_has_vitisai(void) {
+#if defined(WHISPER_USE_VITISAI)
+    const int enabled = 1;
+#else
+    const int enabled = 0;
+#endif
+    return enabled;
+}
+
const char * whisper_print_system_info(void) {
static std::string s;
s = "";
s += "WHISPER : ";
+ s += "VITISAI = " + std::to_string(whisper_has_vitisai()) + " | ";
s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | ";