diff --git a/.github/actions/cleanup-processes-linux/action.yml b/.github/actions/cleanup-processes-linux/action.yml new file mode 100644 index 00000000000..58649fcc41b --- /dev/null +++ b/.github/actions/cleanup-processes-linux/action.yml @@ -0,0 +1,19 @@ +name: 'Cleanup GPU Processes (Linux)' +description: 'Kill zombie whisper/GPU processes on self-hosted Linux runners' + +runs: + using: 'composite' + steps: + - name: Kill zombie processes + shell: bash + run: | + echo "=== Cleaning up stale processes ===" + pkill -f "whisper-cli" 2>/dev/null || true + pkill -f "whisper-bench" 2>/dev/null || true + pkill -f "whisper-server" 2>/dev/null || true + pkill -f "ctest.*whisper" 2>/dev/null || true + if command -v rocm-smi &>/dev/null; then + echo "=== GPU process check ===" + rocm-smi --showpids 2>/dev/null || true + fi + echo "=== Cleanup complete ===" diff --git a/.github/actions/cleanup-processes-windows/action.yml b/.github/actions/cleanup-processes-windows/action.yml new file mode 100644 index 00000000000..91a9424dd22 --- /dev/null +++ b/.github/actions/cleanup-processes-windows/action.yml @@ -0,0 +1,15 @@ +name: 'Cleanup GPU Processes (Windows)' +description: 'Kill zombie whisper/GPU processes on self-hosted Windows runners' + +runs: + using: 'composite' + steps: + - name: Kill zombie processes + shell: pwsh + run: | + Write-Host "=== Cleaning up stale processes ===" + $processNames = @("whisper-cli", "whisper-bench", "whisper-server", "ctest") + foreach ($name in $processNames) { + Get-Process -Name $name -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue + } + Write-Host "=== Cleanup complete ===" diff --git a/.github/workflows/bindings-go.yml b/.github/workflows/bindings-go.yml deleted file mode 100644 index 83473e4636a..00000000000 --- a/.github/workflows/bindings-go.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Bindings Tests (Go) -on: - push: - paths: - - bindings/go/** - - whisper.h - pull_request: - paths: - - bindings/go/** - - 
whisper.h - -jobs: - ubuntu-22: - runs-on: ubuntu-22.04 - steps: - - uses: actions/setup-go@v6 - with: - go-version: '^1.23' - - uses: actions/checkout@v6 - - run: | - cd bindings/go - make test diff --git a/.github/workflows/bindings-ruby.yml b/.github/workflows/bindings-ruby.yml deleted file mode 100644 index c3f158e26e4..00000000000 --- a/.github/workflows/bindings-ruby.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Bindings Tests (Ruby) - -on: - push: - branches: - - master - pull_request: - types: [opened, synchronize, reopened] - -jobs: - ubuntu-22: - runs-on: ubuntu-22.04 - defaults: - run: - working-directory: bindings/ruby - steps: - - uses: ruby/setup-ruby@v1 - with: - ruby-version: '3.2' - - uses: actions/checkout@v6 - - run: rake test diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fb115b22abb..5c4710b1663 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,77 +1,83 @@ -name: CI +name: AMD Build & Release +# ────────────────────────────────────────────────────────────────────────────── +# Triggers +# ────────────────────────────────────────────────────────────────────────────── on: - push: - branches: - - master - tags: - - 'v*' - paths: ['.github/workflows/build.yml', - '**/CMakeLists.txt', - '**/Makefile', - '**/*.mk', - '**/*.cmake', - '**/*.in', - '**/*.h', - '**/*.hpp', - '**/*.c', - '**/*.cpp', - '**/*.cu', - '**/*.cuh', - '**/*.cl', - '**/*.swift', - '**/*.m', - '**/*.mm', - '**/*.metal', - '**/*.comp', - '**/*.java'] - - pull_request: - types: [opened, synchronize, reopened] + schedule: + - cron: '0 2 * * 1' # Weekly – every Monday at 02:00 UTC workflow_dispatch: inputs: create_release: - description: 'Create new release' + description: 'Create GitHub Release' required: true type: boolean + default: false pre_release_tag: - description: 'Pre-release tag name' + description: 'Pre-release tag name (optional, overrides auto-tag)' required: false type: string run_type: - description: 'Workflow 
type to run' + description: 'Workflow scope' required: true type: choice options: - - full-ci - - release-only + - full-ci # all jobs + - release-only # release-producing jobs only + default: full-ci + gfx_targets: + description: 'ROCm GPU targets (comma-separated)' + required: false + type: string + default: 'gfx1151,gfx1150,gfx120X,gfx110X' + rocm_version: + description: 'ROCm version (e.g. 7.12.0)' + required: false + type: string + default: '7.12.0' + push: + tags: + - 'v*' + pull_request: + branches: + - master + - main + types: [opened, synchronize, reopened] concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} cancel-in-progress: true permissions: - contents: write # for creating release + contents: write +# ────────────────────────────────────────────────────────────────────────────── +# Shared environment +# ────────────────────────────────────────────────────────────────────────────── env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - ubuntu_image: "ubuntu:22.04" + GFX_TARGETS: ${{ github.event.inputs.gfx_targets || 'gfx1151,gfx1150,gfx120X,gfx110X' }} + ROCM_VERSION: ${{ github.event.inputs.rocm_version || '7.12.0' }} + FLEXML_URL: "https://github.com/lemonade-sdk/whisper.cpp/releases/download/deps/flexmlrt1.7.0-win.zip" VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" jobs: + +# ════════════════════════════════════════════════════════════════════════════════ +# 0. 
Determine release tag +# ════════════════════════════════════════════════════════════════════════════════ determine-tag: runs-on: ubuntu-latest outputs: - tag_name: ${{ steps.tag.outputs.name }} + tag_name: ${{ steps.tag.outputs.name }} + version: ${{ steps.tag.outputs.version }} should_release: ${{ steps.tag.outputs.should_release }} - steps: - - name: Checkout with full history - uses: actions/checkout@v6 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Determine tag name + - name: Determine tag and version id: tag shell: bash run: | @@ -80,1481 +86,1255 @@ jobs: CUSTOM_TAG="${{ github.event.inputs.pre_release_tag }}" SHOULD_RELEASE="false" - echo "Raw values:" - echo "BUILD_NUMBER: $BUILD_NUMBER" - echo "SHORT_HASH: $SHORT_HASH" - echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}" - echo "CUSTOM_TAG: $CUSTOM_TAG" - if [[ "${{ github.ref_type }}" == "tag" ]]; then - echo "Using pushed tag name" + # Triggered by sync.yml pushing a vX.Y.Z tag — this is the primary release path TAG_NAME="${{ github.ref_name }}" SHOULD_RELEASE="true" elif [[ -n "$CUSTOM_TAG" ]]; then - echo "Using custom tag" - TAG_NAME="${CUSTOM_TAG}" + TAG_NAME="$CUSTOM_TAG" SHOULD_RELEASE="true" elif [[ "${{ github.event.inputs.create_release }}" == "true" ]]; then - echo "Manual release requested" - SHOULD_RELEASE="true" TAG_NAME="b${BUILD_NUMBER}" - elif [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "Using master branch format" + SHOULD_RELEASE="true" + elif [[ "$BRANCH_NAME" == "main" || "$BRANCH_NAME" == "master" ]]; then TAG_NAME="b${BUILD_NUMBER}" SHOULD_RELEASE="false" else - echo "Using non-master branch format" - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - TAG_NAME="${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" + SAFE=$(echo "$BRANCH_NAME" | tr '/' '-') + TAG_NAME="${SAFE}-b${BUILD_NUMBER}-${SHORT_HASH}" SHOULD_RELEASE="false" fi - echo "Final tag name: $TAG_NAME" - echo "Should release: $SHOULD_RELEASE" - echo "name=$TAG_NAME" >> 
$GITHUB_OUTPUT - echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT - - - ubuntu-22: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - arch: [linux/amd64, linux/ppc64le] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential libsdl2-dev cmake git - cmake -B build - cmake --build build --config Release -j $(nproc)' - - ubuntu-22-arm64: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - arch: [linux/arm64] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential libsdl2-dev cmake git - cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a - cmake --build build --config Release -j $(nproc)' - - ubuntu-22-arm-v7: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - 
github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - arch: [linux/arm/v7] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + # Version used in artifact filenames — keep leading 'v' to match lemonade expectations + # e.g. v1.8.4 → v1.8.4, b1234 → b1234 + VERSION="${TAG_NAME}" - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential libsdl2-dev cmake git - cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp - cmake --build build --config Release -j $(nproc)' - - macOS-latest: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: macOS-latest - - strategy: - matrix: - destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] + echo "name=$TAG_NAME" >> $GITHUB_OUTPUT + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT +# ════════════════════════════════════════════════════════════════════════════════ +# 1. 
ROCm matrix (Linux + Windows per GFX target) +# ════════════════════════════════════════════════════════════════════════════════ + prepare-rocm-matrix: + runs-on: ubuntu-latest + outputs: + ubuntu_matrix: ${{ steps.m.outputs.ubuntu_matrix }} + windows_matrix: ${{ steps.m.outputs.windows_matrix }} steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: macOS-latest-swift - evict-old-files: 1d - - - name: Dependencies + - name: Build matrix JSON + id: m run: | - brew update - cmake --version - brew install sdl2 + targets="${{ env.GFX_TARGETS }}" + arr=$(echo "$targets" | tr ',' '\n' | sed 's/^ *//;s/ *$//' | jq -R . | jq -s .) - - name: Build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DWHISPER_BUILD_EXAMPLES=OFF \ - -DWHISPER_BUILD_TESTS=OFF \ - -DWHISPER_BUILD_SERVER=OFF \ - -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - -# freeBSD-latest: -# runs-on: macos-13 -# -# steps: -# - name: Clone -# uses: actions/checkout@v6 -# -# - name: Build -# uses: cross-platform-actions/action@v0.27.0 -# with: -# operating_system: freebsd -# version: '14.2' -# run: | -# sudo pkg update -# sudo pkg install -y gmake sdl2 cmake git -# cmake -B build -# cmake --build build --config Release - - ubuntu-22-gcc: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 + ubuntu_matrix=$(echo "$arr" | jq -c \ + '{gfx_target: ., build: ["Release"], sdl2: ["ON"], arch: ["linux/amd64"]}') - strategy: - fail-fast: false - matrix: - build: [Debug, Release] - arch: [linux/amd64, linux/ppc64le] + windows_matrix=$(echo "$arr" | jq -c \ + '{gfx_target: ., build: ["Release"], sdl2: ["ON"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}') - steps: - - name: Clone - uses: 
actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential cmake libsdl2-dev git - cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} - make - ctest -L gh --output-on-failure' - - ubuntu-22-gcc-arm64: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 + echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT + echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT - strategy: - fail-fast: false - matrix: - build: [Debug, Release] - arch: [linux/arm64] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential cmake libsdl2-dev git - cmake . 
-DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a - make - ctest -L gh --output-on-failure' - - ubuntu-22-gcc-arm-v7: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} +# ════════════════════════════════════════════════════════════════════════════════ +# 2. ROCm — Linux +# ════════════════════════════════════════════════════════════════════════════════ + linux-rocm: runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - build: [Debug, Release] - arch: [linux/arm/v7] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential cmake libsdl2-dev git - cmake . 
-DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp - make - ctest -L gh --output-on-failure' - - ubuntu-22-clang: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - build: [Debug, Release] - #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] - # TODO: arm/v7 disabled due to clang bug - # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990 - arch: [linux/amd64, linux/arm64, linux/ppc64le] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y clang build-essential cmake libsdl2-dev git - cmake . 
-DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang - make - ctest -L gh --output-on-failure' - - ubuntu-22-gcc-sanitized: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - + needs: [determine-tag, prepare-rocm-matrix] strategy: + matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.ubuntu_matrix) }} fail-fast: false - matrix: - sanitizer: [ADDRESS, THREAD, UNDEFINED] - arch: [linux/amd64] steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential cmake git - cmake . 
-DCMAKE_BUILD_TYPE=Debug \ - -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DGGML_OPENMP=OFF - make - ctest -L gh --output-on-failure' - - ubuntu-22-cmake-sycl: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - dwhisper_sycl: [ON] - dcmake_c_compiler: [icx] - dcmake_cxx_compiler: [icpx] - arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] - - continue-on-error: true - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: add oneAPI to apt - shell: bash - run: | - cd /tmp - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - - - name: install oneAPI dpcpp compiler - shell: bash + - name: Free disk space run: | - sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp git + sudo rm -rf /usr/local/lib/android /opt/ghc /usr/local/share/boost \ + /usr/share/dotnet /usr/local/.ghcup /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force 2>/dev/null || true - - name: install oneAPI MKL library - shell: bash - run: | - sudo apt install intel-oneapi-mkl-devel git + - uses: actions/checkout@v4 - - name: Clone - id: checkout - uses: actions/checkout@v6 + - name: Install dependencies + run: sudo apt-get update && sudo apt-get install -y cmake ninja-build curl build-essential libsdl2-dev git patchelf - - name: Build - id: cmake_build + - name: Download ROCm tarball run: | - source /opt/intel/oneapi/setvars.sh - mkdir build - cd build - cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. - cmake --build . 
--config Release -j $(nproc) - - ubuntu-22-cmake-sycl-fp16: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - dwhisper_sycl: [ON] - dcmake_c_compiler: [icx] - dcmake_cxx_compiler: [icpx] - arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] + source ci/resolve-rocm-version.sh linux "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" + echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV + sudo mkdir -p /opt/rocm + curl -L "$ROCM_TARBALL_URL" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 - continue-on-error: true - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: add oneAPI to apt - shell: bash + - name: Set ROCm env run: | - cd /tmp - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + echo "HIP_PATH=/opt/rocm" >> $GITHUB_ENV + echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV + echo "HIP_PLATFORM=amd" >> $GITHUB_ENV + echo "/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH" >> $GITHUB_PATH - - name: install oneAPI dpcpp compiler - shell: bash + - name: Find bitcode path run: | - sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp git + BITCODE_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) + [ -z "$BITCODE_PATH" ] && { echo "::error::bitcode dir not found"; exit 1; } + echo "ROCM_BITCODE_PATH=$BITCODE_PATH" >> $GITHUB_ENV - - name: install oneAPI MKL library - shell: bash + - name: Configure CMake run: | - sudo apt install intel-oneapi-mkl-devel - - - name: Clone - id: checkout - uses: actions/checkout@v6 + source ci/map-gpu-target.sh "${{ matrix.gfx_target }}" + cmake -S . 
-B build -G Ninja \ + -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \ + -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ + -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm --rocm-device-lib-path=${{ env.ROCM_BITCODE_PATH }}" \ + -DCMAKE_PREFIX_PATH=/opt/rocm \ + -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + -DGPU_TARGETS="$MAPPED_GPU_TARGET" \ + -DGGML_HIP=ON \ + -DWHISPER_BUILD_SERVER=ON \ + -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build - id: cmake_build run: | - source /opt/intel/oneapi/setvars.sh - mkdir build - cd build - cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. - cmake --build . --config Release -j $(nproc) + exit_code=0 + cmake --build build --config ${{ matrix.build }} -j$(nproc) > build.log 2>&1 || exit_code=$? + grep -E "error:|FAILED|Linking|Built target|warning:" build.log || true + if [ $exit_code -ne 0 ]; then + echo "--- Last 100 lines of build log ---" + tail -100 build.log + echo "Build failed with exit code $exit_code" + exit $exit_code + fi + echo "Build succeeded." - windows-msys2: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: windows-latest + - name: Verify build output + run: | + if [ ! -f build/bin/whisper-cli ]; then + echo "::error::whisper-cli not found - build likely truncated" + ls -lh build/bin/ 2>/dev/null || true + exit 1 + fi + if [ ! 
-f build/bin/whisper-server ]; then + echo "::error::whisper-server not found - build likely truncated" + ls -lh build/bin/ 2>/dev/null || true + exit 1 + fi + echo "Build output:"; ls -lh build/bin/whisper-cli build/bin/whisper-server + + - name: Inspect shared library dependencies + run: | + echo "--- ldd whisper-cli ---" + ldd build/bin/whisper-cli || true + echo "--- ldd whisper-server ---" + ldd build/bin/whisper-server || true + echo "--- missing libs ---" + MISSING_CLI=$(ldd build/bin/whisper-cli 2>/dev/null | grep "not found" || true) + MISSING_SRV=$(ldd build/bin/whisper-server 2>/dev/null | grep "not found" || true) + if [ -z "$MISSING_CLI" ] && [ -z "$MISSING_SRV" ]; then + echo "All dependencies resolved." + else + [ -n "$MISSING_CLI" ] && echo "whisper-cli missing:" && echo "$MISSING_CLI" + [ -n "$MISSING_SRV" ] && echo "whisper-server missing:" && echo "$MISSING_SRV" + fi + echo "--- .so* files in build/bin/ ---" + ls -lh build/bin/*.so* 2>/dev/null || echo "(none)" + + - name: Copy ROCm runtime libs + run: | + BIN="build/bin" + mkdir -p "$BIN" + + cp -a build/src/libwhisper.so* "$BIN"/ 2>/dev/null || true + cp -a build/ggml/src/libggml.so* "$BIN"/ 2>/dev/null || true + cp -a build/ggml/src/libggml-base.so* "$BIN"/ 2>/dev/null || true + cp -a build/ggml/src/libggml-cpu.so* "$BIN"/ 2>/dev/null || true + cp -a build/ggml/src/ggml-hip/libggml-hip.so* "$BIN"/ 2>/dev/null || true + export LD_LIBRARY_PATH="$PWD/$BIN:/opt/rocm/lib:/opt/rocm/lib/llvm/lib:/opt/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}" + + ROCM_ROOTS="/opt/rocm/lib /opt/rocm/lib/llvm/lib /opt/rocm/lib/rocm_sysdeps/lib" + + for binary in "$BIN"/*; do + [ -f "$binary" ] && [ -x "$binary" ] && file "$binary" | grep -q ELF || continue + ldd "$binary" 2>/dev/null | awk '{print $3}' | grep -E "^/opt/rocm" | while read lib; do + [ -f "$lib" ] || continue + cp -n "$lib" "$BIN/" 2>/dev/null || true + # Also copy the soname symlink if it exists alongside the real file + dir=$(dirname "$lib") + 
base=$(basename "$lib") + find "$dir" -maxdepth 1 -name "${base%%.*}.so*" -exec cp -Pn {} "$BIN/" \; 2>/dev/null || true + done + done + + # Transitive deps: repeat ldd over any newly copied ROCm .so to catch indirect deps + for pass in 1 2; do + for lib in "$BIN"/lib*.so*; do + [ -f "$lib" ] && [ ! -L "$lib" ] || continue + ldd "$lib" 2>/dev/null | awk '{print $3}' | grep -E "^/opt/rocm" | while read dep; do + [ -f "$dep" ] || continue + cp -n "$dep" "$BIN/" 2>/dev/null || true + done + done + done + + # Kernel library data dirs (loaded at runtime by path, not via soname) + [ -d /opt/rocm/lib/rocblas/library ] && { mkdir -p "$BIN/rocblas"; cp -r /opt/rocm/lib/rocblas/library "$BIN/rocblas/"; } + [ -d /opt/rocm/lib/hipblaslt/library ] && { mkdir -p "$BIN/hipblaslt"; cp -r /opt/rocm/lib/hipblaslt/library "$BIN/hipblaslt/"; } + + - name: Set portable RPATH + run: | + cd build/bin + for f in *.so* whisper-*; do + [ -f "$f" ] && [ ! -L "$f" ] && file "$f" | grep -q ELF && patchelf --set-rpath '$ORIGIN' "$f" 2>/dev/null || true + done + + - name: Package + run: | + VER="${{ needs.determine-tag.outputs.version }}" + ARCHIVE="whisper-${VER}-linux-rocm-${{ matrix.gfx_target }}.tar.gz" + STAGE="whisper-${VER}-linux-rocm-${{ matrix.gfx_target }}" + mkdir -p "$STAGE" && cp -r build/bin/* "$STAGE/" + tar -czf "$ARCHIVE" "$STAGE" + echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV + + - uses: actions/upload-artifact@v4 + with: + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} +# ════════════════════════════════════════════════════════════════════════════════ +# 3. 
ROCm — Windows +# ════════════════════════════════════════════════════════════════════════════════ + windows-rocm: + runs-on: windows-2022 + needs: [determine-tag, prepare-rocm-matrix] strategy: + matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.windows_matrix) }} fail-fast: false - matrix: - include: - - { sys: UCRT64, env: ucrt-x86_64, build: Release } - - { sys: CLANG64, env: clang-x86_64, build: Release } steps: - - name: Clone - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: Setup ${{ matrix.sys }} - uses: msys2/setup-msys2@v2 - with: - update: true - msystem: ${{matrix.sys}} - install: >- - base-devel - git - mingw-w64-${{matrix.env}}-toolchain - mingw-w64-${{matrix.env}}-cmake - mingw-w64-${{matrix.env}}-SDL2 - mingw-w64-${{matrix.env}}-openblas + - name: Install Ninja + shell: powershell + run: choco install ninja -y - - name: Build using CMake - shell: msys2 {0} + - name: Fetch SDL2 and patch header + if: matrix.sdl2 == 'ON' + shell: powershell run: | - cmake -B build -DWHISPER_SDL2=ON - cmake --build build --config ${{ matrix.build }} -j $(nproc) + $url = "https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip" + Invoke-WebRequest -Uri $url -OutFile sdl2.zip + 7z x sdl2.zip + $cmake = Get-ChildItem -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1 + if ($cmake) { "SDL2_DIR=$($cmake.DirectoryName)" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 } + else { Write-Error "sdl2-config.cmake not found"; exit 1 } + $hdr = Get-ChildItem -Recurse -Filter "SDL_endian.h" | Select-Object -First 1 + if ($hdr) { + $c = Get-Content $hdr.FullName -Raw + if ($c -match 'extern void _m_prefetch') { + $c = $c -replace 'extern void _m_prefetch\(void \*__P\);','// extern void _m_prefetch(void *__P);' + Set-Content $hdr.FullName $c + } + } else { Write-Error "SDL_endian.h not found"; exit 1 } + + - name: Download and extract ROCm tarball + shell: powershell + run: | + $gfx = "${{ matrix.gfx_target }}" + 
$ver = "${{ env.ROCM_VERSION }}" + $base = switch ($gfx) { + "gfx110X" { "gfx110X-all" } + "gfx120X" { "gfx120X-all" } + "gfx1150" { "gfx1150" } + "gfx1151" { "gfx1151" } + "gfx1152" { "gfx1152" } + default { $gfx } + } + $url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${base}-${ver}.tar.gz" + Write-Host "ROCm URL: $url" + "DETECTED_ROCM_VERSION=$ver" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 + curl.exe -L --retry 3 --retry-delay 5 -o rocm.tar.gz $url + if ($LASTEXITCODE -ne 0) { Write-Error "curl failed with exit code $LASTEXITCODE"; exit 1 } + New-Item -ItemType Directory -Force -Path "C:\opt\rocm" | Out-Null + tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 + + - name: Map GPU target + id: gpu + shell: powershell + run: | + $gfx = "${{ matrix.gfx_target }}" + $mapped = switch ($gfx) { + "gfx110X" { "gfx1100;gfx1101;gfx1102" } + "gfx120X" { "gfx1200;gfx1201" } + default { $gfx } + } + Write-Host "Mapped GPU target: $gfx -> $mapped" + "mapped=$mapped" | Out-File $env:GITHUB_OUTPUT -Append -Encoding utf8 + + - name: Configure CMake + shell: powershell + run: | + $env:HIP_PATH = "C:\opt\rocm" + $env:HIP_PLATFORM = "amd" + $env:PATH = "$env:HIP_PATH\bin;$env:HIP_PATH\lib\llvm\bin;$env:PATH" + cmake -S . 
-B build ` + -G "Ninja Multi-Config" ` + -DGPU_TARGETS="${{ steps.gpu.outputs.mapped }}" ` + -DGGML_HIP=ON ` + -DCMAKE_C_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang.exe" ` + -DCMAKE_CXX_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang++.exe" ` + -DCMAKE_HIP_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang++.exe" ` + "-DCMAKE_C_FLAGS='-D__PRFCHWINTRIN_H'" ` + "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'" ` + "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm" ` + -DCMAKE_PREFIX_PATH="$env:HIP_PATH" ` + -DCMAKE_BUILD_TYPE=${{ matrix.build }} ` + -DWHISPER_BUILD_SERVER=ON ` + -DWHISPER_SDL2=${{ matrix.sdl2 }} - - name: Clean after building using CMake - shell: msys2 {0} - run: | - rm -rf build + - name: Build + shell: powershell + run: | + cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS > build.log 2>&1 + $exit = $LASTEXITCODE + # Show only errors and link steps - keeps log under GitHub's line limit + Get-Content build.log | Select-String -Pattern "error:|FAILED|Linking|Built target|warning: " | Write-Host + if ($exit -ne 0) { + Write-Host "--- Last 100 lines of build log ---" + Get-Content build.log -Tail 100 + Write-Error "Build failed with exit code $exit" + exit $exit + } + Write-Host "Build succeeded." 
+ + - name: Copy ROCm DLLs + shell: powershell + run: | + $bin = "build/bin/${{ matrix.build }}" + $rocBin = "C:\opt\rocm\bin" + @("amdhip64_*.dll","amd_comgr*.dll","libhipblas.dll","rocblas.dll", + "rocsolver.dll","hipblaslt.dll","libhipblaslt.dll","hipblas.dll") | ForEach-Object { + Get-ChildItem $rocBin -Name $_ -ErrorAction SilentlyContinue | + ForEach-Object { Copy-Item (Join-Path $rocBin $_) (Join-Path $bin $_) } + } + $rocLib = Join-Path $rocBin "rocblas\library" + if (Test-Path $rocLib) { Copy-Item $rocLib -Destination (Join-Path $bin "rocblas\library") -Recurse -Force } + $hipLib = Join-Path $rocBin "hipblaslt\library" + if (Test-Path $hipLib) { Copy-Item $hipLib -Destination (Join-Path $bin "hipblaslt\library") -Recurse -Force } - - name: Build using CMake w/ OpenBLAS - shell: msys2 {0} - run: | - cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS - cmake --build build --config ${{ matrix.build }} -j $(nproc) + - name: Copy SDL2.dll + if: matrix.sdl2 == 'ON' + shell: powershell + run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" "build/bin/${{ matrix.build }}" + + - name: Verify build output + shell: powershell + run: | + $bin = "build/bin/${{ matrix.build }}" + if (-not (Test-Path "$bin/whisper-cli.exe")) { + Write-Error "whisper-cli.exe not found in $bin - build likely truncated" + Get-ChildItem $bin -ErrorAction SilentlyContinue | Format-Table Name, Length + exit 1 + } + if (-not (Test-Path "$bin/whisper-server.exe")) { + Write-Error "whisper-server.exe not found in $bin - build likely truncated" + Get-ChildItem $bin -ErrorAction SilentlyContinue | Format-Table Name, Length + exit 1 + } + Write-Host "Build output:" + Get-ChildItem $bin -Filter "*.exe" | Format-Table Name, Length + + - name: Package + shell: powershell + run: | + $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-${{ matrix.gfx_target }}.zip" + Compress-Archive -Path "build/bin/${{ matrix.build }}/*" -DestinationPath $a -Force + "ARCHIVE=$a" | 
Out-File $env:GITHUB_ENV -Append -Encoding utf8 + + - uses: actions/upload-artifact@v4 + with: + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - windows: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: windows-latest +# ════════════════════════════════════════════════════════════════════════════════ +# 4. Vulkan — Linux +# ════════════════════════════════════════════════════════════════════════════════ + linux-vulkan: + runs-on: ubuntu-latest needs: determine-tag - strategy: - matrix: - build: [Release] - arch: [Win32, x64] - sdl2: [ON] - include: - - arch: Win32 - s2arc: x86 - jnaPath: win32-x86 - - arch: x64 - s2arc: x64 - jnaPath: win32-x86-64 - - sdl2: ON - s2ver: 2.28.5 - steps: - - name: Clone - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v2 - - - name: Fetch SDL2 and set SDL2_DIR - if: matrix.sdl2 == 'ON' + - name: Install dependencies run: | - C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip - 7z x sdl2.zip - echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV - - - name: Configure - run: > - cmake -S . 
-B ./build -A ${{ matrix.arch }} - -DCMAKE_BUILD_TYPE=${{ matrix.build }} - -DBUILD_SHARED_LIBS=ON - -DWHISPER_SDL2=${{ matrix.sdl2 }} + sudo apt-get update + sudo apt-get install -y build-essential cmake git libsdl2-dev pkg-config libvulkan-dev vulkan-tools + sudo apt-get install -y glslc || sudo apt-get install -y shaderc - - name: Build + - name: Check Vulkan availability run: | - cd ./build - msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} - - - name: Copy SDL2.dll - if: matrix.sdl2 == 'ON' - run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} - - - name: Upload SDL2.dll - if: matrix.sdl2 == 'ON' - uses: actions/upload-artifact@v6 - with: - name: ${{ matrix.s2arc }}_SDL2.dll - path: build/bin/${{ matrix.build }}/SDL2.dll - - - name: Upload whisper dll - uses: actions/upload-artifact@v6 - with: - name: whisper_${{ matrix.arch }}.dll - path: build/bin/${{ matrix.build }}/whisper.dll + if ! command -v glslc >/dev/null 2>&1 && ! 
command -v glslangValidator >/dev/null 2>&1; then + echo "::error::No GLSL compiler found (glslc / shaderc)"; exit 1 + fi - - name: Upload ggml dll - uses: actions/upload-artifact@v6 - with: - name: ggml_${{ matrix.arch }}.dll - path: build/bin/${{ matrix.build }}/ggml.dll + - name: Configure CMake + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_NATIVE=OFF \ + -DGGML_AVX=ON \ + -DGGML_AVX2=ON \ + -DGGML_FMA=ON \ + -DGGML_AVX512=OFF \ + -DCMAKE_C_FLAGS="-march=x86-64-v3" \ + -DCMAKE_CXX_FLAGS="-march=x86-64-v3" \ + -DGGML_VULKAN=ON \ + -DWHISPER_BUILD_EXAMPLES=ON \ + -DWHISPER_BUILD_TESTS=OFF \ + -DWHISPER_BUILD_SERVER=ON - - name: Upload ggml base dll - uses: actions/upload-artifact@v6 - with: - name: ggml_base_${{ matrix.arch }}.dll - path: build/bin/${{ matrix.build }}/ggml-base.dll + - name: Build + run: cmake --build build --config Release -j$(nproc) - - name: Upload ggml cpu dll - uses: actions/upload-artifact@v6 - with: - name: ggml_cpu_${{ matrix.arch }}.dll - path: build/bin/${{ matrix.build }}/ggml-cpu.dll + - name: Validate Vulkan artifacts + run: | + VFILES=$(find build -type f \( -iname "*vulkan*.so*" -o -iname "*vulkan*" \) 2>/dev/null | wc -l) + if [ "$VFILES" -eq 0 ]; then + echo "::warning::No Vulkan-related artifacts found" + else + echo "Vulkan artifacts found: $VFILES file(s)" + fi - - name: Pack bin artifacts - shell: pwsh + - name: Package run: | - Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-bin-${{ matrix.arch }}.zip" + VER="${{ needs.determine-tag.outputs.version }}" + ARCHIVE="whisper-${VER}-linux-vulkan-x86_64.tar.gz" + STAGE="whisper-${VER}-linux-vulkan-x86_64" + mkdir -p "$STAGE" + cp -r build/bin/* "$STAGE/" 2>/dev/null || true + find build -name "*.so*" -exec cp {} "$STAGE/" \; 2>/dev/null || true + tar -czf "$ARCHIVE" "$STAGE" + echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV - - name: Upload binaries - if: matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }} - uses: 
actions/upload-artifact@v6 + - uses: actions/upload-artifact@v4 with: - name: whisper-bin-${{ matrix.arch }}.zip - path: whisper-bin-${{ matrix.arch }}.zip + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - windows-blas: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} +# ════════════════════════════════════════════════════════════════════════════════ +# 5. Vulkan — Windows +# ════════════════════════════════════════════════════════════════════════════════ + windows-vulkan: runs-on: windows-latest - - strategy: - matrix: - build: [Release] - arch: [Win32, x64] - blas: [ON] - sdl2: [ON] - blasver: [0.3.29] - include: - - arch: Win32 - s2arc: x86 - blasfile: x86 - - arch: x64 - s2arc: x64 - blasfile: x64_64 - - sdl2: ON - s2ver: 2.28.5 + needs: determine-tag steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Export GitHub Actions cache environment variables - uses: actions/github-script@v8 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); + - uses: actions/checkout@v4 - - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v2 + - uses: microsoft/setup-msbuild@v2 - - name: Install OpenBLAS and pkgconfiglite - if: matrix.blas == 'ON' + - name: Install Vulkan SDK + shell: pwsh run: | - Invoke-WebRequest "https://github.com/OpenMathLib/OpenBLAS/releases/download/v${{matrix.blasver}}/OpenBLAS-${{matrix.blasver}}_${{matrix.blasfile}}.zip" -OutFile "OpenBLAS-${{matrix.blasver}}.zip" - Expand-Archive "OpenBLAS-${{matrix.blasver}}.zip" -DestinationPath "OpenBLAS-${{matrix.blasver}}" - choco install pkgconfiglite + winget install --id KhronosGroup.VulkanSDK -e --silent --accept-package-agreements --accept-source-agreements + $sdk = Get-ChildItem "C:\VulkanSDK" -ErrorAction SilentlyContinue | Select-Object -First 1 + if (-not $sdk) { throw 
"Vulkan SDK not found under C:\VulkanSDK" } + "VULKAN_SDK=$($sdk.FullName)" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - - name: Fetch SDL2 and set SDL2_DIR - if: matrix.sdl2 == 'ON' + - name: Fetch SDL2 + shell: pwsh run: | - C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip + C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-2.28.5/SDL2-devel-2.28.5-VC.zip 7z x sdl2.zip - echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV - - - name: Configure - run: > - cmake -S . -B ./build -A ${{ matrix.arch }} - -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake" - -DCMAKE_BUILD_TYPE=${{ matrix.build }} - -DGGML_BLAS=${{ matrix.blas }} - -DGGML_BLAS_VENDOR=OpenBLAS - -DBLAS_LIBRARIES="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/lib/libopenblas.lib" - -DBLAS_INCLUDE_DIRS="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/include" - -DWHISPER_SDL2=${{ matrix.sdl2 }} + "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-2.28.5/cmake" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - - name: Build + - name: Configure CMake + shell: pwsh run: | - cd ./build - msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} + cmake -S . 
-B ./build -A x64 ` + -DCMAKE_BUILD_TYPE=Release ` + -DBUILD_SHARED_LIBS=ON ` + -DGGML_VULKAN=ON ` + -DWHISPER_BUILD_SERVER=ON ` + -DWHISPER_SDL2=ON ` + -DVULKAN_SDK="$env:VULKAN_SDK" - - name: Copy openblas.dll - if: matrix.blas == 'ON' - run: copy "$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/bin/libopenblas.dll" build/bin/${{ matrix.build }} + - name: Build + run: cd ./build && msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64 - name: Copy SDL2.dll - if: matrix.sdl2 == 'ON' - run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} + shell: pwsh + run: copy "$env:SDL2_DIR/../lib/x64/SDL2.dll" build/bin/Release - - name: Pack bin artifacts + - name: Package shell: pwsh run: | - Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-blas-bin-${{ matrix.arch }}.zip" + $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-vulkan-x64.zip" + Compress-Archive -Path "build/bin/Release/*" -DestinationPath $a -Force + "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - - name: Upload binaries - if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }} - uses: actions/upload-artifact@v6 + - uses: actions/upload-artifact@v4 with: - name: whisper-blas-bin-${{ matrix.arch }}.zip - path: whisper-blas-bin-${{ matrix.arch }}.zip - - windows-cublas: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: windows-2022 + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} + +# ════════════════════════════════════════════════════════════════════════════════ +# 6. 
NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) +# ════════════════════════════════════════════════════════════════════════════════ + windows-npu: + runs-on: [self-hosted, Windows, stx, rai300_400] needs: determine-tag - strategy: - fail-fast: false - matrix: - build: [Release] - arch: [x64] - cublas: [ON] - sdl2: [ON] - cuda-toolkit: [12.4.0, 11.8.0] - include: - - arch: x64 - sdl2: ON - sdl2_ver: 2.28.5 - steps: - - name: Clone repository - uses: actions/checkout@v6 - - - name: Install Ninja - id: install_ninja - run: | - choco install ninja - - - name: Install ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ${{ github.job }}-${{ matrix.cuda-toolkit }}-${{ matrix.build }} - variant: sccache - evict-old-files: 5d - - - name: Install Cuda Toolkit 11.8.0 - if: ${{ matrix.cuda-toolkit == '11.8.0' }} - run: | - $CUDA_VERSION = ${{ matrix.cuda-toolkit }} - $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION" - $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist" - - # Components versions - $CUDART_VER = "11.8.89" - $NVCC_VER = "11.8.89" - $NVRTC_VER = "11.8.89" - $CUBLAS_VER = "11.8.1.74" - $NVTX_VER = "11.8.86" - $VS_VER = "11.8.86" - $NVPROF_VER = "11.8.87" - $CCCL_VER = "11.8.89" - - # Create the directory where the CUDA Toolkit will be installed - mkdir -p $CUDA_TOOLKIT_DIR - - # Install unzip to extract the downloaded files - choco install unzip -y - - # Download all the required components - curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip" - curl -O 
"$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip" - - # Extract all the downloaded files to the CUDA Toolkit directory - unzip '*.zip' -d $CUDA_TOOLKIT_DIR - - # Copy all the extracted files to the main CUDA Toolkit directory - xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - - # Visual Studio integration - xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y - - # Set environment variables - echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo 
"CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - echo "CUDA_PATH_V11_8=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - - - name: Install Cuda Toolkit 12.4.0 - if: ${{ matrix.cuda-toolkit == '12.4.0' }} - run: | - $CUDA_VERSION = ${{ matrix.cuda-toolkit }} - $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION" - $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist" - - # Components versions - $CUDART_VER = "12.4.127" - $NVCC_VER = "12.4.131" - $NVRTC_VER = "12.4.127" - $CUBLAS_VER = "12.4.5.8" - $NVTX_VER = "12.4.127" - $PROFILER_VER = "12.4.127" - $VS_VER = "12.4.127" - $NVPROF_VER = "12.4.128" - $CCCL_VER = "12.4.127" - - # Create the directory where the CUDA Toolkit will be installed - mkdir -p $CUDA_TOOLKIT_DIR - - # Install unzip to extract the downloaded files - choco install unzip -y - - # Download all the required components - curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip" - - # Extract all the downloaded files to the 
CUDA Toolkit directory - unzip -q '*.zip' -d $CUDA_TOOLKIT_DIR - - # Copy all the extracted files to the main CUDA Toolkit directory - xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - - # Visual Studio integration - xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y - - # Set environment variables - echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - echo "CUDA_PATH_V12_2=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - - - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v2 - - - name: Install 7-Zip - run: choco install 7zip -y - - - name: Fetch SDL2 and set SDL2_DIR - 
if: matrix.sdl2 == 'ON' - run: | - Invoke-WebRequest -Uri https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.sdl2_ver }}/SDL2-devel-${{ matrix.sdl2_ver }}-VC.zip -OutFile sdl2.zip - 7z x sdl2.zip - echo "SDL2_DIR=${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" | Out-File -FilePath $env:GITHUB_ENV -Append - echo "${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" > SDL2_PATH.txt - - - name: Install cmake - run: choco install cmake + continue-on-error: true # runner may be offline; don't block release - - name: Build Project + steps: + - uses: actions/checkout@v4 + + - uses: microsoft/setup-msbuild@v2 + + - name: Install CMake if not available + shell: powershell + run: | + $installed = Get-Command cmake -ErrorAction SilentlyContinue + if (-not $installed) { + $ver = "3.28.1" + $url = "https://github.com/Kitware/CMake/releases/download/v$ver/cmake-$ver-windows-x86_64.msi" + Invoke-WebRequest -Uri $url -OutFile cmake.msi + Start-Process msiexec.exe -ArgumentList "/i cmake.msi /quiet /norestart" -Wait + $p = "C:\Program Files\CMake\bin" + $env:PATH = "$p;$env:PATH" + echo $p >> $env:GITHUB_PATH + cmake --version + if ($LASTEXITCODE -ne 0) { Write-Error "CMake install failed"; exit 1 } + } else { cmake --version } + + - name: Download FlexML Runtime + shell: powershell + run: | + Invoke-WebRequest -Uri "${{ env.FLEXML_URL }}" -OutFile flexmlrt.zip + if (-Not (Test-Path "flexmlrt.zip")) { Write-Error "flexmlrt.zip not downloaded"; exit 1 } + if ((Get-Item "flexmlrt.zip").Length -eq 0) { Write-Error "flexmlrt.zip is empty"; exit 1 } + Write-Host "FlexML: $([math]::Round((Get-Item 'flexmlrt.zip').Length/1MB,2)) MB downloaded" + + - name: Extract FlexML Runtime + shell: powershell + run: | + tar xvf flexmlrt.zip + if ($LASTEXITCODE -ne 0) { Write-Error "Extraction failed"; exit 1 } + $dirs = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } + if (-not $dirs) { Write-Error "No flexmlrt directory found after 
extraction"; exit 1 } + Write-Host "Extracted: $($dirs.Name)" + + - name: Setup FlexML, configure and build shell: cmd run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" - cmake --version - where cmake - if "${{ matrix.cuda-toolkit }}" == "11.8.0" ( - set CUDA_FLAGS=-allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR - ) else ( - set CUDA_FLAGS= - ) - cmake -S . -B build -G "Ninja Multi-Config" ^ - -DCMAKE_BUILD_TYPE=${{ matrix.build }} ^ - -DGGML_CUDA=${{ matrix.cublas }} ^ - -DWHISPER_SDL2=${{ matrix.sdl2 }} ^ - -DSDL2_DIR="%SDL2_DIR%" ^ - -DCMAKE_POLICY_VERSION_MINIMUM=3.5 ^ - -DCMAKE_CUDA_FLAGS="%CUDA_FLAGS%" - set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 - cmake --build build --config ${{ matrix.build }} -j %NUMBER_OF_PROCESSORS% - - - name: Check sccache status after build - run: | - sccache --show-stats - - - name: Copy CUDA DLLs - run: | - Get-ChildItem "$env:CUDA_PATH\bin\" -Filter "*.dll" | - Copy-Item -Destination "build/bin/${{ matrix.build }}" - - - name: Copy SDL2.dll - if: matrix.sdl2 == 'ON' - run: copy "$env:SDL2_DIR/../lib/${{ matrix.arch }}/SDL2.dll" build/bin/${{ matrix.build }} - - - name: Pack bin artifacts - shell: pwsh - run: | - Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip" - - - name: Upload binaries - if: ${{ needs.determine-tag.outputs.should_release }} - uses: actions/upload-artifact@v6 + cd flexmlrt + call setup.bat + if errorlevel 1 ( echo ERROR: FlexML setup.bat failed! & exit /b 1 ) + cd .. + cmake -B build -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_VITISAI=ON -DWHISPER_BUILD_SERVER=ON + if errorlevel 1 ( echo ERROR: CMake configure failed! & exit /b 1 ) + cmake --build build --config Release -j + if errorlevel 1 ( echo ERROR: Build failed! 
& exit /b 1 ) + + - name: List build output + shell: powershell + run: | + if (Test-Path "build/bin/Release") { + Get-ChildItem -Path "build/bin/Release" -Recurse | Format-Table Name, Length + } else { Write-Error "build/bin/Release not found"; exit 1 } + + - name: Copy FlexML DLLs to build output + shell: powershell + run: | + $copied = 0 + if (Test-Path "flexmlrt/bin") { + $d = Get-ChildItem -Path "flexmlrt/bin/*.dll" -ErrorAction SilentlyContinue + if ($d) { Copy-Item "flexmlrt/bin/*.dll" "build/bin/Release/" -Force; $copied += $d.Count } + } + if (Test-Path "flexmlrt/lib") { + $d = Get-ChildItem -Path "flexmlrt/lib/*.dll" -ErrorAction SilentlyContinue + if ($d) { Copy-Item "flexmlrt/lib/*.dll" "build/bin/Release/" -Force; $copied += $d.Count } + } + Write-Host "FlexML DLLs copied: $copied" + + - name: Package + shell: powershell + run: | + $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-npu-x64.zip" + Compress-Archive -Path "build/bin/Release/*" -DestinationPath $a -Force + if (-not (Test-Path $a)) { Write-Error "Package creation failed"; exit 1 } + $mb = [math]::Round((Get-Item $a).Length/1MB,2) + Write-Host "Package: $a ($mb MB)" + "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 + + - name: Build summary + shell: powershell + run: | + Write-Host "NPU build complete. Artifact: $env:ARCHIVE" + + - uses: actions/upload-artifact@v4 with: - name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip - path: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip - - emscripten: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - strategy: - matrix: - build: [Release] +# ════════════════════════════════════════════════════════════════════════════════ +# 7. 
Metal — macOS (arm64) +# ════════════════════════════════════════════════════════════════════════════════ + macos-metal: + runs-on: macos-latest + needs: determine-tag steps: - - name: Clone - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: Setup emsdk - uses: mymindstorm/setup-emsdk@v14 + - name: Install dependencies + run: brew install cmake ninja - - name: Verify - run: emcc -v + - name: Configure CMake + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_MACOSX_RPATH=ON \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ + -DCMAKE_INSTALL_RPATH="@loader_path" \ + -DGGML_METAL=ON \ + -DWHISPER_BUILD_EXAMPLES=ON \ + -DWHISPER_BUILD_TESTS=OFF \ + -DWHISPER_BUILD_SERVER=ON - name: Build run: | - emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} - make + cmake --build build --config Release -j$(sysctl -n hw.logicalcpu) > build.log 2>&1 + exit_code=$? + grep -E "error:|FAILED|Linking|Built target" build.log || true + if [ $exit_code -ne 0 ]; then + tail -100 build.log + exit $exit_code + fi + echo "Build succeeded." - ios-xcode-build: - runs-on: macos-latest - needs: determine-tag + - name: Verify build output + run: | + if [ ! -f build/bin/whisper-cli ]; then + echo "::error::whisper-cli not found" + ls -lh build/bin/ 2>/dev/null || true + exit 1 + fi - strategy: - matrix: - build: [Release] + if [ ! -f build/bin/whisper-server ]; then + echo "::error::whisper-server not found" + ls -lh build/bin/ 2>/dev/null || true + exit 1 + fi - steps: - - name: Checkout code - uses: actions/checkout@v6 + echo "--- build/bin ---" + ls -lh build/bin/ - - name: Configure - run: | - cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin - mkdir models/ggml-base.en-encoder.mlmodelc + echo "--- macOS dylibs produced by build ---" + find build -name "*.dylib" -print | sort - - name: Build - id: cmake_build - run: | - sysctl -a - mkdir build - cd build - cmake -G Xcode .. 
\ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DWHISPER_BUILD_EXAMPLES=OFF \ - -DWHISPER_BUILD_TESTS=OFF \ - -DWHISPER_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + echo "--- whisper-server dependencies before packaging ---" + otool -L build/bin/whisper-server - - name: xcodebuild for swift package - id: xcodebuild + - name: Package run: | - ./build-xcframework.sh + set -euo pipefail - - name: Build objc example - run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO FRAMEWORK_FOLDER_PATH=./build-ios build + VER="${{ needs.determine-tag.outputs.version }}" + ARCHIVE="whisper-${VER}-darwin-metal-arm64.tar.gz" + STAGE="whisper-${VER}-darwin-metal-arm64" - - name: Build swiftui example - run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build + rm -rf "$STAGE" "$ARCHIVE" + mkdir -p "$STAGE" - - name: Pack artifacts - id: pack_artifacts - run: | - zip --symlinks -r whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip build-apple/whisper.xcframework + cp -R build/bin/* "$STAGE/" 2>/dev/null || true - - name: Upload artifacts - if: ${{ needs.determine-tag.outputs.should_release }} - uses: actions/upload-artifact@v6 - with: - path: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip - name: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip + # whisper-server depends on libwhisper / ggml dylibs that CMake may + # leave under build/src and build/ggml/src rather than build/bin. 
+ # Package all produced dylibs next to the executables so @loader_path + # can resolve them on downstream machines and GitHub macOS runners. + while IFS= read -r lib; do + cp -P "$lib" "$STAGE/" + done < <(find build -name "*.dylib" -print | sort) - android: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 + # Make dylib lookup portable inside the extracted archive. + for target in "$STAGE"/whisper-* "$STAGE"/*.dylib; do + [ -e "$target" ] || continue - steps: - - name: Clone - uses: actions/checkout@v6 - with: - path: whisper + install_name_tool -add_rpath "@loader_path" "$target" 2>/dev/null || true - - name: Install Java - uses: actions/setup-java@v5 - with: - distribution: zulu - java-version: 21 + if [ "${target##*.}" = "dylib" ] && [ ! -L "$target" ]; then + install_name_tool -id "@rpath/$(basename "$target")" "$target" 2>/dev/null || true + fi - - name: Setup Android SDK - uses: android-actions/setup-android@v3 + while IFS= read -r dep; do + case "$dep" in + "$PWD"/build/*|/Users/runner/work/whisper.cpp-rocm/*) + install_name_tool -change "$dep" "@rpath/$(basename "$dep")" "$target" 2>/dev/null || true + ;; + esac + done < <(otool -L "$target" 2>/dev/null | awk 'NR > 1 {print $1}') + done - - name: Build - run: | - cd whisper/examples/whisper.android - ./gradlew assembleRelease --no-daemon + echo "--- packaged files ---" + find "$STAGE" -maxdepth 1 -type f -o -type l | sort - - name: Build with external ggml - run: | - export PATH_TO_GGML=$PWD/ggml - cd whisper/examples/whisper.android - ./gradlew assembleRelease --no-daemon + echo "--- whisper-server dependencies after packaging ---" + otool -L "$STAGE/whisper-server" + otool -l "$STAGE/whisper-server" | grep -A2 LC_RPATH || true - android_java: - runs-on: ubuntu-22.04 + if otool -L "$STAGE/whisper-server" | grep -q "/Users/runner/work/whisper.cpp-rocm"; then + echo "::error::whisper-server still 
references non-portable build paths" + exit 1 + fi - steps: - - name: Clone - uses: actions/checkout@v6 + if ! find "$STAGE" -maxdepth 1 \( -type f -o -type l \) -name "libwhisper*.dylib" | grep -q .; then + echo "::error::packaged archive is missing libwhisper dylib" + exit 1 + fi - - name: set up JDK 11 - uses: actions/setup-java@v5 - with: - java-version: '11' - distribution: 'temurin' - cache: gradle + set +e + DYLD_LIBRARY_PATH="$PWD/$STAGE" "$STAGE/whisper-server" --help > whisper-server-smoke.log 2>&1 + smoke_status=$? + set -e - - name: Setup Android SDK - uses: android-actions/setup-android@v3 - with: - cmdline-tools-version: 9.0 + cat whisper-server-smoke.log - - name: Build - run: | - cd examples/whisper.android.java - chmod +x ./gradlew - ./gradlew assembleRelease + if grep -q "Library not loaded" whisper-server-smoke.log; then + echo "::error::whisper-server has unresolved dylib dependencies" + exit 1 + fi - bindings-java: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - needs: ['windows'] - runs-on: windows-latest - steps: - - uses: actions/checkout@v6 + echo "whisper-server smoke command exited with status ${smoke_status}" - - name: Install Java - uses: actions/setup-java@v5 - with: - distribution: zulu - java-version: 20 + tar -czf "$ARCHIVE" "$STAGE" + echo "ARCHIVE=$ARCHIVE" >> "$GITHUB_ENV" - - name: Download Whisper Windows lib - uses: actions/download-artifact@v7 + - uses: actions/upload-artifact@v4 with: - name: whisper_x64.dll + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - - name: Download GGML Windows lib - uses: actions/download-artifact@v7 - with: - name: ggml_x64.dll +# ════════════════════════════════════════════════════════════════════════════════ +# 8. 
CPU — Linux +# ════════════════════════════════════════════════════════════════════════════════ + linux-cpu: + runs-on: ubuntu-latest + needs: determine-tag - - name: Download GGML Base Windows lib - uses: actions/download-artifact@v7 - with: - name: ggml_base_x64.dll + steps: + - uses: actions/checkout@v4 - - name: Download GGML CPU Windows lib - uses: actions/download-artifact@v7 - with: - name: ggml_cpu_x64.dll + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake git libsdl2-dev pkg-config + echo "cmake $(cmake --version | head -1)" + echo "gcc $(gcc --version | head -1)" + + - name: Configure CMake + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_NATIVE=OFF \ + -DGGML_AVX=ON \ + -DGGML_AVX2=ON \ + -DGGML_FMA=ON \ + -DGGML_AVX512=OFF \ + -DCMAKE_C_FLAGS="-march=x86-64-v3" \ + -DCMAKE_CXX_FLAGS="-march=x86-64-v3" \ + -DWHISPER_BUILD_EXAMPLES=ON \ + -DWHISPER_BUILD_TESTS=OFF \ + -DWHISPER_BUILD_SERVER=ON - - name: Download SDL2.dll - uses: actions/download-artifact@v7 - with: - name: x64_SDL2.dll + - name: Build + run: cmake --build build --config Release -j$(nproc) - - name: List downloaded files - shell: pwsh + - name: List build output run: | - Get-ChildItem -Path "." 
-Recurse -Filter "*.dll" + find build/bin -type f | sort + find build/bin -type f -executable | while read f; do ls -lh "$f"; done - - name: Move DLL to correct location - shell: pwsh + - name: Package run: | - New-Item -Path "build\bin\Release" -ItemType Directory -Force - - Copy-Item -Path "whisper.dll" -Destination "build\bin\Release\whisper.dll" -Force - Write-Host "Copied whisper.dll to build\bin\Release\whisper.dll directory" + VER="${{ needs.determine-tag.outputs.version }}" + ARCHIVE="whisper-${VER}-linux-cpu-x86_64.tar.gz" + STAGE="whisper-${VER}-linux-cpu-x86_64" + mkdir -p "$STAGE" + cp -r build/bin/* "$STAGE/" 2>/dev/null || true + find build -name "*.so*" -exec cp {} "$STAGE/" \; 2>/dev/null || true + printf "whisper.cpp CPU build for Linux\nDate: %s\nArch: %s\n" \ + "$(date -u +"%Y-%m-%d %H:%M:%S UTC")" "$(uname -m)" > "$STAGE/README.txt" + tar -czf "$ARCHIVE" "$STAGE" + echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV - Copy-Item -Path "ggml.dll" -Destination "build\bin\Release\ggml.dll" -Force - Write-Host "Copied ggml.dll to build\bin\Release\ggml.dll directory" + - uses: actions/upload-artifact@v4 + with: + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - Copy-Item -Path "ggml-base.dll" -Destination "build\bin\Release\ggml-base.dll" -Force - Write-Host "Copied ggml-base.dll to build\bin\Release\ggml-base.dll directory" +# ════════════════════════════════════════════════════════════════════════════════ +# 8. 
CPU — Windows +# ════════════════════════════════════════════════════════════════════════════════ + windows-cpu: + runs-on: windows-latest + needs: determine-tag - Copy-Item -Path "ggml-cpu.dll" -Destination "build\bin\Release\ggml-cpu.dll" -Force - Write-Host "Copied ggml-cpu.dll to build\bin\Release\ggml-cpu.dll directory" + steps: + - uses: actions/checkout@v4 - Copy-Item -Path "SDL2.dll" -Destination "build\bin\Release\SDL2.dll" -Force - Write-Host "Copied SDL2.dll to build\bin\Release\SDL2.dll directory" + - uses: microsoft/setup-msbuild@v2 - - name: List build release files + - name: Fetch SDL2 shell: pwsh run: | - Get-ChildItem -Path "build\Release" -Recurse -Filter "*.dll" - - - name: Build - run: | - models\download-ggml-model.cmd tiny.en models/ - cd bindings/java - chmod +x ./gradlew - ./gradlew build --info + C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-2.28.5/SDL2-devel-2.28.5-VC.zip + 7z x sdl2.zip + "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-2.28.5/cmake" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - - name: Pack jar artifacts + - name: Configure CMake shell: pwsh run: | - Compress-Archive -Path "bindings/java/build/libs/whispercpp-*.jar" -DestinationPath "whispercpp.jar.zip" + cmake -S . 
-B ./build -A x64 ` + -DCMAKE_BUILD_TYPE=Release ` + -DGGML_NATIVE=OFF ` + -DGGML_AVX=ON ` + -DGGML_AVX2=ON ` + -DGGML_FMA=ON ` + -DGGML_AVX512=OFF ` + -DBUILD_SHARED_LIBS=ON ` + -DWHISPER_BUILD_SERVER=ON ` + -DWHISPER_SDL2=ON - - name: Upload jar - uses: actions/upload-artifact@v6 - with: - name: whispercpp.jar.zip - path: whispercpp.jar.zip - -# - name: Publish package -# if: ${{ github.ref == 'refs/heads/master' }} -# uses: gradle/gradle-build-action@v2.4.2 -# with: -# arguments: publish -# build-root-directory: bindings/java -# env: -# MAVEN_USERNAME: ${{ secrets.JIRA_USER }} -# MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }} -# PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }} -# PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} - - quantize: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 + - name: Build + run: cd ./build && msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64 - steps: - - name: Clone - uses: actions/checkout@v6 + - name: Copy SDL2.dll + shell: pwsh + run: copy "$env:SDL2_DIR/../lib/x64/SDL2.dll" build/bin/Release - - name: Test quantize + - name: Package + shell: pwsh run: | - ./models/download-ggml-model.sh tiny.en - cmake -B build - cmake --build build --config Release - ./build/bin/whisper-quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0 - - release: - if: ${{ github.event.inputs.create_release == 'true' || github.event.inputs.pre_release_tag != '' || startsWith(github.ref, 'refs/tags/v') }} - - runs-on: ubuntu-latest - - needs: - - determine-tag - - ios-xcode-build - - windows - - windows-blas - - windows-cublas - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - with: - fetch-depth: 0 + $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-cpu-x64.zip" + Compress-Archive -Path "build/bin/Release/*" -DestinationPath $a -Force + "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append 
-Encoding utf8 - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 + - uses: actions/upload-artifact@v4 with: - key: release - evict-old-files: 1d + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} + +# ════════════════════════════════════════════════════════════════════════════════ +# 9. Publish GitHub Release +# ════════════════════════════════════════════════════════════════════════════════ +# Shared model download step (reused across all test jobs via inline steps) +# Models: ggml-tiny.bin from HuggingFace ggerganov/whisper.cpp +# ggml-tiny-encoder-vitisai.rai from amd/whisper-tiny-onnx-npu +# ════════════════════════════════════════════════════════════════════════════════ + +# ════════════════════════════════════════════════════════════════════════════════ +# 9. Test — CPU Windows (GitHub-hosted, no GPU needed) +# ════════════════════════════════════════════════════════════════════════════════ + test-cpu-windows: + runs-on: windows-latest + needs: [determine-tag, windows-cpu] + if: needs.windows-cpu.result == 'success' + continue-on-error: true + steps: + - uses: actions/checkout@v4 - # Downloads all the artifacts from the previous jobs - - name: Download artifacts - id: download-artifact - uses: actions/download-artifact@v7 + - name: Download artifact + uses: actions/download-artifact@v4 with: + name: whisper-${{ needs.determine-tag.outputs.version }}-windows-cpu-x64.zip path: ./artifact - - name: Move artifacts - id: move_artifacts - run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release - - - name: Create release - id: create_release - uses: ggml-org/action-create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ needs.determine-tag.outputs.tag_name }} - prerelease: ${{ github.event.inputs.pre_release_tag != '' }} - draft: true - - - name: Upload release - id: upload_release - uses: actions/github-script@v3 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - script: | - const path = 
require('path'); - const fs = require('fs'); - const release_id = '${{ steps.create_release.outputs.id }}'; - for (let file of await fs.readdirSync('./artifact/release')) { - if (path.extname(file) === '.zip') { - console.log('uploadReleaseAsset', file); - await github.repos.uploadReleaseAsset({ - owner: context.repo.owner, - repo: context.repo.repo, - release_id: release_id, - name: file, - data: await fs.readFileSync(`./artifact/release/${file}`) - }); - } - } - - coreml-base-en: - if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') || - github.event.inputs.create_release == 'true' || - github.event.inputs.pre_release_tag != '' || - startsWith(github.ref, 'refs/tags/v') }} - runs-on: macos-latest - needs: determine-tag - - steps: - - name: Checkout code - uses: actions/checkout@v6 - - - name: Set environment variables - id: set_vars + - name: Extract + shell: pwsh run: | - echo "MODEL_NAME=base.en" >> $GITHUB_ENV - echo "GEN_MODEL_NAME=whisper-${{ needs.determine-tag.outputs.tag_name }}-ggml-base.en-encoder.mlmodelc" >> $GITHUB_ENV + New-Item -ItemType Directory -Force -Path bin | Out-Null + Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force - - name: Download model + - name: Download tiny model + shell: pwsh run: | - ./models/download-ggml-model.sh ${{ env.MODEL_NAME }} + New-Item -ItemType Directory -Force -Path models | Out-Null + Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" - - name: Generate CoreML model + - name: Run and verify + shell: pwsh run: | - python3.11 -m venv venv - source venv/bin/activate - pip install ane_transformers openai-whisper coremltools - ./models/generate-coreml-model.sh ${{ env.MODEL_NAME }} + .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + $text = Get-Content "jfk-result.txt" -Raw + Write-Host $text + if ($text -match 
"country|ask not|nation|kennedy") { Write-Host "PASS" -ForegroundColor Green } + else { Write-Error "FAIL: expected words not found"; exit 1 } - vad: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} +# ════════════════════════════════════════════════════════════════════════════════ +# 10. Test — CPU Linux (GitHub-hosted, no GPU needed) +# ════════════════════════════════════════════════════════════════════════════════ + test-cpu-linux: runs-on: ubuntu-latest - + needs: [determine-tag, linux-cpu] + if: needs.linux-cpu.result == 'success' + continue-on-error: true steps: - - name: Checkout - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: Build - shell: bash + - name: Download artifact + uses: actions/download-artifact@v4 + with: + name: whisper-${{ needs.determine-tag.outputs.version }}-linux-cpu-x86_64.tar.gz + path: ./artifact + + - name: Extract run: | - cmake -B build - cmake --build build --config Release + mkdir -p bin + tar -xzf artifact/*.tar.gz --strip-components=1 -C bin + chmod +x bin/whisper-cli - - name: Test - shell: bash + - name: Download tiny model run: | - ctest -R ^test-vad$ --test-dir build --output-on-failure -VV + mkdir -p models + curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -# TODO: simplify the following workflows using a matrix - ggml-ci-x64-cpu-low-perf: - runs-on: ubuntu-22.04 + - name: Run and verify + run: | + LD_LIBRARY_PATH=$(pwd)/bin:$LD_LIBRARY_PATH ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + cat jfk-result.txt + grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; } +# ════════════════════════════════════════════════════════════════════════════════ +# 11. 
Test — Vulkan Windows (stx-halo, has Vulkan driver) +# ════════════════════════════════════════════════════════════════════════════════ + test-vulkan-windows: + runs-on: [self-hosted, Windows, stx-halo] + needs: [determine-tag, windows-vulkan] + if: needs.windows-vulkan.result == 'success' + continue-on-error: true steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 + - name: Download artifact + uses: actions/download-artifact@v4 with: - key: ggml-ci-x64-cpu-low-perf - evict-old-files: 1d + name: whisper-${{ needs.determine-tag.outputs.version }}-windows-vulkan-x64.zip + path: ./artifact - - name: Dependencies - id: depends + - name: Extract + shell: powershell run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev + New-Item -ItemType Directory -Force -Path bin | Out-Null + Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force - - name: Test - id: ggml-ci + - name: Download tiny model + shell: powershell run: | - LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + New-Item -ItemType Directory -Force -Path models | Out-Null + Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" - ggml-ci-arm64-cpu-low-perf: - runs-on: ubuntu-22.04-arm + - name: Run and verify + shell: powershell + run: | + .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + $text = Get-Content "jfk-result.txt" -Raw + Write-Host $text + if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" } + else { Write-Error "FAIL: expected words not found"; exit 1 } +# ════════════════════════════════════════════════════════════════════════════════ +# 12. 
Test — Vulkan Linux (stx-halo, has Vulkan driver) +# ════════════════════════════════════════════════════════════════════════════════ + test-vulkan-linux: + runs-on: [self-hosted, Linux, stx-halo] + needs: [determine-tag, linux-vulkan] + if: needs.linux-vulkan.result == 'success' + continue-on-error: true steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 + - name: Download artifact + uses: actions/download-artifact@v4 with: - key: ggml-ci-arm64-cpu-low-perf - evict-old-files: 1d + name: whisper-${{ needs.determine-tag.outputs.version }}-linux-vulkan-x86_64.tar.gz + path: ./artifact - - name: Dependencies - id: depends + - name: Extract run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev + mkdir -p bin + tar -xzf artifact/*.tar.gz --strip-components=1 -C bin + chmod +x bin/whisper-cli - - name: Test - id: ggml-ci + - name: Download tiny model run: | - LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + mkdir -p models + curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" - ggml-ci-x64-cpu-high-perf: - runs-on: ubuntu-22.04 + - name: Run and verify + run: | + LD_LIBRARY_PATH=$(pwd)/bin:$LD_LIBRARY_PATH ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + cat jfk-result.txt + grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; } +# ════════════════════════════════════════════════════════════════════════════════ +# 13. 
Test — ROCm Windows (stx-halo, gfx1151) +# ════════════════════════════════════════════════════════════════════════════════ + test-rocm-windows: + runs-on: [self-hosted, Windows, stx-halo] + needs: [determine-tag, windows-rocm] + if: needs.windows-rocm.result == 'success' + continue-on-error: true steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 + - name: Download ROCm artifact (gfx1151 - stx-halo GPU target) + uses: actions/download-artifact@v4 with: - key: ggml-ci-x64-cpu-high-perf - evict-old-files: 1d + name: whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-gfx1151.zip + path: ./artifact - - name: Dependencies - id: depends + - name: Extract + shell: powershell run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev + New-Item -ItemType Directory -Force -Path bin | Out-Null + Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force - - name: Test - id: ggml-ci + - name: Download tiny model + shell: powershell run: | - LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt + New-Item -ItemType Directory -Force -Path models | Out-Null + Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" - ggml-ci-arm64-cpu-high-perf: - runs-on: ubuntu-22.04-arm + - name: Run and verify + shell: powershell + run: | + .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + $text = Get-Content "jfk-result.txt" -Raw + Write-Host $text + if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" } + else { Write-Error "FAIL: expected words not found"; exit 1 } +# ════════════════════════════════════════════════════════════════════════════════ +# 14. 
Test — ROCm Linux (stx-halo, gfx1151) +# ════════════════════════════════════════════════════════════════════════════════ + test-rocm-linux: + runs-on: [self-hosted, Linux, stx-halo] + needs: [determine-tag, linux-rocm] + if: needs.linux-rocm.result == 'success' + continue-on-error: true steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 + - name: Download ROCm artifact (gfx1151) + uses: actions/download-artifact@v4 with: - key: ggml-ci-arm64-cpu-high-perf - evict-old-files: 1d + name: whisper-${{ needs.determine-tag.outputs.version }}-linux-rocm-gfx1151.tar.gz + path: ./artifact - - name: Dependencies - id: depends + - name: Extract run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev + mkdir -p bin + tar -xzf artifact/*.tar.gz --strip-components=1 -C bin + chmod +x bin/whisper-cli - - name: Test - id: ggml-ci + - name: Download tiny model run: | - LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + mkdir -p models + curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" - ggml-ci-arm64-cpu-high-perf-sve: - runs-on: ubuntu-22.04-arm + - name: Run and verify + run: | + LD_LIBRARY_PATH=$(pwd)/bin:$LD_LIBRARY_PATH ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + cat jfk-result.txt + grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; } +# ════════════════════════════════════════════════════════════════════════════════ +# 15. 
Test — NPU Windows (rai300_400 runner, needs FlexML + .rai model) +# ════════════════════════════════════════════════════════════════════════════════ + test-npu-windows: + runs-on: [self-hosted, Windows, stx, rai300_400] + needs: [determine-tag, windows-npu] + if: needs.windows-npu.result == 'success' + continue-on-error: true steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 + - name: Download NPU artifact + uses: actions/download-artifact@v4 with: - key: ggml-ci-arm64-cpu-high-perf-sve - evict-old-files: 1d + name: whisper-${{ needs.determine-tag.outputs.version }}-windows-npu-x64.zip + path: ./artifact - - name: Dependencies - id: depends + - name: Extract + shell: powershell run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev + New-Item -ItemType Directory -Force -Path bin | Out-Null + Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force - - name: Test - id: ggml-ci + - name: Download FlexML Runtime and setup environment + shell: powershell run: | - LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt - - ggml-ci-x64-nvidia-cuda: - runs-on: [self-hosted, Linux, mnt-root, NVIDIA] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + Invoke-WebRequest -Uri "${{ env.FLEXML_URL }}" -OutFile flexmlrt.zip + tar xvf flexmlrt.zip + Remove-Item flexmlrt.zip - - name: Test - id: ggml-ci + - name: Setup FlexML environment + shell: cmd run: | - nvidia-smi - GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp - - ggml-ci-x64-nvidia-vulkan-cm: - runs-on: [self-hosted, Linux, mnt-root, NVIDIA] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + cd flexmlrt + call setup.bat + if errorlevel 1 ( echo ERROR: FlexML setup failed! 
& exit /b 1 ) - - name: Test - id: ggml-ci + - name: Copy FlexML DLLs to bin + shell: powershell run: | - vulkaninfo --summary - GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp - - ggml-ci-x64-nvidia-vulkan-cm2: - runs-on: [self-hosted, Linux, mnt-root, NVIDIA, COOPMAT2] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + $flexml = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1 + foreach ($sub in @("bin","lib")) { + $path = Join-Path $flexml.FullName $sub + if (Test-Path $path) { + Get-ChildItem "$path\*.dll" -ErrorAction SilentlyContinue | + ForEach-Object { Copy-Item $_.FullName "bin\" -Force } + } + } - - name: Test - id: ggml-ci + - name: Download models (ggml weights + .rai NPU encoder) + shell: powershell run: | - vulkaninfo --summary - GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp - - #ggml-ci-x64-cpu-amx: - # runs-on: [self-hosted, Linux, X64, CPU, AMX] + New-Item -ItemType Directory -Force -Path models | Out-Null + Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" + Invoke-WebRequest -Uri "https://huggingface.co/amd/whisper-tiny-onnx-npu/resolve/main/ggml-tiny-encoder-vitisai.rai" -OutFile "models\ggml-tiny-encoder-vitisai.rai" + Write-Host "Models:" + Get-ChildItem models | Format-Table Name, Length - # steps: - # - name: Clone - # id: checkout - # uses: actions/checkout@v6 - - # - name: Test - # id: ggml-ci - # run: | - # bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp + - name: Run and verify (NPU encoder + CPU decoder) + shell: powershell + run: | + .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + $text = Get-Content "jfk-result.txt" -Raw + Write-Host $text + if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS: transcription correct" -ForegroundColor Green } + else 
{ Write-Error "FAIL: expected words not found"; exit 1 } - ggml-ci-mac-metal: - runs-on: [self-hosted, macOS, ARM64] +# ════════════════════════════════════════════════════════════════════════════════ +# 16. Publish GitHub Release (only after all tests pass or are skipped) +# ════════════════════════════════════════════════════════════════════════════════ + release: + if: | + always() && + needs.determine-tag.outputs.should_release == 'true' && + (needs.test-cpu-windows.result == 'success' || needs.test-cpu-windows.result == 'skipped') && + (needs.test-cpu-linux.result == 'success' || needs.test-cpu-linux.result == 'skipped') && + (needs.test-vulkan-windows.result == 'success' || needs.test-vulkan-windows.result == 'skipped') && + (needs.test-vulkan-linux.result == 'success' || needs.test-vulkan-linux.result == 'skipped') && + (needs.test-rocm-windows.result == 'success' || needs.test-rocm-windows.result == 'skipped') && + (needs.test-rocm-linux.result == 'success' || needs.test-rocm-linux.result == 'skipped') && + (needs.test-npu-windows.result == 'success' || needs.test-npu-windows.result == 'skipped') && + (needs.macos-metal.result == 'success' || needs.macos-metal.result == 'skipped') + runs-on: ubuntu-latest + needs: + - determine-tag + - linux-rocm + - windows-rocm + - linux-vulkan + - windows-vulkan + - windows-npu + - macos-metal + - linux-cpu + - windows-cpu + - test-cpu-windows + - test-cpu-linux + - test-vulkan-windows + - test-vulkan-linux + - test-rocm-windows + - test-rocm-linux + - test-npu-windows steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + - uses: actions/checkout@v4 + with: + fetch-depth: 0 - - name: Test - id: ggml-ci + - name: Download all artifacts + uses: actions/download-artifact@v4 + with: + path: ./artifacts + + - name: Flatten artifacts into release/ run: | - GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp + mkdir -p release + find ./artifacts -mindepth 2 \( -name '*.zip' -o -name 
'*.tar.gz' \) -exec mv {} release/ \; + echo "Release assets:" + ls -lh release/ - ggml-ci-mac-vulkan: - runs-on: [self-hosted, macOS, ARM64] + - name: Delete existing release/tag if present + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + TAG="${{ needs.determine-tag.outputs.tag_name }}" + RELEASE_ID=$(gh api repos/${{ github.repository }}/releases/tags/"$TAG" --jq '.id' 2>/dev/null || true) + if [ -n "$RELEASE_ID" ]; then + echo "Deleting existing release $RELEASE_ID for tag $TAG" + gh api -X DELETE repos/${{ github.repository }}/releases/"$RELEASE_ID" + fi + git push --delete origin "refs/tags/$TAG" 2>/dev/null || true - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + - name: Create release + id: create_release + uses: ggml-org/action-create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ needs.determine-tag.outputs.tag_name }} + release_name: "whisper.cpp ${{ needs.determine-tag.outputs.tag_name }} - AMD Builds" + prerelease: ${{ github.event.inputs.pre_release_tag != '' }} + draft: false + body: | + ## AMD whisper.cpp ${{ needs.determine-tag.outputs.tag_name }} + + AMD-based pre-built binaries of [whisper.cpp ${{ needs.determine-tag.outputs.tag_name }}](https://github.com/ggerganov/whisper.cpp/releases/tag/${{ needs.determine-tag.outputs.tag_name }}) with full hardware acceleration across ROCm GPU (iGPU and dGPU), NPU (RyzenAI), and CPU — for Linux and Windows. + All ROCm runtime libraries (ROCm ${{ env.ROCM_VERSION }}) are bundled. No drivers or separate installs required — download, extract, and run. 
+ + ### Packages + + | Target | Linux | Windows | + |---|---|---| + | ROCm gfx1151 (Ryzen AI MAX+ Pro 395) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx1151.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx1151.zip` | + | ROCm gfx1150 (Ryzen AI 300) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx1150.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx1150.zip` | + | ROCm gfx120X (RDNA4 dGPU) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx120X.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx120X.zip` | + | ROCm gfx110X (RDNA3 dGPU & iGPU) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx110X.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx110X.zip` | + | Vulkan (cross-vendor) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-vulkan-x86_64.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-vulkan-x64.zip` | + | NPU (RyzenAI) | — | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-npu-x64.zip` | + | Metal (Apple Silicon) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-darwin-metal-arm64.tar.gz` | — | + | CPU only | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-cpu-x86_64.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-cpu-x64.zip` | + + - name: Upload release assets + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const path = require('path'); + const id = '${{ steps.create_release.outputs.id }}'; + for (const file of fs.readdirSync('./release')) { + if (!file.endsWith('.zip') && !file.endsWith('.tar.gz')) continue; + console.log('Uploading:', file); + await github.rest.repos.uploadReleaseAsset({ + owner: context.repo.owner, + repo: context.repo.repo, + release_id: id, + name: file, + data: 
fs.readFileSync(`./release/${file}`), + }); + } - - name: Test - id: ggml-ci + - name: Update README download links run: | - vulkaninfo --summary - GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp + TAG="${{ needs.determine-tag.outputs.tag_name }}" + # Replace the placeholder tag in all download URLs with the actual release tag + sed -i "s|/releases/download/[^/]*/whisper-[^-]*-|/releases/download/${TAG}/whisper-${TAG}-|g" README.md + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add README.md + git diff --cached --quiet || git commit -m "docs: update download links to ${TAG}" + git push diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml deleted file mode 100644 index 6c0de0ece70..00000000000 --- a/.github/workflows/docker.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: Publish Docker image - -on: - pull_request: - push: - branches: - - master - -jobs: - push_to_registry: - name: Push Docker image to Docker Hub - if: github.event.pull_request.draft == false - - runs-on: ubuntu-22.04 - env: - COMMIT_SHA: ${{ github.sha }} - strategy: - fail-fast: false - matrix: - config: - - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" } - - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" } - - { tag: "main-intel", dockerfile: ".devops/main-intel.Dockerfile", platform: "linux/amd64" } - - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" } - - { tag: "main-vulkan", dockerfile: ".devops/main-vulkan.Dockerfile", platform: "linux/amd64" } - - steps: - - name: Check out the repo - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - with: - image: tonistiigi/binfmt:qemu-v7.0.0-28 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to Docker Hub - uses: docker/login-action@v3 - with: - 
registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Free up disk space - run: | - sudo apt-get remove -y '^dotnet-.*' '^llvm-.*' '^mysql-.*' '^postgresql-.*' - sudo apt-get autoremove -y - sudo apt-get autoclean - - sudo rm -rf /usr/share/dotnet - sudo rm -rf /usr/local/lib/android - sudo rm -rf /opt/ghc - sudo rm -rf /opt/hostedtoolcache/CodeQL - - docker system prune -af - - df -h - - - name: Generate tags - id: tags - run: | - TAGS="ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}" - if [ "${{ github.event_name }}" == "push" ]; then - TAGS="$TAGS,ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}" - fi - echo "tags=$TAGS" >> $GITHUB_OUTPUT - - - name: Build and push Docker image (tagged) - uses: docker/build-push-action@v6 - with: - context: . - push: ${{ github.event_name == 'push' }} - platforms: ${{ matrix.config.platform }} - tags: ${{ steps.tags.outputs.tags }} - file: ${{ matrix.config.dockerfile }} diff --git a/.github/workflows/examples-wasm.yml b/.github/workflows/examples-wasm.yml deleted file mode 100644 index 927438cdad8..00000000000 --- a/.github/workflows/examples-wasm.yml +++ /dev/null @@ -1,97 +0,0 @@ -name: Examples WASM -on: - push: - branches: ["master"] - - workflow_dispatch: - -permissions: - contents: read - pages: write - id-token: write - -concurrency: - group: "pages" - cancel-in-progress: false - -jobs: - deploy-wasm-github-pages: - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v6 - - - name: Setup Pages - uses: actions/configure-pages@v5 - - - name: Setup emsdk - uses: mymindstorm/setup-emsdk@v14 - - - name: Build WASM Examples - # Enable for real build later in whisper.cpp - run: | - mkdir -p build-em && cd build-em - emcmake cmake .. 
-DCMAKE_BUILD_TYPE=Release - make -j - - - name: Create staging directory - run: mkdir -p staging - - - name: Create .nojekyll file in staging directory - run: touch staging/.nojekyll - - - name: Copy application files - run: | - build_dir=build-em/bin - - ls ${build_dir} - - # command.wasm - target_dir=staging/command.wasm - mkdir -p ${target_dir} - cp ${build_dir}/command.wasm/{index.html,command.js,helpers.js} ${target_dir} - cp ${build_dir}/libcommand.js ${target_dir} - - # bench.wasm - target_dir=staging/bench.wasm - mkdir -p ${target_dir} - cp ${build_dir}/bench.wasm/{index.html,bench.js,helpers.js} ${target_dir} - cp ${build_dir}/libbench.js ${target_dir} - - # stream.wasm - target_dir=staging/stream.wasm - mkdir -p ${target_dir} - cp ${build_dir}/stream.wasm/{index.html,stream.js,helpers.js} ${target_dir} - cp ${build_dir}/libstream.js ${target_dir} - - # wchess.wasm - target_dir=staging/wchess.wasm - mkdir -p ${target_dir} - cp -r ${build_dir}/wchess.wasm/{index.html,css,img,js} ${target_dir} - cp ${build_dir}/wchess.wasm.js ${target_dir} - - # whisper.wasm (this will be the main example page) - target_dir=staging - mkdir -p ${target_dir} - cp ${build_dir}/whisper.wasm/{index.html,main.js,helpers.js} ${target_dir} - cp ${build_dir}/libmain.js ${target_dir} - - # Copy Cross-Origin Isolation service worker - cp -v examples/coi-serviceworker.js staging/ - - - name: List files in staging directory (for debugging) - run: | - echo "Files in staging directory:" - find staging -type f | sort - - - name: Upload artifact - uses: actions/upload-pages-artifact@v4 - with: - path: ./staging - - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml deleted file mode 100644 index 1c9ade5a300..00000000000 --- a/.github/workflows/examples.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: Examples Tests -on: - push: - paths: - - examples/addon.node/** - - whisper.h - pull_request: 
- paths: - - examples/addon.node/** - - whisper.h - -jobs: - addon_node-ubuntu-22: - runs-on: ubuntu-22.04 - strategy: - matrix: - node-version: [ 16.x, 18.x ] - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential git - sudo apt-get install cmake - sudo apt-get install libsdl2-dev - - - name: Use Node.js ${{ matrix.node-version }} - uses: actions/setup-node@v6 - with: - node-version: ${{ matrix.node-version }} - cache: 'npm' - - - name: Install package.json dependencies - working-directory: ./examples/addon.node - run: npm install - - - name: Compile addon.node - run: npx cmake-js compile -T addon.node -B Release - - - name: Download test model - run: | - bash ./models/download-ggml-model.sh base.en - - name: Test - run: | - cd examples/addon.node - npm run test diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml new file mode 100644 index 00000000000..47693822a04 --- /dev/null +++ b/.github/workflows/sync.yml @@ -0,0 +1,146 @@ +name: Sync Upstream & Auto-Release + +# Runs daily to detect new upstream whisper.cpp releases. +# When a new release is found: +# - clean merge → pushes main + creates tag vX.Y.Z → triggers build.yml +# - conflict → opens a PR for manual resolution, does NOT tag + +on: + workflow_dispatch: + inputs: + upstream_tag: + description: 'Force a specific upstream tag (e.g. v1.8.5). Leave blank to auto-detect latest.' 
+ required: false + type: string + dry_run: + description: 'Dry run — merge locally but do not push or tag' + required: false + type: boolean + default: false + +permissions: + contents: write + pull-requests: write + +jobs: + sync-and-tag: + runs-on: ubuntu-latest + + steps: + - name: Checkout (full history + tags) + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Configure git identity + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Add upstream remote + run: git remote add upstream https://github.com/ggerganov/whisper.cpp || true + + # ── Detect which upstream release to target ────────────────────────── + - name: Detect upstream release + id: upstream + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + if [[ -n "${{ github.event.inputs.upstream_tag }}" ]]; then + UPSTREAM_TAG="${{ github.event.inputs.upstream_tag }}" + echo "Using manually specified tag: $UPSTREAM_TAG" + else + UPSTREAM_TAG=$(gh api repos/ggerganov/whisper.cpp/releases/latest --jq '.tag_name') + echo "Latest upstream release: $UPSTREAM_TAG" + fi + + # Strip leading 'v' for use in artifact filenames + VERSION="${UPSTREAM_TAG#v}" + + echo "tag=$UPSTREAM_TAG" >> $GITHUB_OUTPUT + echo "version=$VERSION" >> $GITHUB_OUTPUT + + # ── Check if we already have a release for this upstream version ────── + - name: Check if already released + id: check + run: | + git fetch --tags + # Our tags match the upstream tag exactly (e.g. v1.8.4) + EXISTING=$(git tag -l "${{ steps.upstream.outputs.tag }}" | head -1) + if [[ -n "$EXISTING" ]]; then + echo "already_released=true" >> $GITHUB_OUTPUT + echo "::notice::Already have release $EXISTING — nothing to do." 
+ else + echo "already_released=false" >> $GITHUB_OUTPUT + echo "New upstream release detected: ${{ steps.upstream.outputs.tag }}" + fi + + # ── Merge upstream tag into main ───────────────────────────────────── + - name: Fetch upstream tags + if: steps.check.outputs.already_released == 'false' + run: git fetch upstream --tags + + - name: Attempt merge + if: steps.check.outputs.already_released == 'false' + run: | + git merge "${{ steps.upstream.outputs.tag }}" --no-edit || echo "CONFLICT=true" >> $GITHUB_ENV + + # ── Conflict path: open PR, do NOT tag (skipped on dry runs: it pushes a branch) ── + - name: Open conflict PR + if: steps.check.outputs.already_released == 'false' && env.CONFLICT == 'true' && github.event.inputs.dry_run != 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + CONFLICT_FILES=$(git diff --name-only --diff-filter=U | tr '\n' ', ') + git merge --abort + + BRANCH="sync/${{ steps.upstream.outputs.tag }}" + git checkout -b "$BRANCH" + # Resolve conflicts by preferring upstream (theirs) so the branch is pushable + git merge "${{ steps.upstream.outputs.tag }}" --no-edit --strategy-option=theirs || true + git add -A + git commit -m "chore: merge upstream ${{ steps.upstream.outputs.tag }} (auto-resolved via theirs)" --allow-empty + git push origin "$BRANCH" + + TAG="${{ steps.upstream.outputs.tag }}" + echo "## Upstream sync: ${TAG}" > /tmp/pr-body.md + echo "" >> /tmp/pr-body.md + echo "Conflicts were detected during automatic merge. Files affected:" >> /tmp/pr-body.md + echo "" >> /tmp/pr-body.md + echo " ${CONFLICT_FILES}" >> /tmp/pr-body.md + echo "" >> /tmp/pr-body.md + echo "This PR was auto-resolved using upstream (theirs) as a baseline." >> /tmp/pr-body.md + echo "Please review the diff carefully before merging."
>> /tmp/pr-body.md + echo "" >> /tmp/pr-body.md + echo "Once merged, manually create the release tag on main to trigger the build:" >> /tmp/pr-body.md + echo "" >> /tmp/pr-body.md + echo " git tag ${TAG}" >> /tmp/pr-body.md + echo " git push origin ${TAG}" >> /tmp/pr-body.md + + gh pr create \ + --title "Sync upstream ${TAG} - conflict resolution needed" \ + --body-file /tmp/pr-body.md \ + --base main \ + --head "$BRANCH" + + echo "::warning::Merge conflict detected - PR opened for manual resolution. Release build NOT triggered." + + # ── Clean merge path: push main + tag → triggers build.yml ─────────── + - name: Push merged main + if: steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true' && github.event.inputs.dry_run != 'true' + run: git push origin HEAD:main + + - name: Create and push release tag + if: steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true' && github.event.inputs.dry_run != 'true' + run: | + TAG="${{ steps.upstream.outputs.tag }}" + git tag -f -a "$TAG" -m "AMD builds for upstream $TAG" # 'git fetch upstream --tags' already created a local tag with this name; re-point it at our merge commit + git push origin "$TAG" # NOTE(review): pushes made with GITHUB_TOKEN do not start other workflows; confirm build.yml is triggered some other way + echo "::notice::Pushed tag $TAG — build.yml will now run and publish the release." + + - name: Dry-run summary + if: github.event.inputs.dry_run == 'true' && steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true' + run: | + echo "DRY RUN — merge was clean. Would have pushed main and tagged ${{ steps.upstream.outputs.tag }}." + echo "Re-run with dry_run=false to publish." diff --git a/.github/workflows/test-whisper.yml b/.github/workflows/test-whisper.yml new file mode 100644 index 00000000000..3a9d2047ff3 --- /dev/null +++ b/.github/workflows/test-whisper.yml @@ -0,0 +1,282 @@ +name: Test whisper-cli + +# Downloads a published release artifact and runs whisper-cli against jfk.wav. +# Tests on real self-hosted GPU hardware (stx-halo runners). +# Trigger manually after a release, or let it run automatically via workflow_dispatch +# from build.yml once artifacts are published.
+ +on: + workflow_dispatch: + inputs: + release_tag: + description: 'Release tag to test (e.g. v1.8.4) or "latest"' + required: false + default: 'latest' + type: string + gfx_target: + description: 'ROCm GPU target to test' + required: false + default: 'gfx1151' + type: string + +env: + RELEASE_TAG: ${{ github.event.inputs.release_tag || 'latest' }} + GFX_TARGET: ${{ github.event.inputs.gfx_target || 'gfx1151' }} + +jobs: + + # --------------------------------------------------------------------------- + # Resolve release tag (latest or specific) + # --------------------------------------------------------------------------- + prepare: + runs-on: ubuntu-latest + outputs: + release_tag: ${{ steps.resolve.outputs.release_tag }} + steps: + - name: Resolve release tag + id: resolve + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + if [ "${{ env.RELEASE_TAG }}" = "latest" ]; then + TAG=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/${{ github.repository }}/releases/latest" \ + | jq -r '.tag_name') + echo "Resolved latest release: $TAG" + else + TAG="${{ env.RELEASE_TAG }}" + STATUS=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/${{ github.repository }}/releases/tags/$TAG") + if [ "$STATUS" != "200" ]; then + echo "Error: Release $TAG not found (HTTP $STATUS)" + exit 1 + fi + echo "Using specified release: $TAG" + fi + echo "release_tag=$TAG" >> $GITHUB_OUTPUT + + # --------------------------------------------------------------------------- + # Test ROCm artifact on Windows (self-hosted stx-halo GPU runner) + # --------------------------------------------------------------------------- + test-windows-rocm: + runs-on: [self-hosted, Windows, stx-halo] + needs: prepare + + steps: + - name: Checkout (for samples/jfk.wav and models/ scripts) + uses: actions/checkout@v4 + + - name: Download ROCm Windows artifact + env: + GITHUB_TOKEN: ${{ 
secrets.GITHUB_TOKEN }} + shell: pwsh + run: | + $tag = "${{ needs.prepare.outputs.release_tag }}" + $target = "${{ env.GFX_TARGET }}" + # strip leading 'v' for artifact filename (e.g. v1.8.4 -> 1.8.4) + $ver = $tag.TrimStart('v') + $asset = "whisper-${ver}-windows-rocm-${target}.zip" + $repo = "${{ github.repository }}" + + Write-Host "Downloading: $asset from release $tag" + + $headers = @{ "Authorization" = "token $env:GITHUB_TOKEN" } + $release = Invoke-RestMethod -Uri "https://api.github.com/repos/$repo/releases/tags/$tag" -Headers $headers + $found = $release.assets | Where-Object { $_.name -eq $asset } + + if (-not $found) { + Write-Error "Asset '$asset' not found in release '$tag'" + Write-Host "Available assets:" + $release.assets | ForEach-Object { Write-Host " $($_.name)" } + exit 1 + } + + Write-Host "Found: $($found.name) ($([math]::Round($found.size/1MB,2)) MB)" + Invoke-WebRequest -Uri $found.browser_download_url -OutFile $asset -Headers $headers + + Write-Host "Extracting..." + Expand-Archive -Path $asset -DestinationPath whisper-bin -Force + Write-Host "Binaries:" + Get-ChildItem whisper-bin -Filter "*.exe" | Format-Table Name, Length + + - name: Download tiny model + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path models | Out-Null + $url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" + Write-Host "Downloading ggml-tiny.bin..." + Invoke-WebRequest -Uri $url -OutFile "models\ggml-tiny.bin" + $mb = [math]::Round((Get-Item "models\ggml-tiny.bin").Length/1MB,2) + Write-Host "Downloaded: $mb MB" + + - name: Run transcription + shell: pwsh + run: | + $cli = "whisper-bin\whisper-cli.exe" + if (-not (Test-Path $cli)) { + Write-Error "whisper-cli.exe not found. Contents of whisper-bin:" + Get-ChildItem -Recurse whisper-bin | Format-Table Name, Length + exit 1 + } + Write-Host "Running whisper-cli against samples\jfk.wav ..." 
+ & $cli -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + Write-Host "--- Transcription output ---" + Get-Content jfk-result.txt + + - name: Verify transcription + shell: pwsh + run: | + $text = Get-Content "jfk-result.txt" -Raw -ErrorAction SilentlyContinue + if (-not $text) { Write-Error "jfk-result.txt is empty or missing"; exit 1 } + if ($text -match "country|ask not|nation|kennedy") { + Write-Host "PASS: transcription contains expected words" -ForegroundColor Green + } else { + Write-Error "FAIL: expected words not found in transcription" + Write-Host $text + exit 1 + } + + # --------------------------------------------------------------------------- + # Test ROCm artifact on Linux (self-hosted stx-halo GPU runner) + # --------------------------------------------------------------------------- + test-linux-rocm: + runs-on: [self-hosted, Linux, stx-halo] + needs: prepare + + steps: + - name: Checkout (for samples/jfk.wav) + uses: actions/checkout@v4 + + - name: Install jq if needed + run: | + if ! 
command -v jq &>/dev/null; then + mkdir -p ~/bin + curl -L -o ~/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64 + chmod +x ~/bin/jq + echo "$HOME/bin" >> $GITHUB_PATH + fi + + - name: Download ROCm Linux artifact + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + TAG="${{ needs.prepare.outputs.release_tag }}" + TARGET="${{ env.GFX_TARGET }}" + VER="${TAG#v}" + ASSET="whisper-${VER}-linux-rocm-${TARGET}.tar.gz" + REPO="${{ github.repository }}" + + echo "Downloading: $ASSET from release $TAG" + + RELEASE=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$REPO/releases/tags/$TAG") + + URL=$(echo "$RELEASE" | jq -r ".assets[] | select(.name == \"$ASSET\") | .browser_download_url") + + if [ -z "$URL" ] || [ "$URL" = "null" ]; then + echo "Asset '$ASSET' not found in release '$TAG'" + echo "Available assets:" + echo "$RELEASE" | jq -r '.assets[].name' + exit 1 + fi + + SIZE=$(echo "$RELEASE" | jq -r ".assets[] | select(.name == \"$ASSET\") | .size") + echo "Found: $ASSET ($SIZE bytes)" + + curl -L -H "Authorization: token $GITHUB_TOKEN" -o "$ASSET" "$URL" + + echo "Extracting..." + mkdir -p whisper-bin + tar -xzf "$ASSET" --strip-components=1 -C whisper-bin + chmod +x whisper-bin/whisper-cli + echo "Binaries:" + ls -lh whisper-bin/whisper-cli + + - name: Download tiny model + run: | + mkdir -p models + curl -L -o models/ggml-tiny.bin \ + "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" + echo "Downloaded: $(du -h models/ggml-tiny.bin | cut -f1)" + + - name: Set library path + run: echo "LD_LIBRARY_PATH=$(pwd)/whisper-bin:$LD_LIBRARY_PATH" >> $GITHUB_ENV + + - name: Run transcription + run: | + echo "Running whisper-cli against samples/jfk.wav ..." 
+ ./whisper-bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + echo "--- Transcription output ---" + cat jfk-result.txt + + - name: Verify transcription + run: | + if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then + echo "PASS: transcription contains expected words" + else + echo "FAIL: expected words not found in transcription" + cat jfk-result.txt + exit 1 + fi + + # --------------------------------------------------------------------------- + # Test CPU artifact on Linux (GitHub-hosted runner - no GPU needed) + # --------------------------------------------------------------------------- + test-linux-cpu: + runs-on: ubuntu-latest + needs: prepare + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Download CPU Linux artifact + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + TAG="${{ needs.prepare.outputs.release_tag }}" + VER="${TAG#v}" + ASSET="whisper-${VER}-linux-cpu-x86_64.tar.gz" + REPO="${{ github.repository }}" + + echo "Downloading: $ASSET from release $TAG" + + RELEASE=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$REPO/releases/tags/$TAG") + + URL=$(echo "$RELEASE" | jq -r ".assets[] | select(.name == \"$ASSET\") | .browser_download_url") + + if [ -z "$URL" ] || [ "$URL" = "null" ]; then + echo "Asset '$ASSET' not found. 
Available:" + echo "$RELEASE" | jq -r '.assets[].name' + exit 1 + fi + + curl -L -H "Authorization: token $GITHUB_TOKEN" -o "$ASSET" "$URL" + mkdir -p whisper-bin + tar -xzf "$ASSET" --strip-components=1 -C whisper-bin + chmod +x whisper-bin/whisper-cli + ls -lh whisper-bin/whisper-cli + + - name: Download tiny model + run: | + mkdir -p models + curl -L -o models/ggml-tiny.bin \ + "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" + + - name: Run transcription + run: | + ./whisper-bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + echo "--- Transcription output ---" + cat jfk-result.txt + + - name: Verify transcription + run: | + if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then + echo "PASS: transcription contains expected words" + else + echo "FAIL: expected words not found" + cat jfk-result.txt + exit 1 + fi diff --git a/CMakeLists.txt b/CMakeLists.txt index a0f74041321..d4dc318056a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,7 @@ endif() option(WHISPER_COREML "whisper: enable Core ML framework" OFF) option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF) option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF) +option(WHISPER_VITISAI "whisper: support for AMD Vitis AI" OFF) # Required for relocatable CMake package include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) diff --git a/README.md b/README.md index 474a1301da7..ef7dd801b24 100644 --- a/README.md +++ b/README.md @@ -1,862 +1,283 @@ -# whisper.cpp +# whisper.cpp-rocm + + + GitHub release (latest by date) + + + Latest release date + + + License + + + ROCm 7.12 + + + Powered by whisper.cpp + + + Platform: Windows | Linux | macOS + + + GPU Targets + + + NPU: Ryzen AI 300 + + +Pre-built releases of **[whisper.cpp](https://github.com/ggerganov/whisper.cpp)** with full AMD hardware acceleration — **ROCm™ GPU**, **Vulkan GPU**, **RyzenAI NPU**, and optimised **CPU** builds — for Windows 
and Linux. + +Releases track upstream whisper.cpp exactly: every time upstream publishes a new version, our automated pipeline syncs, builds all backends, and publishes a matching release within 24 hours. No manual steps. No lag. + +> [!IMPORTANT] +> **No ROCm installation required.** All ROCm and Vulkan runtime libraries are bundled inside every release archive. Download, extract, and run. + +> [!NOTE] +> This project is maintained by the [Lemonade SDK](https://github.com/lemonade-sdk/lemonade) team. Our primary focus is seamless integration with Lemonade and similar AMD-optimised AI applications. We welcome collaborations and contributions that advance AMD whisper.cpp support. -![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg) - -[![Actions Status](https://github.com/ggml-org/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggml-org/whisper.cpp/actions) -[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) -[![Conan Center](https://shields.io/conan/v/whisper-cpp)](https://conan.io/center/whisper-cpp) -[![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/) - -Stable: [v1.8.1](https://github.com/ggml-org/whisper.cpp/releases/tag/v1.8.1) / [Roadmap](https://github.com/orgs/ggml-org/projects/4/) - -High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model: - -- Plain C/C++ implementation without dependencies -- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support) -- AVX intrinsics support for x86 architectures -- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics) -- Mixed F16 / F32 precision -- [Integer quantization support](#quantization) -- Zero memory allocations at runtime -- [Vulkan support](#vulkan-gpu-support) -- Support for CPU-only 
inference -- [Efficient GPU support for NVIDIA](#nvidia-gpu-support) -- [OpenVINO Support](#openvino-support) -- [Ascend NPU Support](#ascend-npu-support) -- [Moore Threads GPU Support](#moore-threads-gpu-support) -- [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h) -- [Voice Activity Detection (VAD)](#voice-activity-detection-vad) - -Supported platforms: - -- [x] Mac OS (Intel and Arm) -- [x] [iOS](examples/whisper.objc) -- [x] [Android](examples/whisper.android) -- [x] [Java](bindings/java/README.md) -- [x] Linux / [FreeBSD](https://github.com/ggml-org/whisper.cpp/issues/56#issuecomment-1350920264) -- [x] [WebAssembly](examples/whisper.wasm) -- [x] Windows ([MSVC](https://github.com/ggml-org/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggml-org/whisper.cpp/issues/168)) -- [x] [Raspberry Pi](https://github.com/ggml-org/whisper.cpp/discussions/166) -- [x] [Docker](https://github.com/ggml-org/whisper.cpp/pkgs/container/whisper.cpp) - -The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp). -The rest of the code is part of the [`ggml`](https://github.com/ggml-org/ggml) machine learning library. - -Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications. 
-As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc) - -https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4 - -You can also easily make your own offline voice assistant application: [command](examples/command) - -https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4 - -On Apple Silicon, the inference runs fully on the GPU via Metal: - -https://github.com/ggml-org/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225 - -## Quick start +--- -First clone the repository: +## 🎯 Supported Devices -```bash -git clone https://github.com/ggml-org/whisper.cpp.git -``` +### ROCm GPU -Navigate into the directory: +| Architecture | Devices | +|---|---| +| **gfx1151** — RDNA3.5 APU | Ryzen AI MAX+ Pro 395 (Strix Halo) | +| **gfx1150** — RDNA3.5 APU | Ryzen AI 300 series (Strix Point) | +| **gfx120X** — RDNA4 dGPU | Radeon RX 9070 XT / 9070 / 9060 XT / 9060 | +| **gfx110X** — RDNA3 dGPU & iGPU | RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT, RX 7600 XT/7600; iGPU Radeon 780M / 760M / 740M | -``` -cd whisper.cpp -``` +### Vulkan GPU -Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example: +Any GPU with a Vulkan 1.3-capable driver — AMD, NVIDIA, Intel. Covers iGPUs on all platforms where a Vulkan driver is present. -```bash -sh ./models/download-ggml-model.sh base.en -``` +### NPU — RyzenAI -Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this: +| Device | OS | Requirement | +|---|---|---| +| Ryzen AI 300 series (Strix Point / Strix Halo) | Windows only | NPU driver ≥ `.280` | -```bash -# build the project -cmake -B build -cmake --build build -j --config Release +### CPU -# transcribe an audio file -./build/bin/whisper-cli -f samples/jfk.wav -``` +Optimised CPU-only builds for x86-64. 
Windows and Linux. No GPU required. --- -For a quick demo, simply run `make base.en`. +## 📦 Downloads -The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`. +All builds are self-contained — no separate driver or runtime installation needed (except the NPU driver for the NPU build). -For detailed usage instructions, run: `./build/bin/whisper-cli -h` +### ROCm — GPU Accelerated -Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool. -For example, you can use `ffmpeg` like this: +| GPU Target | Linux | Windows | +|---|---|---| +| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx1151.tar.gz) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx1151.zip) | +| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx1150.tar.gz) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx1150.zip) | +| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx120X.tar.gz) | 
[![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx120X.zip) | +| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx110X.tar.gz) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx110X.zip) | -```bash -ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav -``` - -## More audio samples - -If you want some extra audio samples to play with, simply run: - -``` -make -j samples -``` +### Vulkan — Cross-Vendor GPU -This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`. 
+| Linux | Windows | +|---|---| +| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-vulkan-x86_64.tar.gz) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-vulkan-x64.zip) | -You can download and run the other models as follows: +### NPU — RyzenAI (Windows only) -``` -make -j tiny.en -make -j tiny -make -j base.en -make -j base -make -j small.en -make -j small -make -j medium.en -make -j medium -make -j large-v1 -make -j large-v2 -make -j large-v3 -make -j large-v3-turbo -``` +| Windows | +|---| +| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-npu-x64.zip) | -## Memory usage +> Requires NPU driver ≥ `.280` and a pre-compiled `.rai` encoder model from [AMD's Hugging Face collection](https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models). Place the `.rai` file alongside your `ggml-*.bin` model — whisper-cli picks it up automatically. 
-| Model | Disk | Mem | -| ------ | ------- | ------- | -| tiny | 75 MiB | ~273 MB | -| base | 142 MiB | ~388 MB | -| small | 466 MiB | ~852 MB | -| medium | 1.5 GiB | ~2.1 GB | -| large | 2.9 GiB | ~3.9 GB | +### macOS — Metal GPU -## POWER VSX Intrinsics +| macOS (Apple Silicon) | +|---| +| [![macOS Metal](https://img.shields.io/badge/Download-macOS%20Metal%20(arm64)-lightgrey?logo=apple&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-darwin-metal-arm64.tar.gz) | -`whisper.cpp` supports POWER architectures and includes code which -significantly speeds operation on Linux running on POWER9/10, making it -capable of faster-than-realtime transcription on underclocked Raptor -Talos II. Ensure you have a BLAS package installed, and replace the -standard cmake setup with: +### CPU — No GPU Required -```bash -# build with GGML_BLAS defined -cmake -B build -DGGML_BLAS=1 -cmake --build build -j --config Release -./build/bin/whisper-cli [ .. etc .. ] -``` +| Linux | Windows | +|---|---| +| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-cpu-x86_64.tar.gz) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-cpu-x64.zip) | -## Quantization +--- -`whisper.cpp` supports integer quantization of the Whisper `ggml` models. -Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently. +## 🧪 Quick Smoketest -Here are the steps for creating and using a quantized model: +### 1. 
Get a model ```bash -# quantize a model with Q5_0 method -cmake -B build -cmake --build build -j --config Release -./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0 - -# run the examples as usual, specifying the quantized model file -./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav -``` - -## Core ML support - -On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant -speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`: - -- Install Python dependencies needed for the creation of the Core ML model: - - ```bash - pip install ane_transformers - pip install openai-whisper - pip install coremltools - ``` - - - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools. - - Python 3.11 is recommended. - - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination. - - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step: - - To create an environment, use: `conda create -n py311-whisper python=3.11 -y` - - To activate the environment, use: `conda activate py311-whisper` - -- Generate a Core ML model. For example, to generate a `base.en` model, use: - - ```bash - ./models/generate-coreml-model.sh base.en - ``` - - This will generate the folder `models/ggml-base.en-encoder.mlmodelc` - -- Build `whisper.cpp` with Core ML support: - - ```bash - # using CMake - cmake -B build -DWHISPER_COREML=1 - cmake --build build -j --config Release - ``` - -- Run the examples as usual. 
For example: - - ```text - $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav - - ... - - whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc' - whisper_init_state: first run on a device may take a while ... - whisper_init_state: Core ML model loaded - - system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 | - - ... - ``` - - The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format. - Next runs are faster. - -For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggml-org/whisper.cpp/pull/566). - -## OpenVINO support - -On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed -on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete). - -This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`: - -- First, setup python virtual env. and install python dependencies. Python 3.10 is recommended. - - Windows: - - ```powershell - cd models - python -m venv openvino_conv_env - openvino_conv_env\Scripts\activate - python -m pip install --upgrade pip - pip install -r requirements-openvino.txt - ``` - - Linux and macOS: - - ```bash - cd models - python3 -m venv openvino_conv_env - source openvino_conv_env/bin/activate - python -m pip install --upgrade pip - pip install -r requirements-openvino.txt - ``` - -- Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use: - - ``` - python convert-whisper-to-openvino.py --model base.en - ``` - - This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. 
It's recommended to relocate these to the same folder as `ggml` models, as that - is the default location that the OpenVINO extension will search at runtime. - -- Build `whisper.cpp` with OpenVINO support: - - Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2024.6.0](https://github.com/openvinotoolkit/openvino/releases/tag/2024.6.0). Ready to use Binaries of the required libraries can be found in the [OpenVino Archives](https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/) - - After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example: - - Linux: - - ```bash - source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh - ``` - - Windows (cmd): - - ```powershell - C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat - ``` - - And then build the project using cmake: - - ```bash - cmake -B build -DWHISPER_OPENVINO=1 - cmake --build build -j --config Release - ``` - -- Run the examples as usual. For example: - - ```text - $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav - - ... - - whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml' - whisper_ctx_init_openvino_encoder: first run on a device may take a while ... - whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache - whisper_ctx_init_openvino_encoder: OpenVINO model loaded - - system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 | - - ... 
- ``` - - The first time run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get - cached for the next run. - -For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggml-org/whisper.cpp/pull/1037). - -## NVIDIA GPU support - -With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels. -First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads - -Now build `whisper.cpp` with CUDA support: - -``` -cmake -B build -DGGML_CUDA=1 -cmake --build build -j --config Release -``` - -or for newer NVIDIA GPU's (RTX 5000 series): -``` -cmake -B build -DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="86" -cmake --build build -j --config Release -``` - -## Vulkan GPU support -Cross-vendor solution which allows you to accelerate workload on your GPU. -First, make sure your graphics card driver provides support for Vulkan API. - -Now build `whisper.cpp` with Vulkan support: -``` -cmake -B build -DGGML_VULKAN=1 -cmake --build build -j --config Release -``` - -## BLAS CPU support via OpenBLAS - -Encoder processing can be accelerated on the CPU via OpenBLAS. -First, make sure you have installed `openblas`: https://www.openblas.net/ - -Now build `whisper.cpp` with OpenBLAS support: - -``` -cmake -B build -DGGML_BLAS=1 -cmake --build build -j --config Release -``` - -## Ascend NPU support - -Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores. - -First, check if your Ascend NPU device is supported: - -**Verified devices** -| Ascend NPU | Status | -|:-----------------------------:|:-------:| -| Atlas 300T A2 | Support | -| Atlas 300I Duo | Support | - -Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community) . 
The lasted version of CANN is recommanded. - -Now build `whisper.cpp` with CANN support: - -``` -cmake -B build -DGGML_CANN=1 -cmake --build build -j --config Release -``` - -Run the inference examples as usual, for example: +# Download the tiny.en model (~75 MB) for a fast smoke test +./models/download-ggml-model.sh tiny.en +# Or grab any ggml-*.bin from https://huggingface.co/ggerganov/whisper.cpp ``` -./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8 -``` - -*Notes:* - -- If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag. -- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`. -## Moore Threads GPU support - -With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels. -First, make sure you have installed `MUSA SDK rc4.2.0`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=4.2.0 - -Now build `whisper.cpp` with MUSA support: - -``` -cmake -B build -DGGML_MUSA=1 -cmake --build build -j --config Release -``` - -or specify the architecture for your Moore Threads GPU. For example, if you have a MTT S80 GPU, you can specify the architecture as follows: - -``` -cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21" -cmake --build build -j --config Release -``` - -## FFmpeg support (Linux only) - -If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration. - -First, you need to install required libraries: +### 2. 
Transcribe the bundled sample ```bash -# Debian/Ubuntu -sudo apt install libavcodec-dev libavformat-dev libavutil-dev +# Linux +./whisper-cli -m models/ggml-tiny.en.bin -f samples/jfk.wav -# RHEL/Fedora -sudo dnf install libavcodec-free-devel libavformat-free-devel libavutil-free-devel +# Windows +whisper-cli.exe -m models\ggml-tiny.en.bin -f samples\jfk.wav ``` -Then you can build the project as follows: - -```bash -cmake -B build -D WHISPER_FFMPEG=yes -cmake --build build -``` +Expected: a transcription of the JFK "Ask not what your country can do for you" excerpt. -Run the following example to confirm it's working: +### 3. Verify GPU is active (ROCm) ```bash -# Convert an audio file to Opus format -ffmpeg -i samples/jfk.wav jfk.opus - -# Transcribe the audio file -./build/bin/whisper-cli --model models/ggml-base.en.bin --file jfk.opus -``` - -## Docker - -### Prerequisites - -- Docker must be installed and running on your system. -- Create a folder to store big models & intermediate files (ex. /whisper/models) - -### Images - -We have multiple Docker images available for this project: - -1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`) -2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`) -3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`) -4. `ghcr.io/ggml-org/whisper.cpp:main-vulkan`: Same as `main` but compiled with Vulkan support. 
(platforms: `linux/amd64`) - -### Usage - -```shell -# download model and persist it in a local folder -docker run -it --rm \ - -v path/to/models:/models \ - whisper.cpp:main "./models/download-ggml-model.sh base /models" - -# transcribe an audio file -docker run -it --rm \ - -v path/to/models:/models \ - -v path/to/audios:/audios \ - whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav" - -# transcribe an audio file in samples folder -docker run -it --rm \ - -v path/to/models:/models \ - whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav" - -# run the web server -docker run -it --rm -p "8080:8080" \ - -v path/to/models:/models \ - whisper.cpp:main "whisper-server --host 127.0.0.1 -m /models/ggml-base.bin" - -# run the bench too on the small.en model using 4 threads -docker run -it --rm \ - -v path/to/models:/models \ - whisper.cpp:main "whisper-bench -m /models/ggml-small.en.bin -t 4" +# At startup whisper-cli prints the backend in use — look for: +# ggml_hip: using device ... +./whisper-cli -m models/ggml-tiny.en.bin -f samples/jfk.wav 2>&1 | grep -i "hip\|rocm\|device" ``` -## Installing with Conan - -You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command: +### 4. Verify NPU is active (VitisAI) ``` -conan install --requires="whisper-cpp/[*]" --build=missing +# Place the .rai encoder alongside the .bin model, then run normally. +# Look for this line in stdout: +# whisper_vitisai_encode: Vitis AI model inference completed. +whisper-cli.exe -m models\ggml-tiny.en.bin -f samples\jfk.wav ``` -For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/). - -## Limitations - -- Inference only - -## Real-time audio input example - -This is a naive example of performing real-time inference on audio from your microphone. 
-The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously. -More info is available in [issue #10](https://github.com/ggml-org/whisper.cpp/issues/10). -You will need to have [sdl2](https://wiki.libsdl.org/SDL2/Installation) installed for it to work properly. +### 5. Verify portability (Linux ROCm) ```bash -cmake -B build -DWHISPER_SDL2=ON -cmake --build build -j --config Release -./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000 +# ROCm runtime libs are bundled — RPATH should point to $ORIGIN (same dir as binary) +readelf -d whisper-cli | grep RPATH # -> $ORIGIN +ldd whisper-cli | grep "not found" # -> (empty — all deps resolved locally) ``` -https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4 +--- -## Confidence color-coding +## 🔄 Release Cadence -Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy -to highlight words with high or low confidence: +Releases are fully automated and mirror upstream whisper.cpp releases with no manual steps: -```bash -./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors ``` - -image - -## Controlling the length of the generated text segments (experimental) - -For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`: - -```text -$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16 - -whisper_model_load: loading model from './models/ggml-base.en.bin' -... -system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | - -main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ... 
- -[00:00:00.000 --> 00:00:00.850] And so my -[00:00:00.850 --> 00:00:01.590] fellow -[00:00:01.590 --> 00:00:04.140] Americans, ask -[00:00:04.140 --> 00:00:05.660] not what your -[00:00:05.660 --> 00:00:06.840] country can do -[00:00:06.840 --> 00:00:08.430] for you, ask -[00:00:08.430 --> 00:00:09.440] what you can do -[00:00:09.440 --> 00:00:10.020] for your -[00:00:10.020 --> 00:00:11.000] country. +upstream whisper.cpp releases vX.Y.Z + | + v (detected within 24 h by daily sync job) + sync.yml merges upstream into main, pushes tag vX.Y.Z + | + v (tag push triggers build pipeline) + build.yml builds all backend/OS combinations in parallel + | + v + GitHub Release: "whisper.cpp vX.Y.Z — AMD Builds" + with up to 14 artifacts across all backends and OS targets ``` -## Word-level timestamp (experimental) - -The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`: - -```text -$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1 - -whisper_model_load: loading model from './models/ggml-base.en.bin' -... -system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | - -main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
- -[00:00:00.000 --> 00:00:00.320] -[00:00:00.320 --> 00:00:00.370] And -[00:00:00.370 --> 00:00:00.690] so -[00:00:00.690 --> 00:00:00.850] my -[00:00:00.850 --> 00:00:01.590] fellow -[00:00:01.590 --> 00:00:02.850] Americans -[00:00:02.850 --> 00:00:03.300] , -[00:00:03.300 --> 00:00:04.140] ask -[00:00:04.140 --> 00:00:04.990] not -[00:00:04.990 --> 00:00:05.410] what -[00:00:05.410 --> 00:00:05.660] your -[00:00:05.660 --> 00:00:06.260] country -[00:00:06.260 --> 00:00:06.600] can -[00:00:06.600 --> 00:00:06.840] do -[00:00:06.840 --> 00:00:07.010] for -[00:00:07.010 --> 00:00:08.170] you -[00:00:08.170 --> 00:00:08.190] , -[00:00:08.190 --> 00:00:08.430] ask -[00:00:08.430 --> 00:00:08.910] what -[00:00:08.910 --> 00:00:09.040] you -[00:00:09.040 --> 00:00:09.320] can -[00:00:09.320 --> 00:00:09.440] do -[00:00:09.440 --> 00:00:09.760] for -[00:00:09.760 --> 00:00:10.020] your -[00:00:10.020 --> 00:00:10.510] country -[00:00:10.510 --> 00:00:11.000] . -``` +**Every release ships up to 14 artifacts:** -## Speaker segmentation via tinydiarize (experimental) - -More information about this approach is available here: https://github.com/ggml-org/whisper.cpp/pull/1058 - -Sample usage: - -```py -# download a tinydiarize compatible model -./models/download-ggml-model.sh small.en-tdrz - -# run as usual, adding the "-tdrz" command-line argument -./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz -... -main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ... -... -[00:00:00.000 --> 00:00:03.800] Okay Houston, we've had a problem here. [SPEAKER_TURN] -[00:00:03.800 --> 00:00:06.200] This is Houston. Say again please. [SPEAKER_TURN] -[00:00:06.200 --> 00:00:08.260] Uh Houston we've had a problem. -[00:00:08.260 --> 00:00:11.320] We've had a main beam up on a volt. [SPEAKER_TURN] -[00:00:11.320 --> 00:00:13.820] Roger main beam interval. 
[SPEAKER_TURN] -[00:00:13.820 --> 00:00:15.100] Uh uh [SPEAKER_TURN] -[00:00:15.100 --> 00:00:18.020] So okay stand, by thirteen we're looking at it. [SPEAKER_TURN] -[00:00:18.020 --> 00:00:25.740] Okay uh right now uh Houston the uh voltage is uh is looking good um. -[00:00:27.620 --> 00:00:29.940] And we had a a pretty large bank or so. ``` - -## Karaoke-style movie generation (experimental) - -The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the -currently pronounced word is highlighted. Use the `-owts` argument and run the generated bash script. -This requires to have `ffmpeg` installed. - -Here are a few _"typical"_ examples: - -```bash -./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts -source ./samples/jfk.wav.wts -ffplay ./samples/jfk.wav.mp4 +whisper-{version}-linux-rocm-gfx1151.tar.gz +whisper-{version}-linux-rocm-gfx1150.tar.gz +whisper-{version}-linux-rocm-gfx120X.tar.gz +whisper-{version}-linux-rocm-gfx110X.tar.gz +whisper-{version}-windows-rocm-gfx1151.zip +whisper-{version}-windows-rocm-gfx1150.zip +whisper-{version}-windows-rocm-gfx120X.zip +whisper-{version}-windows-rocm-gfx110X.zip +whisper-{version}-linux-vulkan-x86_64.tar.gz +whisper-{version}-windows-vulkan-x64.zip +whisper-{version}-windows-npu-x64.zip (may be absent if NPU runner offline) +whisper-{version}-linux-cpu-x86_64.tar.gz +whisper-{version}-windows-cpu-x64.zip +whisper-{version}-darwin-metal-arm64.tar.gz ``` -https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4 +> [!TIP] +> **Linux APU out of VRAM despite free memory (gfx1150 / gfx1151)?** +> Add `ttm.pages_limit=12582912` to your kernel command line (e.g. in GRUB), run `update-grub`, and reboot. +> See the [TheRock FAQ](https://github.com/ROCm/TheRock/blob/main/docs/faq.md#gfx1151-strix-halo-specific-questions) for details. 
--- -```bash -./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts -source ./samples/mm0.wav.wts -ffplay ./samples/mm0.wav.mp4 -``` +## 🖥️ Local Builds (Windows) -https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4 +Reproduce any CI build locally using the bundled PowerShell script. Produces identical artifacts to what CI publishes. ---- - -```bash -./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts -source ./samples/gb0.wav.wts -ffplay ./samples/gb0.wav.mp4 -``` +```powershell +# Prerequisites: CMake, VS Build Tools 2022, 7-Zip, internet access -https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4 +# CPU only (~2 min, no GPU needed) +.\scripts\local-build.ps1 -Backend cpu ---- +# Vulkan — requires Vulkan SDK from https://vulkan.lunarg.com +.\scripts\local-build.ps1 -Backend vulkan -## Video comparison of different models +# ROCm for RDNA3 iGPU — downloads ROCm tarball (~2-4 GB, cached after first run) +.\scripts\local-build.ps1 -Backend rocm -GfxTarget gfx1151 -Use the [scripts/bench-wts.sh](https://github.com/ggml-org/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format: +# NPU — requires RyzenAI hardware + NPU driver >= .280 +.\scripts\local-build.ps1 -Backend npu -```bash -./scripts/bench-wts.sh samples/jfk.wav -ffplay ./samples/jfk.wav.all.mp4 +# All backends, version-stamped artifacts placed in .\dist\ +.\scripts\local-build.ps1 -Backend all -Version 1.8.4 ``` -https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4 - --- -## Benchmarks - -In order to have an objective comparison of the performance of the inference across different system configurations, -use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it -took to execute it. 
The results are summarized in the following Github issue: - -[Benchmark results](https://github.com/ggml-org/whisper.cpp/issues/89) +## 📦 Dependencies -Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py). +### Bundled in every release (no installation needed) -You can run it with the following command, by default it will run against any standard model in the models folder. +| Backend | What is included | +|---|---| +| ROCm | `amdhip64`, `rocblas`, `hipblaslt` + library data, LLVM runtime, all system deps; RPATH=`$ORIGIN` on Linux | +| Vulkan | SPIR-V shaders embedded at build time; links against system Vulkan loader | +| Metal | Uses macOS system Metal framework; no extra bundling needed | +| NPU | FlexML Runtime DLLs (`flexmlrt/bin` + `flexmlrt/lib`) | +| CPU | SDL2.dll included on Windows | -```bash -python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2 -``` +### Build-time only -It is written in python with the intention of being easy to modify and extend for your benchmarking use case. - -It outputs a csv file with the results of the benchmarking. - -## `ggml` format - -The original models are converted to a custom binary format. This allows to pack everything needed into a single file: - -- model parameters -- mel filters -- vocabulary -- weights - -You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script -or manually from here: - -- https://huggingface.co/ggerganov/whisper.cpp - -For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md). 
- -## [Bindings](https://github.com/ggml-org/whisper.cpp/discussions/categories/bindings) - -- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggml-org/whisper.cpp/discussions/310) -- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggml-org/whisper.cpp/discussions/309) - - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn) -- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggml-org/whisper.cpp/discussions/312) -- [x] Java: - - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni) -- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggml-org/whisper.cpp/discussions/507) -- [x] Objective-C / Swift: [ggml-org/whisper.spm](https://github.com/ggml-org/whisper.spm) | [#313](https://github.com/ggml-org/whisper.cpp/discussions/313) - - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper) -- [x] .NET: | [#422](https://github.com/ggml-org/whisper.cpp/discussions/422) - - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net) - - [NickDarvey/whisper](https://github.com/NickDarvey/whisper) -- [x] Python: | [#9](https://github.com/ggml-org/whisper.cpp/issues/9) - - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython) - - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp) - - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11) - - [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11) -- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper) -- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity) - -## XCFramework -The XCFramework is a precompiled version of the library for iOS, visionOS, tvOS, -and macOS. It can be used in Swift projects without the need to compile the -library from source. 
For example, the v1.7.5 version of the XCFramework can be -used as follows: - -```swift -// swift-tools-version: 5.10 -// The swift-tools-version declares the minimum version of Swift required to build this package. - -import PackageDescription - -let package = Package( - name: "Whisper", - targets: [ - .executableTarget( - name: "Whisper", - dependencies: [ - "WhisperFramework" - ]), - .binaryTarget( - name: "WhisperFramework", - url: "https://github.com/ggml-org/whisper.cpp/releases/download/v1.7.5/whisper-v1.7.5-xcframework.zip", - checksum: "c7faeb328620d6012e130f3d705c51a6ea6c995605f2df50f6e1ad68c59c6c4a" - ) - ] -) -``` +| Tool | Purpose | +|---|---| +| [whisper.cpp](https://github.com/ggerganov/whisper.cpp) | Upstream source | +| [ROCm / TheRock](https://github.com/ROCm/TheRock) | HIP compiler + GPU runtime (tarball, not installed globally) | +| [FlexML Runtime](https://github.com/lemonade-sdk/whisper.cpp/releases/tag/deps) | VitisAI NPU inference | +| [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) | GLSL to SPIR-V shader compilation | +| [CMake >= 3.21](https://cmake.org/) | Build system | +| [Ninja](https://ninja-build.org/) | Fast build backend (ROCm builds) | +| [VS Build Tools 2022](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2022) | Windows MSVC toolchain | -## Voice Activity Detection (VAD) -Support for Voice Activity Detection (VAD) can be enabled using the `--vad` -argument to `whisper-cli`. In addition to this option a VAD model is also -required. - -The way this works is that first the audio samples are passed through -the VAD model which will detect speech segments. Using this information, -only the speech segments that are detected are extracted from the original audio -input and passed to whisper for processing. This reduces the amount of audio -data that needs to be processed by whisper and can significantly speed up the -transcription process. 
- -The following VAD models are currently supported: - -### Silero-VAD -[Silero-vad](https://github.com/snakers4/silero-vad) is a lightweight VAD model -written in Python that is fast and accurate. - -Models can be downloaded by running the following command on Linux or MacOS: -```console -$ ./models/download-vad-model.sh silero-v6.2.0 -Downloading ggml model silero-v6.2.0 from 'https://huggingface.co/ggml-org/whisper-vad' ... -ggml-silero-v6.2.0.bin 100%[==============================================>] 864.35K --.-KB/s in 0.04s -Done! Model 'silero-v6.2.0' saved in '/path/models/ggml-silero-v6.2.0.bin' -You can now use it like this: +--- - $ ./build/bin/whisper-cli -vm /path/models/ggml-silero-v6.2.0.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin +## 🏗️ Repository Structure ``` -And the following command on Windows: -```console -> .\models\download-vad-model.cmd silero-v6.2.0 -Downloading vad model silero-v6.2.0... -Done! Model silero-v6.2.0 saved in C:\Users\danie\work\ai\whisper.cpp\ggml-silero-v6.2.0.bin -You can now use it like this: - -C:\path\build\bin\Release\whisper-cli.exe -vm C:\path\ggml-silero-v6.2.0.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav - +whisper.cpp-rocm/ +├── .github/ +│ └── workflows/ +│ ├── build.yml # All AMD backends — builds + publishes releases +│ └── sync.yml # Daily upstream sync + auto-tagging +├── ci/ +│ ├── resolve-rocm-version.sh # Resolves AMD tarball URL for a given ROCm version +│ └── map-gpu-target.sh # Maps gfx110X/gfx120X shorthands to specific arch lists +├── src/ +│ └── vitisai/ +│ ├── whisper-vitisai-encoder.h # VitisAI NPU encoder C interface +│ └── whisper-vitisai-encoder.cpp # FlexML runtime integration +├── scripts/ +│ └── local-build.ps1 # Local Windows build script (mirrors CI jobs exactly) +├── ggml/ # GGML library (all GPU backends live here) +├── src/ # whisper.cpp source (VitisAI hooks added) +└── CMakeLists.txt # Adds -DWHISPER_VITISAI option ``` -To see a list of all available models, run 
the above commands without any -arguments. +--- -This model can be also be converted manually to ggml using the following command: -```console -$ python3 -m venv venv && source venv/bin/activate -$ (venv) pip install silero-vad -$ (venv) $ python models/convert-silero-vad-to-ggml.py --output models/silero.bin -Saving GGML Silero-VAD model to models/silero-v6.2.0-ggml.bin -``` -And it can then be used with whisper as follows: -```console -$ ./build/bin/whisper-cli \ - --file ./samples/jfk.wav \ - --model ./models/ggml-base.en.bin \ - --vad \ - --vad-model ./models/silero-v6.2.0-ggml.bin -``` +## 📄 License + +This project is licensed under the MIT License — see [LICENSE](LICENSE) for details. -### VAD Options - -* --vad-threshold: Threshold probability for speech detection. A probability -for a speech segment/frame above this threshold will be considered as speech. - -* --vad-min-speech-duration-ms: Minimum speech duration in milliseconds. Speech -segments shorter than this value will be discarded to filter out brief noise or -false positives. - -* --vad-min-silence-duration-ms: Minimum silence duration in milliseconds. Silence -periods must be at least this long to end a speech segment. Shorter silence -periods will be ignored and included as part of the speech. - -* --vad-max-speech-duration-s: Maximum speech duration in seconds. Speech segments -longer than this will be automatically split into multiple segments at silence -points exceeding 98ms to prevent excessively long segments. - -* --vad-speech-pad-ms: Speech padding in milliseconds. Adds this amount of padding -before and after each detected speech segment to avoid cutting off speech edges. - -* --vad-samples-overlap: Amount of audio to extend from each speech segment into -the next one, in seconds (e.g., 0.10 = 100ms overlap). This ensures speech isn't -cut off abruptly between segments when they're concatenated together. 
- -## Examples - -There are various examples of using the library for different projects in the [examples](examples) folder. -Some of the examples are even ported to run in the browser using WebAssembly. Check them out! - -| Example | Web | Description | -| --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | -| [whisper-cli](examples/cli) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper | -| [whisper-bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine | -| [whisper-stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture | -| [whisper-command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic | -| [whisper-server](examples/server) | | HTTP transcription server with OAI-like API | -| [whisper-talk-llama](examples/talk-llama) | | Talk with a LLaMA bot | -| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp | -| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp | -| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp | -| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim | -| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture | -| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggml-org/whisper.cpp/issues/185) | -| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD 
[(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) | -| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess | - -## [Discussions](https://github.com/ggml-org/whisper.cpp/discussions) - -If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic. -You can use the [Show and tell](https://github.com/ggml-org/whisper.cpp/discussions/categories/show-and-tell) category -to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the -[Frequently asked questions (#126)](https://github.com/ggml-org/whisper.cpp/discussions/126) discussion. +whisper.cpp is copyright Georgi Gerganov and contributors — [ggerganov/whisper.cpp](https://github.com/ggerganov/whisper.cpp). +ROCm is copyright Advanced Micro Devices, Inc. +VitisAI encoder copyright 2025 Advanced Micro Devices, Inc. diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c index 6e38ead6321..9f326c47a5b 100644 --- a/bindings/ruby/ext/ruby_whisper_context.c +++ b/bindings/ruby/ext/ruby_whisper_context.c @@ -308,7 +308,7 @@ check_memory_view(rb_memory_view_t *memview) rb_warn("currently only format \"f\" is supported for MemoryView, but given: %s", memview->format); return false; } - if (memview->format != NULL && memview->ndim != 1) { + if (memview->ndim != 1) { rb_warn("currently only 1 dimensional MemoryView is supported, but given: %zd", memview->ndim); return false; } diff --git a/ci/map-gpu-target.sh b/ci/map-gpu-target.sh new file mode 100755 index 00000000000..1e7de7c9fcf --- /dev/null +++ b/ci/map-gpu-target.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Map a GFX target shorthand to specific GPU architectures for CMake. 
+# +# Usage: +# source ci/map-gpu-target.sh <gfx_target> +# +# Arguments: +# gfx_target - GPU target (gfx1151, gfx1150, gfx110X, gfx120X, or specific) +# +# Outputs (exported): +# MAPPED_GPU_TARGET - Semicolon-separated list of GPU architectures + +gfx_target="$1" + +if [ -z "$gfx_target" ]; then + echo "Usage: source ci/map-gpu-target.sh <gfx_target>" + return 1 2>/dev/null || exit 1 +fi + +case "$gfx_target" in + gfx110X) MAPPED_GPU_TARGET="gfx1100;gfx1101;gfx1102" ;; + gfx120X) MAPPED_GPU_TARGET="gfx1200;gfx1201" ;; + *) MAPPED_GPU_TARGET="$gfx_target" ;; +esac + +export MAPPED_GPU_TARGET +echo "Mapped GPU target: $gfx_target -> $MAPPED_GPU_TARGET" diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh new file mode 100755 index 00000000000..cf3bccbe778 --- /dev/null +++ b/ci/resolve-rocm-version.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# +# Resolve the ROCm tarball URL for a given platform and version. +# +# Uses AMD's official repo tarball distribution: +# https://repo.amd.com/rocm/tarball/therock-dist-{platform}-{gfx_target}-{version}.tar.gz +# +# Usage: +# source ci/resolve-rocm-version.sh <platform> <gfx_target> <rocm_version> +# +# Arguments: +# platform - "linux" or "windows" +# gfx_target - GPU target, required (e.g. gfx1151, gfx1150, or a group target such as gfx110X / gfx120X) +# rocm_version - Specific version (e.g. 7.12.0, 7.2.1) - required, no "latest" auto-detection +# +# Outputs (exported): +# ROCM_RESOLVED_VERSION - The resolved version string +# ROCM_TARBALL_URL - The full URL to download + +platform="$1" +gfx_target="$2" +rocm_version="$3" + +if [ -z "$platform" ] || [ -z "$gfx_target" ] || [ -z "$rocm_version" ]; then + echo "Usage: source ci/resolve-rocm-version.sh <platform> <gfx_target> <rocm_version>" + return 1 2>/dev/null || exit 1 +fi + +# Validate that a specific version was provided (no "latest" auto-detection) +if [ "$rocm_version" = "latest" ]; then + echo "ERROR: 'latest' auto-detection is not supported." + echo "Please specify a concrete ROCm version (e.g., 7.12.0, 7.2.1)."
+ echo "Available versions: https://repo.amd.com/rocm/tarball/" + return 1 2>/dev/null || exit 1 +fi + +# Validate version format (should be X.Y.Z or X.Y.ZaNNNNNNNN pattern) +if ! echo "$rocm_version" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then + echo "ERROR: Invalid ROCm version format: '$rocm_version'" + echo "Expected format: X.Y.Z (e.g., 7.12.0) or X.Y.ZaNNNNNNNN (e.g., 7.11.0a20251205)" + return 1 2>/dev/null || exit 1 +fi + +# Exact tarball names published at repo.amd.com/rocm/tarball/ for 7.12.0: +# linux: gfx110X-all, gfx120X-all, gfx1150, gfx1151, gfx1152 +# windows: gfx110X-all, gfx120X-all, gfx1150, gfx1151, gfx1152 +case "$gfx_target" in + gfx110X) tarball_target="gfx110X-all" ;; + gfx120X) tarball_target="gfx120X-all" ;; + gfx1150) tarball_target="gfx1150" ;; + gfx1151) tarball_target="gfx1151" ;; + gfx1152) tarball_target="gfx1152" ;; + *) tarball_target="$gfx_target" ;; +esac + +# Construct the AMD official repo URL +ROCM_TARBALL_URL="https://repo.amd.com/rocm/tarball/therock-dist-${platform}-${tarball_target}-${rocm_version}.tar.gz" + +export ROCM_RESOLVED_VERSION="$rocm_version" +echo "ROCm version: $ROCM_RESOLVED_VERSION" +echo "ROCm URL: $ROCM_TARBALL_URL" diff --git a/ci/run.sh b/ci/run.sh index cbe28442e16..9f6d73d9c04 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -79,6 +79,13 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then fi CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}" + + # Set HIP environment if not already set + export HIP_PLATFORM=${HIP_PLATFORM:-amd} + export ROCM_PATH=${ROCM_PATH:-/opt/rocm} + export HIP_PATH=${HIP_PATH:-/opt/rocm} + export LD_LIBRARY_PATH=${ROCM_PATH}/lib:${LD_LIBRARY_PATH} + CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_HIP_COMPILER=${ROCM_PATH}/lib/llvm/bin/clang++" fi if [ ! -z ${GG_BUILD_SYCL} ]; then @@ -223,7 +230,7 @@ function gg_run_ctest { gg_check_build_requirements (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + (time make -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log diff --git a/scripts/local-build.ps1 b/scripts/local-build.ps1 new file mode 100644 index 00000000000..db90dd5a747 --- /dev/null +++ b/scripts/local-build.ps1 @@ -0,0 +1,395 @@ +<# +.SYNOPSIS + Local build script for whisper-cpp-amd. Mirrors the GitHub Actions build.yml jobs for Windows. + +.DESCRIPTION + Builds one or more AMD backends locally, producing the same zip artifacts that CI publishes. + +.PARAMETER Backend + Which backend to build: cpu, vulkan, rocm, npu, all. Default: cpu + +.PARAMETER GfxTarget + ROCm GPU target. Default: gfx1151 + Common: gfx1151, gfx1150, gfx1100, gfx1200 + +.PARAMETER RocmVersion + ROCm version to download. Default: 7.12.0 + +.PARAMETER OutputDir + Directory for final zip artifacts. Default: .\dist + +.PARAMETER BuildDir + CMake build directory prefix. Default: .\build-local + +.PARAMETER Version + Version string used in artifact filenames. 
Default: local + +.EXAMPLE + .\scripts\local-build.ps1 -Backend cpu + .\scripts\local-build.ps1 -Backend vulkan + .\scripts\local-build.ps1 -Backend rocm -GfxTarget gfx1151 + .\scripts\local-build.ps1 -Backend npu + .\scripts\local-build.ps1 -Backend all -Version 1.8.4 +#> + +param( + [ValidateSet("cpu","vulkan","rocm","npu","all")] + [string]$Backend = "cpu", + [string]$GfxTarget = "gfx1151", + [string]$RocmVersion = "7.12.0", + [string]$OutputDir = ".\dist", + [string]$BuildDir = ".\build-local", + [string]$Version = "local" +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = "Stop" + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +function Write-Step([string]$msg) { + Write-Host "" + Write-Host "================================================" -ForegroundColor Cyan + Write-Host " $msg" -ForegroundColor Cyan + Write-Host "================================================" -ForegroundColor Cyan +} + +function Write-Ok([string]$msg) { Write-Host " [OK] $msg" -ForegroundColor Green } +function Write-Info([string]$msg) { Write-Host " --> $msg" -ForegroundColor Yellow } +function Write-Fail([string]$msg) { Write-Host " [X] $msg" -ForegroundColor Red } + +function Require-Command([string]$cmd) { + if (-not (Get-Command $cmd -ErrorAction SilentlyContinue)) { + Write-Fail "$cmd not found in PATH" + throw "Missing requirement: $cmd" + } + Write-Ok "$cmd found" +} + +function Download-SDL2 { + param([string]$Ver = "2.28.5") + $sdlDir = "SDL2-$Ver" + if (Test-Path $sdlDir) { + Write-Info "SDL2 already extracted at $sdlDir" + } else { + Write-Info "Downloading SDL2 $Ver ..." 
+ $url = "https://github.com/libsdl-org/SDL/releases/download/release-$Ver/SDL2-devel-$Ver-VC.zip" + Invoke-WebRequest -Uri $url -OutFile "sdl2.zip" + 7z x sdl2.zip -y | Out-Null + Remove-Item sdl2.zip + + # Patch SDL_endian.h (needed for AMD clang compatibility) + $hdr = Get-ChildItem -Recurse -Filter "SDL_endian.h" | Select-Object -First 1 + if ($hdr) { + $content = Get-Content $hdr.FullName -Raw + if ($content -match 'extern void _m_prefetch') { + $patched = $content -replace 'extern void _m_prefetch\(void \*__P\);', '// extern void _m_prefetch(void *__P);' + Set-Content -Path $hdr.FullName -Value $patched -NoNewline + Write-Ok "Patched SDL_endian.h" + } + } + } + $cmake = Get-ChildItem -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1 + if (-not $cmake) { throw "sdl2-config.cmake not found after SDL2 extraction" } + return $cmake.DirectoryName +} + +function Package-Build { + param([string]$Name, [string]$BinPath) + New-Item -ItemType Directory -Force -Path $OutputDir | Out-Null + $zip = Join-Path $OutputDir "$Name.zip" + Write-Info "Creating $zip ..." + Compress-Archive -Path "$BinPath\*" -DestinationPath $zip -Force + $mb = [math]::Round((Get-Item $zip).Length / 1MB, 2) + Write-Ok "Created $zip ($mb MB)" + return $zip +} + +function Run-MSBuild { + param([string]$Dir, [string[]]$ConfigArgs, [string]$Config = "Release", [string]$Arch = "x64") + Write-Info "CMake configure ..." + & cmake -S . -B $Dir @ConfigArgs + if ($LASTEXITCODE -ne 0) { throw "CMake configure failed (exit $LASTEXITCODE)" } + Write-Info "MSBuild $Config ..." + & cmake --build $Dir --config $Config -j $env:NUMBER_OF_PROCESSORS + if ($LASTEXITCODE -ne 0) { throw "Build failed (exit $LASTEXITCODE)" } +} + +# ── Preflight ───────────────────────────────────────────────────────────────── + +if (-not (Test-Path "CMakeLists.txt") -or -not (Test-Path "src\whisper.cpp")) { + Write-Fail "Run this script from the whisper-cpp-amd repo root." 
+ exit 1 +} + +Require-Command cmake +New-Item -ItemType Directory -Force -Path $OutputDir | Out-Null + +# ── Build functions ─────────────────────────────────────────────────────────── + +function Build-CPU { + Write-Step "CPU - Windows x64" + Require-Command msbuild + + $SDL2_DIR = Download-SDL2 + $dir = "$BuildDir-cpu" + + Run-MSBuild $dir @( + "-A", "x64", + "-DCMAKE_BUILD_TYPE=Release", + "-DBUILD_SHARED_LIBS=ON", + "-DWHISPER_SDL2=ON", + "-DSDL2_DIR=$SDL2_DIR" + ) + + $sdl2dll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1 + if ($sdl2dll) { Copy-Item $sdl2dll.FullName "$dir\bin\Release\" -Force } + + $zip = Package-Build "whisper-$Version-windows-cpu-x64" "$dir\bin\Release" + Write-Ok "CPU build done. Artifact: $zip" +} + +function Build-Vulkan { + Write-Step "Vulkan - Windows x64" + Require-Command msbuild + + # Locate Vulkan SDK + $VULKAN_SDK = $env:VULKAN_SDK + if (-not $VULKAN_SDK) { + $sdkDir = Get-ChildItem "C:\VulkanSDK" -ErrorAction SilentlyContinue | + Sort-Object Name -Descending | Select-Object -First 1 + if (-not $sdkDir) { + Write-Fail "Vulkan SDK not found. Install from https://vulkan.lunarg.com/sdk/home" + throw "Missing Vulkan SDK" + } + $VULKAN_SDK = $sdkDir.FullName + } + Write-Ok "Vulkan SDK: $VULKAN_SDK" + + $SDL2_DIR = Download-SDL2 + $dir = "$BuildDir-vulkan" + + Run-MSBuild $dir @( + "-A", "x64", + "-DCMAKE_BUILD_TYPE=Release", + "-DBUILD_SHARED_LIBS=ON", + "-DGGML_VULKAN=ON", + "-DWHISPER_SDL2=ON", + "-DSDL2_DIR=$SDL2_DIR", + "-DVULKAN_SDK=$VULKAN_SDK" + ) + + $sdl2dll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1 + if ($sdl2dll) { Copy-Item $sdl2dll.FullName "$dir\bin\Release\" -Force } + + $zip = Package-Build "whisper-$Version-windows-vulkan-x64" "$dir\bin\Release" + Write-Ok "Vulkan build done. 
Artifact: $zip" +} + +function Build-ROCm { + Write-Step "ROCm - Windows x64 (target: $GfxTarget)" + Require-Command ninja + + # ── Download ROCm tarball ────────────────────────────────────────────── + $rocmRoot = "C:\opt\rocm" + if (-not (Test-Path "$rocmRoot\bin\amdclang.exe")) { + Write-Info "Downloading ROCm $RocmVersion for $GfxTarget (2-4 GB, takes a few minutes) ..." + + # Replicate resolve-rocm-version.sh: group targets use gfx1151 as the base tarball + $baseTarget = $GfxTarget + if ($GfxTarget -in @("gfx110X","gfx120X","gfx1150","gfx1100")) { + $baseTarget = "gfx1151" + } + $tarballUrl = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${baseTarget}-${RocmVersion}.tar.gz" + Write-Info "URL: $tarballUrl" + + Invoke-WebRequest -Uri $tarballUrl -OutFile rocm.tar.gz + New-Item -ItemType Directory -Force -Path $rocmRoot | Out-Null + & tar -xzf rocm.tar.gz -C $rocmRoot --strip-components=1 + if ($LASTEXITCODE -ne 0) { throw "ROCm extraction failed" } + Remove-Item rocm.tar.gz + Write-Ok "ROCm extracted to $rocmRoot" + } else { + Write-Ok "ROCm already present at $rocmRoot" + } + + # ── Map GFX target (mirrors map-gpu-target.sh) ───────────────────────── + $mappedTarget = switch ($GfxTarget) { + "gfx110X" { "gfx1100;gfx1101;gfx1102" } + "gfx120X" { "gfx1200;gfx1201" } + default { $GfxTarget } + } + Write-Info "GPU target: $GfxTarget -> $mappedTarget" + + $SDL2_DIR = Download-SDL2 + + # ── Set ROCm env ────────────────────────────────────────────────────── + $env:HIP_PATH = $rocmRoot + $env:HIP_PLATFORM = "amd" + $env:PATH = "$rocmRoot\bin;$rocmRoot\lib\llvm\bin;$env:PATH" + + # ── Configure ───────────────────────────────────────────────────────── + $dir = "$BuildDir-rocm-$GfxTarget" + Write-Info "CMake configure (Ninja Multi-Config) ..." + & cmake -S . 
-B $dir ` + -G "Ninja Multi-Config" ` + "-DGPU_TARGETS=$mappedTarget" ` + -DGGML_HIP=ON ` + "-DCMAKE_C_COMPILER=$rocmRoot/lib/llvm/bin/amdclang.exe" ` + "-DCMAKE_CXX_COMPILER=$rocmRoot/lib/llvm/bin/amdclang++.exe" ` + "-DCMAKE_HIP_COMPILER=$rocmRoot/lib/llvm/bin/amdclang++.exe" ` + "-DCMAKE_C_FLAGS=-D__PRFCHWINTRIN_H" ` + "-DCMAKE_CXX_FLAGS=-D__PRFCHWINTRIN_H" ` + "-DCMAKE_HIP_FLAGS=--rocm-path=$rocmRoot" ` + "-DCMAKE_PREFIX_PATH=$rocmRoot" ` + -DCMAKE_BUILD_TYPE=Release ` + -DBUILD_SHARED_LIBS=ON ` + -DWHISPER_SDL2=ON ` + "-DSDL2_DIR=$SDL2_DIR" + if ($LASTEXITCODE -ne 0) { throw "CMake configure failed" } + + Write-Info "Building ..." + & cmake --build $dir --config Release -j $env:NUMBER_OF_PROCESSORS + if ($LASTEXITCODE -ne 0) { throw "Build failed" } + + # ── Copy ROCm DLLs ──────────────────────────────────────────────────── + $binOut = "$dir\bin\Release" + $rocBin = "$rocmRoot\bin" + Write-Info "Copying ROCm DLLs ..." + @("amdhip64_*.dll","amd_comgr*.dll","libhipblas.dll","rocblas.dll", + "rocsolver.dll","hipblaslt.dll","libhipblaslt.dll","hipblas.dll") | ForEach-Object { + Get-ChildItem $rocBin -Name $_ -ErrorAction SilentlyContinue | + ForEach-Object { Copy-Item (Join-Path $rocBin $_) (Join-Path $binOut $_) -Force } + } + $rocblasLib = Join-Path $rocBin "rocblas\library" + if (Test-Path $rocblasLib) { + Copy-Item $rocblasLib -Destination (Join-Path $binOut "rocblas\library") -Recurse -Force + } + $hipblasltLib = Join-Path $rocBin "hipblaslt\library" + if (Test-Path $hipblasltLib) { + Copy-Item $hipblasltLib -Destination (Join-Path $binOut "hipblaslt\library") -Recurse -Force + } + + $sdl2dll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1 + if ($sdl2dll) { Copy-Item $sdl2dll.FullName $binOut -Force } + + $zip = Package-Build "whisper-$Version-windows-rocm-$GfxTarget" $binOut + Write-Ok "ROCm build done. 
Artifact: $zip" +} + +function Build-NPU { + Write-Step "NPU (VitisAI / RyzenAI) - Windows x64" + Require-Command msbuild + + # ── FlexML Runtime ──────────────────────────────────────────────────── + $flexmlDir = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1 + if (-not $flexmlDir) { + Write-Info "Downloading FlexML Runtime ..." + $url = "https://github.com/lemonade-sdk/whisper.cpp/releases/download/deps/flexmlrt1.7.0-win.zip" + Invoke-WebRequest -Uri $url -OutFile flexmlrt.zip + if (-not (Test-Path "flexmlrt.zip") -or (Get-Item "flexmlrt.zip").Length -eq 0) { + throw "flexmlrt.zip download failed or is empty" + } + $mb = [math]::Round((Get-Item "flexmlrt.zip").Length / 1MB, 2) + Write-Ok "Downloaded FlexML: $mb MB" + + & tar xvf flexmlrt.zip + if ($LASTEXITCODE -ne 0) { throw "FlexML extraction failed" } + Remove-Item flexmlrt.zip + + $flexmlDir = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1 + if (-not $flexmlDir) { throw "No flexmlrt directory found after extraction" } + } + Write-Ok "FlexML Runtime: $($flexmlDir.FullName)" + + # ── Run setup.bat via a temporary cmd script ─────────────────────────── + # cmd /c with && is not reliable from PowerShell; use a temp .bat file instead + $tempBat = [System.IO.Path]::GetTempFileName() + ".bat" + $setupPath = Join-Path $flexmlDir.FullName "setup.bat" + Set-Content -Path $tempBat -Value "@echo off`r`ncall `"$setupPath`"`r`nif errorlevel 1 exit /b 1`r`necho FLEXML_OK" + Write-Info "Running FlexML setup.bat ..." + $setupOut = & cmd /c $tempBat 2>&1 + Remove-Item $tempBat -ErrorAction SilentlyContinue + + if ($LASTEXITCODE -ne 0 -or ($setupOut -notmatch "FLEXML_OK")) { + Write-Fail "FlexML setup.bat failed. Output:" + $setupOut | ForEach-Object { Write-Host " $_" } + throw "FlexML setup failed. Ensure NPU drivers (>= .280) are installed." 
+ } + Write-Ok "FlexML environment configured" + + # ── CMake configure + build ─────────────────────────────────────────── + $dir = "$BuildDir-npu" + Write-Info "CMake configure with -DWHISPER_VITISAI=ON ..." + & cmake -B $dir -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_VITISAI=ON + if ($LASTEXITCODE -ne 0) { throw "CMake configure failed" } + + Write-Info "Building ..." + & cmake --build $dir --config Release -j $env:NUMBER_OF_PROCESSORS + if ($LASTEXITCODE -ne 0) { throw "Build failed" } + + # ── List output ─────────────────────────────────────────────────────── + $binOut = "$dir\bin\Release" + if (Test-Path $binOut) { + Write-Info "Build output:" + Get-ChildItem $binOut | Format-Table Name, Length -AutoSize + } else { + throw "Expected output directory $binOut not found" + } + + # ── Copy FlexML DLLs ───────────────────────────────────────────────── + Write-Info "Copying FlexML DLLs ..." + $copied = 0 + foreach ($sub in @("bin", "lib")) { + $subPath = Join-Path $flexmlDir.FullName $sub + if (Test-Path $subPath) { + $dlls = Get-ChildItem "$subPath\*.dll" -ErrorAction SilentlyContinue + if ($dlls) { + Copy-Item $dlls.FullName $binOut -Force + $copied += $dlls.Count + } + } + } + Write-Ok "Copied $copied FlexML DLLs" + + $zip = Package-Build "whisper-$Version-windows-npu-x64" $binOut + Write-Ok "NPU build done. Artifact: $zip" + Write-Info "To run: place the .rai encoder model next to your ggml-*.bin and run whisper-cli.exe normally." 
+} + +# ── Main dispatch ───────────────────────────────────────────────────────────── + +$targets = if ($Backend -eq "all") { @("cpu","vulkan","rocm","npu") } else { @($Backend) } +$results = [ordered]@{} + +foreach ($t in $targets) { + try { + switch ($t) { + "cpu" { Build-CPU } + "vulkan" { Build-Vulkan } + "rocm" { Build-ROCm } + "npu" { Build-NPU } + } + $results[$t] = "[OK] PASSED" + } catch { + Write-Fail "[$t] failed: $_" + $results[$t] = "[FAIL] $_" + } +} + +# ── Summary ─────────────────────────────────────────────────────────────────── + +Write-Step "Build Summary" +foreach ($t in $targets) { + $color = if ($results[$t].StartsWith("[OK]")) { "Green" } else { "Red" } + Write-Host " $t : $($results[$t])" -ForegroundColor $color +} + +Write-Host "" +Write-Host "Artifacts in: $(Resolve-Path $OutputDir)" -ForegroundColor Cyan +if (Test-Path $OutputDir) { + Get-ChildItem $OutputDir -Filter "*.zip" | ForEach-Object { + $mb = [math]::Round($_.Length / 1MB, 2) + Write-Host " $($_.Name) ($mb MB)" + } +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 095a2791de5..fe10876eaf7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -48,6 +48,10 @@ if (WHISPER_OPENVINO) find_package(OpenVINO REQUIRED COMPONENTS Runtime) endif() +if (WHISPER_VITISAI) + find_package(FlexmlRT REQUIRED) +endif() + # # libraries # @@ -101,6 +105,30 @@ if (WHISPER_OPENVINO) set_target_properties(${TARGET} PROPERTIES FOLDER "libs") endif() +if (WHISPER_VITISAI) + set(TARGET whisper.vitisai) + + add_library(${TARGET} OBJECT + vitisai/whisper-vitisai-encoder.h + vitisai/whisper-vitisai-encoder.cpp + ) + + target_include_directories(${TARGET} PUBLIC + . + ) + + set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON) + set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_VITISAI) + + # C++17 required for MSVC (FlexML headers use structured bindings etc.) 
+ if (MSVC) + target_compile_options(${TARGET} PRIVATE /std:c++17) + endif() + + target_link_libraries(${TARGET} PRIVATE ggml flexmlrt::flexmlrt) + set_target_properties(${TARGET} PROPERTIES FOLDER "libs") +endif() + # whisper add_library(whisper @@ -137,6 +165,10 @@ if (WHISPER_OPENVINO) target_link_libraries(whisper PRIVATE whisper.openvino) endif() +if (WHISPER_VITISAI) + target_link_libraries(whisper PRIVATE whisper.vitisai) +endif() + if (WHISPER_MKL) target_link_libraries(whisper PRIVATE MKL::MKL) endif() diff --git a/src/vitisai/whisper-vitisai-encoder.cpp b/src/vitisai/whisper-vitisai-encoder.cpp new file mode 100644 index 00000000000..a6d20a88c9a --- /dev/null +++ b/src/vitisai/whisper-vitisai-encoder.cpp @@ -0,0 +1,204 @@ +// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved. +#include "vitisai/whisper-vitisai-encoder.h" +#include "FlexMLClient.h" +#include "ggml.h" +#include "ggml-backend.h" + +#include +#include +#ifdef _WIN32 + #include +#else + #include + #include + #include +#endif +#include +#include + +struct whisper_vitisai_context { + std::string model_path; + std::shared_ptr runner; + uint8_t * fbs_buffer; + size_t fbs_buffer_size; +}; + +// Function to mmap rai file for Linux and MapViewOfFile for Windows +bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size) { +#ifdef _WIN32 + // Open the file + HANDLE hFile = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + std::fprintf(stderr, "%s: %d: Failed to open rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Get the file size + LARGE_INTEGER fileSize; + if (!GetFileSizeEx(hFile, &fileSize)) { + CloseHandle(hFile); + std::fprintf(stderr, "%s: %d: Failed to get file size for rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Create a file mapping object + HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 
fileSize.QuadPart, NULL); + if (hMapping == NULL) { + CloseHandle(hFile); + std::fprintf(stderr, "%s: %d: Failed to create file mapping for rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Map the file + *buffer = (uint8_t *)MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, fileSize.QuadPart); + if (*buffer == NULL) { + CloseHandle(hMapping); + CloseHandle(hFile); + std::fprintf(stderr, "%s: %d: Failed to map rai file '%s'\n", __func__, __LINE__, path); + return false; + } + *size = fileSize.QuadPart; + return true; +#else + // Open the file + FILE * fd = fopen(path, "rb"); + if (!fd) { + std::fprintf(stderr, "%s: %d: Failed to open rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Get the file size + struct stat st; + if (fstat(fileno(fd), &st) == -1) { + fclose(fd); + std::fprintf(stderr, "%s: %d: Failed to get file size for rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Mmap the file + *buffer = (uint8_t *)mmap(nullptr, st.st_size, PROT_READ, MAP_SHARED, fileno(fd), 0); + if (*buffer == MAP_FAILED) { + fclose(fd); + std::fprintf(stderr, "%s: %d: Failed to mmap rai file '%s'\n", __func__, __LINE__, path); + return false; + } + *size = st.st_size; + return true; +#endif // _WIN32 +} + +void unmap_rai_file(uint8_t * buffer, size_t size) { +#ifdef _WIN32 + UnmapViewOfFile(buffer); +#else + munmap(buffer, size); +#endif // _WIN32 +} + +struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model) { + if (!path_model) { + std::fprintf(stderr, "%s: path_model is null\n", __func__); + return nullptr; + } + + auto * ctx = new whisper_vitisai_context; + ctx->model_path = path_model; + + // Override the model path with the environment variable if it is set + if (const char * env_model_path = std::getenv("OVERRIDE_VITISAI_MODEL_PATH")) { + if (env_model_path[0] != '\0') { + ctx->model_path = env_model_path; + } + } + + // Step 1: Set up the model + flexmlrt::client::Options options; + 
options.modelPath = ctx->model_path; + options.deviceName = "stx"; + options.debug = false; + options.executeMode = 2; + options.extOptions["ai_analyzer_profiling"] = true; // Enable AIA profiling + options.extOptions["enable_preemption"] = true; + + // Check if model_path is rai file and if so, add fbs_buffer and fbs_buffer_size to the options + if (ctx->model_path.find(".rai") != std::string::npos) { + // mmap rai file for both Linux and Windows and pass the buffer to the options + ctx->fbs_buffer = nullptr; + ctx->fbs_buffer_size = 0; + if (map_rai_file(ctx->model_path.c_str(), &ctx->fbs_buffer, &ctx->fbs_buffer_size)) { + options.extOptions["fbs_buffer"] = ctx->fbs_buffer; + options.extOptions["fbs_buffer_size"] = ctx->fbs_buffer_size; + options.subgraphName = "vaiml_par_0"; + options.extOptions["cache_dir"] = std::string("."); + } else { + std::fprintf(stderr, "%s: Failed to mmap rai file '%s'\n", __func__, ctx->model_path.c_str()); + delete ctx; + return nullptr; + } + } + + try { + ctx->runner = std::make_shared(options); + + if (!ctx->runner->good()) { + throw std::runtime_error("Runner creation ran into an error"); + } + } catch (const std::exception & e) { + std::fprintf(stderr, "%s: Exception during Vitis AI runner creation: %s\n", __func__, e.what()); + delete ctx; + return nullptr; + } + return ctx; +} + +void whisper_vitisai_free(struct whisper_vitisai_context * ctx) { + if (!ctx) { + return; + } + + std::fprintf(stderr, "%s: releasing Vitis AI encoder context for model '%s'\n", __func__, ctx->model_path.c_str()); + if (ctx->fbs_buffer) { + unmap_rai_file(ctx->fbs_buffer, ctx->fbs_buffer_size); + } + delete ctx; +} + +int whisper_vitisai_encode(struct whisper_vitisai_context * ctx, struct ggml_tensor * mel, struct ggml_tensor * out) { + if (!ctx || !mel || !out) { + std::fprintf(stderr, "%s: ctx/mel/out must not be null\n", __func__); + return 0; + } + + if (ggml_n_dims(mel) != 2) { + std::fprintf(stderr, "%s: mel tensor expected to have 2 dims, got 
%d\n", __func__, ggml_n_dims(mel)); + return 0; + } + + if (ggml_n_dims(out) != 2) { + std::fprintf(stderr, "%s: out tensor expected to have 2 dims, got %d\n", __func__, ggml_n_dims(out)); + return 0; + } + + // setup input and output tensors for Vitis AI model + std::vector input_tensors, output_tensors; + auto model = ctx->runner; + + // Get tensors as CPU tensors (hwTensor = false) + input_tensors = model->getIOTensors("input", false); + output_tensors = model->getIOTensors("output", false); + + // TODO: add assert checks for tensor numbers and shapes + + input_tensors[0].data = mel->data; + output_tensors[0].data = out->data; + + try { + model->forward(input_tensors, output_tensors); + std::fprintf(stdout, "%s: Vitis AI model inference completed.\n", __func__); + } catch (const std::exception & e) { + std::fprintf(stderr, "%s: Exception during model inference: %s\n", __func__, e.what()); + return 0; + } + + return 1; +} diff --git a/src/vitisai/whisper-vitisai-encoder.h b/src/vitisai/whisper-vitisai-encoder.h new file mode 100644 index 00000000000..05dc812be88 --- /dev/null +++ b/src/vitisai/whisper-vitisai-encoder.h @@ -0,0 +1,32 @@ +// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved. 
+ +#pragma once + +#include +#include +#include + +#if __cplusplus +extern "C" { +#endif + +struct whisper_vitisai_context; + +struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model); +void whisper_vitisai_free(struct whisper_vitisai_context * ctx); + +// Function to mmap rai file for Linux and MapViewOfFile for Windows +bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size); +// Function to unmap rai file for Linux and UnmapViewOfFile for Windows +void unmap_rai_file(uint8_t * buffer, size_t size); + +struct ggml_tensor; + +int whisper_vitisai_encode( + struct whisper_vitisai_context * ctx, + struct ggml_tensor * mel, + struct ggml_tensor * out); + +#if __cplusplus +} +#endif diff --git a/src/whisper.cpp b/src/whisper.cpp index 2f356da0f06..a038a5959ea 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -14,6 +14,10 @@ #include "openvino/whisper-openvino-encoder.h" #endif +#ifdef WHISPER_USE_VITISAI +#include "vitisai/whisper-vitisai-encoder.h" +#endif + #include #include #include @@ -903,6 +907,10 @@ struct whisper_state { whisper_openvino_context * ctx_openvino = nullptr; #endif +#ifdef WHISPER_USE_VITISAI + whisper_vitisai_context * ctx_vitisai = nullptr; +#endif + // [EXPERIMENTAL] token-level timestamps data int64_t t_beg = 0; int64_t t_last = 0; @@ -1970,7 +1978,13 @@ static bool whisper_encode_external(const whisper_state & wstate) { const bool use_openvino = wstate.ctx_openvino != nullptr; #endif - return use_coreml || use_openvino; +#ifndef WHISPER_USE_VITISAI + const bool use_vitisai = false; +#else + const bool use_vitisai = wstate.ctx_vitisai != nullptr; +#endif + + return use_coreml || use_openvino || use_vitisai; } static struct ggml_cgraph * whisper_build_graph_conv( @@ -2411,6 +2425,8 @@ static bool whisper_encode_internal( #if defined(WHISPER_USE_COREML) whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data); +#elif defined(WHISPER_USE_VITISAI) 
+ whisper_vitisai_encode(wstate.ctx_vitisai, mel, wstate.embd_enc); #elif defined(WHISPER_USE_OPENVINO) whisper_openvino_encode(wstate.ctx_openvino, mel, wstate.embd_enc); #endif @@ -3346,6 +3362,20 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) { } #endif +#ifdef WHISPER_USE_VITISAI +// replace extension with Vitis AI encoder artifact (.rai) +static std::string whisper_get_vitisai_path_encoder_cache(std::string path_bin) { + auto pos = path_bin.rfind('.'); + if (pos != std::string::npos) { + path_bin = path_bin.substr(0, pos); + } + + path_bin += "-encoder-vitisai.rai"; + + return path_bin; +} +#endif + #ifdef WHISPER_USE_OPENVINO // replace .bin with-encoder-openvino.xml static std::string whisper_openvino_get_path_encoder(std::string path_bin) { @@ -3455,6 +3485,19 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { } #endif +#ifdef WHISPER_USE_VITISAI + const auto path_vitisai = whisper_get_vitisai_path_encoder_cache(ctx->path_model); + + state->ctx_vitisai = whisper_vitisai_init(path_vitisai.c_str()); + if (!state->ctx_vitisai) { + WHISPER_LOG_ERROR("%s: failed to load Vitis AI model from '%s'\n", __func__, path_vitisai.c_str()); + whisper_free_state(state); + return nullptr; + } else { + WHISPER_LOG_INFO("%s: Vitis AI model loaded\n", __func__); + } +#endif + state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx); state->batch = whisper_batch_init(ctx->model.hparams.n_text_ctx, WHISPER_MAX_DECODERS); @@ -3821,6 +3864,13 @@ void whisper_free_state(struct whisper_state * state) { } #endif +#ifdef WHISPER_USE_VITISAI + if (state->ctx_vitisai != nullptr) { + whisper_vitisai_free(state->ctx_vitisai); + state->ctx_vitisai = nullptr; + } +#endif + whisper_batch_free(state->batch); ggml_backend_sched_free(state->sched_conv.sched); @@ -4312,11 +4362,20 @@ static int whisper_has_openvino(void) { #endif } +static int whisper_has_vitisai(void) { +#ifdef WHISPER_USE_VITISAI + return 1; +#else + return 0; 
+#endif +} + const char * whisper_print_system_info(void) { static std::string s; s = ""; s += "WHISPER : "; + s += "VITISAI = " + std::to_string(whisper_has_vitisai()) + " | "; s += "COREML = " + std::to_string(whisper_has_coreml()) + " | "; s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | ";