diff --git a/.github/actions/cleanup-processes-linux/action.yml b/.github/actions/cleanup-processes-linux/action.yml
new file mode 100644
index 00000000000..58649fcc41b
--- /dev/null
+++ b/.github/actions/cleanup-processes-linux/action.yml
@@ -0,0 +1,19 @@
+name: 'Cleanup GPU Processes (Linux)'
+description: 'Kill zombie whisper/GPU processes on self-hosted Linux runners'
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Kill zombie processes
+      shell: bash
+      run: |
+        echo "=== Cleaning up stale processes ==="
+        # Best effort: pkill returns non-zero when nothing matches, so its status is ignored.
+        for pattern in "whisper-cli" "whisper-bench" "whisper-server" "ctest.*whisper"; do
+          pkill -f "$pattern" 2>/dev/null || true
+        done
+        if command -v rocm-smi &>/dev/null; then
+          echo "=== GPU process check ==="
+          rocm-smi --showpids 2>/dev/null || true
+        fi
+        echo "=== Cleanup complete ==="
diff --git a/.github/actions/cleanup-processes-windows/action.yml b/.github/actions/cleanup-processes-windows/action.yml
new file mode 100644
index 00000000000..91a9424dd22
--- /dev/null
+++ b/.github/actions/cleanup-processes-windows/action.yml
@@ -0,0 +1,15 @@
+name: 'Cleanup GPU Processes (Windows)'
+description: 'Kill zombie whisper/GPU processes on self-hosted Windows runners'
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Kill zombie processes
+      shell: pwsh
+      run: |
+        Write-Host "=== Cleaning up stale processes ==="
+        # Best effort: names with no running process, or processes that cannot be stopped, are silently skipped.
+        @("whisper-cli", "whisper-bench", "whisper-server", "ctest") | ForEach-Object {
+          Get-Process -Name $_ -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue
+        }
+        Write-Host "=== Cleanup complete ==="
diff --git a/.github/workflows/bindings-go.yml b/.github/workflows/bindings-go.yml
deleted file mode 100644
index 83473e4636a..00000000000
--- a/.github/workflows/bindings-go.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Bindings Tests (Go)
-on:
-  push:
-    paths:
-      - bindings/go/**
-      - whisper.h
-  pull_request:
-    paths:
-      - bindings/go/**
-      - whisper.h
-
-jobs:
-  ubuntu-22:
-    runs-on: ubuntu-22.04
-    steps:
-      - uses: actions/setup-go@v6
-        with:
-          go-version: '^1.23'
-      - uses: actions/checkout@v6
-      - run: |
-          cd bindings/go
-          make test
diff --git a/.github/workflows/bindings-ruby.yml b/.github/workflows/bindings-ruby.yml
deleted file mode 100644
index c3f158e26e4..00000000000
--- a/.github/workflows/bindings-ruby.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-name: Bindings Tests (Ruby)
-
-on:
-  push:
-    branches:
-      - master
-  pull_request:
-    types: [opened, synchronize, reopened]
-
-jobs:
-  ubuntu-22:
-    runs-on: ubuntu-22.04
-    defaults:
-      run:
-        working-directory: bindings/ruby
-    steps:
-      - uses: ruby/setup-ruby@v1
-        with:
-          ruby-version: '3.2'
-      - uses: actions/checkout@v6
-      - run: rake test
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index fb115b22abb..5c4710b1663 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,77 +1,83 @@
-name: CI
+name: AMD Build & Release
 
+# ──────────────────────────────────────────────────────────────────────────────
+# Triggers
+# ──────────────────────────────────────────────────────────────────────────────
 on:
-  push:
-    branches:
-      - master
-    tags:
-      - 'v*'
-    paths: ['.github/workflows/build.yml',
-            '**/CMakeLists.txt',
-            '**/Makefile',
-            '**/*.mk',
-            '**/*.cmake',
-            '**/*.in',
-            '**/*.h',
-            '**/*.hpp',
-            '**/*.c',
-            '**/*.cpp',
-            '**/*.cu',
-            '**/*.cuh',
-            '**/*.cl',
-            '**/*.swift',
-            '**/*.m',
-            '**/*.mm',
-            '**/*.metal',
-            '**/*.comp',
-            '**/*.java']
-
-  pull_request:
-    types: [opened, synchronize, reopened]
+  schedule:
+    - cron: '0 2 * * 1'   # Weekly – every Monday at 02:00 UTC
   workflow_dispatch:
     inputs:
       create_release:
-        description: 'Create new release'
+        description: 'Create GitHub Release'
         required: true
         type: boolean
+        default: false
       pre_release_tag:
-        description: 'Pre-release tag name'
+        description: 'Pre-release tag name (optional, overrides auto-tag)'
         required: false
         type: string
       run_type:
-        description: 'Workflow type to run'
+        description: 'Workflow scope'
         required: true
         type: choice
         options:
-          - full-ci
-          - release-only
+          - full-ci        # all jobs
+          - release-only   # release-producing jobs only
+        default: full-ci
+      gfx_targets:
+        description: 'ROCm GPU targets (comma-separated)'
+        required: false
+        type: string
+        default: 'gfx1151,gfx1150,gfx120X,gfx110X'
+      rocm_version:
+        description: 'ROCm version (e.g. 7.12.0)'
+        required: false
+        type: string
+        default: '7.12.0'
+  push:  # only a `tags` filter is given, so branch pushes do not start this workflow
+    tags:
+      - 'v*'
+  pull_request:
+    branches:  # matched against the pull request's base (target) branch
+      - master
+      - main
+    types: [opened, synchronize, reopened]
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
 permissions:
-  contents: write  # for creating release
+  contents: write  # needed to create the GitHub Release
 
+# ──────────────────────────────────────────────────────────────────────────────
+# Shared environment
+# ──────────────────────────────────────────────────────────────────────────────
 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
-  ubuntu_image: "ubuntu:22.04"
+  GFX_TARGETS: ${{ github.event.inputs.gfx_targets || 'gfx1151,gfx1150,gfx120X,gfx110X' }}  # dispatch input if set, otherwise the same list as the input's default
+  ROCM_VERSION: ${{ github.event.inputs.rocm_version || '7.12.0' }}  # dispatch input if set, otherwise the same version as the input's default
+  FLEXML_URL: "https://github.com/lemonade-sdk/whisper.cpp/releases/download/deps/flexmlrt1.7.0-win.zip"
   VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"
 
 jobs:
+
+# ════════════════════════════════════════════════════════════════════════════════
+# 0. Determine release tag
+# ════════════════════════════════════════════════════════════════════════════════
   determine-tag:
     runs-on: ubuntu-latest
     outputs:
-      tag_name: ${{ steps.tag.outputs.name }}
+      tag_name:       ${{ steps.tag.outputs.name }}
+      version:        ${{ steps.tag.outputs.version }}
       should_release: ${{ steps.tag.outputs.should_release }}
-
     steps:
-      - name: Checkout with full history
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
-      - name: Determine tag name
+      - name: Determine tag and version
         id: tag
         shell: bash
         run: |
@@ -80,1481 +86,1255 @@ jobs:
           CUSTOM_TAG="${{ github.event.inputs.pre_release_tag }}"
           SHOULD_RELEASE="false"
 
-          echo "Raw values:"
-          echo "BUILD_NUMBER: $BUILD_NUMBER"
-          echo "SHORT_HASH: $SHORT_HASH"
-          echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}"
-          echo "CUSTOM_TAG: $CUSTOM_TAG"
-
           if [[ "${{ github.ref_type }}" == "tag" ]]; then
-            echo "Using pushed tag name"
+            # Triggered by sync.yml pushing a vX.Y.Z tag — this is the primary release path
             TAG_NAME="${{ github.ref_name }}"
             SHOULD_RELEASE="true"
           elif [[ -n "$CUSTOM_TAG" ]]; then
-            echo "Using custom tag"
-            TAG_NAME="${CUSTOM_TAG}"
+            TAG_NAME="$CUSTOM_TAG"
             SHOULD_RELEASE="true"
           elif [[ "${{ github.event.inputs.create_release }}" == "true" ]]; then
-            echo "Manual release requested"
-            SHOULD_RELEASE="true"
             TAG_NAME="b${BUILD_NUMBER}"
-          elif [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
-            echo "Using master branch format"
+            SHOULD_RELEASE="true"
+          elif [[ "$BRANCH_NAME" == "main" || "$BRANCH_NAME" == "master" ]]; then  # BRANCH_NAME is exported by the workflow-level env; read it as a shell variable so the branch name is never pasted into the script text
             TAG_NAME="b${BUILD_NUMBER}"
             SHOULD_RELEASE="false"
           else
-            echo "Using non-master branch format"
-            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
-            TAG_NAME="${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}"
+            SAFE=$(echo "$BRANCH_NAME" | tr '/' '-')  # '/' is replaced by '-' in the tag; shell variable (not expression interpolation) so quotes or $() in a branch name stay inert
+            TAG_NAME="${SAFE}-b${BUILD_NUMBER}-${SHORT_HASH}"
             SHOULD_RELEASE="false"
           fi
 
-          echo "Final tag name: $TAG_NAME"
-          echo "Should release: $SHOULD_RELEASE"
-          echo "name=$TAG_NAME" >> $GITHUB_OUTPUT
-          echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT
-
-
-  ubuntu-22:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
-
-    strategy:
-      fail-fast: false
-      matrix:
-        arch: [linux/amd64, linux/ppc64le]
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
-        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            export DEBIAN_FRONTEND=noninteractive
-            sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-            sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
-            apt update
-            apt install -y build-essential libsdl2-dev cmake git
-            cmake -B build
-            cmake --build build --config Release -j $(nproc)'
-
-  ubuntu-22-arm64:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
-
-    strategy:
-      fail-fast: false
-      matrix:
-        arch: [linux/arm64]
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
-        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            export DEBIAN_FRONTEND=noninteractive
-            sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-            sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
-            apt update
-            apt install -y build-essential libsdl2-dev cmake git
-            cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
-            cmake --build build --config Release -j $(nproc)'
-
-  ubuntu-22-arm-v7:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
-
-    strategy:
-      fail-fast: false
-      matrix:
-        arch: [linux/arm/v7]
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
+          # Version used in artifact filenames — keep leading 'v' to match lemonade expectations
+          # e.g. v1.8.4 → v1.8.4,  b1234 → b1234
+          VERSION="${TAG_NAME}"
 
-      - name: Build ${{ matrix.arch }}
-        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            export DEBIAN_FRONTEND=noninteractive
-            sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-            sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
-            apt update
-            apt install -y build-essential libsdl2-dev cmake git
-            cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
-            cmake --build build --config Release -j $(nproc)'
-
-  macOS-latest:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: macOS-latest
-
-    strategy:
-      matrix:
-        destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
+          echo "name=$TAG_NAME"                >> $GITHUB_OUTPUT
+          echo "version=$VERSION"              >> $GITHUB_OUTPUT
+          echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT
 
+# ════════════════════════════════════════════════════════════════════════════════
+# 1. ROCm matrix (Linux + Windows per GFX target)
+# ════════════════════════════════════════════════════════════════════════════════
+  prepare-rocm-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      ubuntu_matrix:  ${{ steps.m.outputs.ubuntu_matrix }}
+      windows_matrix: ${{ steps.m.outputs.windows_matrix }}
     steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: ccache
-        uses: hendrikmuhs/ccache-action@v1.2.16
-        with:
-          key: macOS-latest-swift
-          evict-old-files: 1d
-
-      - name: Dependencies
+      - name: Build matrix JSON
+        id: m
         run: |
-          brew update
-          cmake --version
-          brew install sdl2
+          targets="$GFX_TARGETS"  # from the workflow-level env; read as a shell variable so the input text is never pasted into the script
+          arr=$(echo "$targets" | tr ',' '\n' | sed 's/^ *//;s/ *$//' | grep -v '^$' | jq -R . | jq -s .)  # split on commas, trim spaces, drop empty entries (e.g. a trailing comma), emit a JSON array
 
-      - name: Build
-        run: |
-          sysctl -a
-          cmake -B build -G Xcode \
-            -DGGML_METAL_USE_BF16=ON \
-            -DGGML_METAL_EMBED_LIBRARY=ON \
-            -DWHISPER_BUILD_EXAMPLES=OFF \
-            -DWHISPER_BUILD_TESTS=OFF \
-            -DWHISPER_BUILD_SERVER=OFF \
-            -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
-          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
-
-
-#  freeBSD-latest:
-#    runs-on: macos-13
-#
-#    steps:
-#      - name: Clone
-#        uses: actions/checkout@v6
-#
-#      - name: Build
-#        uses: cross-platform-actions/action@v0.27.0
-#        with:
-#          operating_system: freebsd
-#          version: '14.2'
-#          run: |
-#            sudo pkg update
-#            sudo pkg install -y gmake sdl2 cmake git
-#            cmake -B build
-#            cmake --build build --config Release
-
-  ubuntu-22-gcc:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
+          ubuntu_matrix=$(echo "$arr" | jq -c \
+            '{gfx_target: ., build: ["Release"], sdl2: ["ON"], arch: ["linux/amd64"]}')
 
-    strategy:
-      fail-fast: false
-      matrix:
-        build: [Debug, Release]
-        arch: [linux/amd64, linux/ppc64le]
+          windows_matrix=$(echo "$arr" | jq -c \
+            '{gfx_target: ., build: ["Release"], sdl2: ["ON"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}')
 
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
-        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            export DEBIAN_FRONTEND=noninteractive
-            sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-            sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
-            apt update
-            apt install -y build-essential cmake libsdl2-dev git
-            cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-            make
-            ctest -L gh --output-on-failure'
-
-  ubuntu-22-gcc-arm64:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
+          echo "ubuntu_matrix=$ubuntu_matrix"   >> $GITHUB_OUTPUT
+          echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT
 
-    strategy:
-      fail-fast: false
-      matrix:
-        build: [Debug, Release]
-        arch: [linux/arm64]
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
-        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            export DEBIAN_FRONTEND=noninteractive
-            sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-            sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
-            apt update
-            apt install -y build-essential cmake libsdl2-dev git
-            cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
-            make
-            ctest -L gh --output-on-failure'
-
-  ubuntu-22-gcc-arm-v7:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
+# ════════════════════════════════════════════════════════════════════════════════
+# 2. ROCm — Linux
+# ════════════════════════════════════════════════════════════════════════════════
+  linux-rocm:
     runs-on: ubuntu-22.04
-
-    strategy:
-      fail-fast: false
-      matrix:
-        build: [Debug, Release]
-        arch: [linux/arm/v7]
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
-        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            export DEBIAN_FRONTEND=noninteractive
-            sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-            sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
-            apt update
-            apt install -y build-essential cmake libsdl2-dev git
-            cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
-            make
-            ctest -L gh --output-on-failure'
-
-  ubuntu-22-clang:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
-
-    strategy:
-      fail-fast: false
-      matrix:
-        build: [Debug, Release]
-        #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
-        # TODO: arm/v7 disabled due to clang bug
-        #       https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990
-        arch: [linux/amd64, linux/arm64, linux/ppc64le]
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
-        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            export DEBIAN_FRONTEND=noninteractive
-            sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-            sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
-            apt update
-            apt install -y clang build-essential cmake libsdl2-dev git
-            cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
-            make
-            ctest -L gh --output-on-failure'
-
-  ubuntu-22-gcc-sanitized:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
-
+    needs: [determine-tag, prepare-rocm-matrix]
     strategy:
+      matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.ubuntu_matrix) }}
       fail-fast: false
-      matrix:
-        sanitizer: [ADDRESS, THREAD, UNDEFINED]
-        arch: [linux/amd64]
 
     steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Build ${{ matrix.arch }}
-        run: |
-          docker run --platform ${{ matrix.arch }} --rm \
-            -v ${{ github.workspace }}:/workspace \
-            -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
-            set -e
-            export DEBIAN_FRONTEND=noninteractive
-            sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-            sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list
-
-            apt update
-            apt install -y build-essential cmake git
-            cmake . -DCMAKE_BUILD_TYPE=Debug \
-              -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON \
-              -DGGML_OPENMP=OFF
-            make
-            ctest -L gh --output-on-failure'
-
-  ubuntu-22-cmake-sycl:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
-
-    strategy:
-      fail-fast: false
-      matrix:
-        dwhisper_sycl: [ON]
-        dcmake_c_compiler: [icx]
-        dcmake_cxx_compiler: [icpx]
-        arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
-
-    continue-on-error: true
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: add oneAPI to apt
-        shell: bash
-        run: |
-          cd /tmp
-          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
-
-      - name: install oneAPI dpcpp compiler
-        shell: bash
+      - name: Free disk space
         run: |
-          sudo apt update
-          sudo apt install intel-oneapi-compiler-dpcpp-cpp git
+          sudo rm -rf /usr/local/lib/android /opt/ghc /usr/local/share/boost \
+            /usr/share/dotnet /usr/local/.ghcup /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force 2>/dev/null || true
 
-      - name: install oneAPI MKL library
-        shell: bash
-        run: |
-          sudo apt install intel-oneapi-mkl-devel git
+      - uses: actions/checkout@v4
 
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+      - name: Install dependencies
+        run: sudo apt-get update && sudo apt-get install -y cmake ninja-build curl build-essential libsdl2-dev git patchelf
 
-      - name: Build
-        id: cmake_build
+      - name: Download ROCm tarball
         run: |
-          source /opt/intel/oneapi/setvars.sh
-          mkdir build
-          cd build
-          cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
-          cmake --build . --config Release -j $(nproc)
-
-  ubuntu-22-cmake-sycl-fp16:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
-
-    strategy:
-      fail-fast: false
-      matrix:
-        dwhisper_sycl: [ON]
-        dcmake_c_compiler: [icx]
-        dcmake_cxx_compiler: [icpx]
-        arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
+          source ci/resolve-rocm-version.sh linux "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}"
+          echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV
+          sudo mkdir -p /opt/rocm
+          curl -L "$ROCM_TARBALL_URL" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1
 
-    continue-on-error: true
-
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: add oneAPI to apt
-        shell: bash
+      - name: Set ROCm env
         run: |
-          cd /tmp
-          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+          echo "HIP_PATH=/opt/rocm"  >> $GITHUB_ENV
+          echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV
+          echo "HIP_PLATFORM=amd"    >> $GITHUB_ENV
+          echo "/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH" >> $GITHUB_PATH
 
-      - name: install oneAPI dpcpp compiler
-        shell: bash
+      - name: Find bitcode path
         run: |
-          sudo apt update
-          sudo apt install intel-oneapi-compiler-dpcpp-cpp git
+          BITCODE_PATH=$(find /opt/rocm -type d -name bitcode -print -quit)
+          [ -z "$BITCODE_PATH" ] && { echo "::error::bitcode dir not found"; exit 1; }
+          echo "ROCM_BITCODE_PATH=$BITCODE_PATH" >> $GITHUB_ENV
 
-      - name: install oneAPI MKL library
-        shell: bash
+      - name: Configure CMake
         run: |
-          sudo apt install intel-oneapi-mkl-devel
-
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+          source ci/map-gpu-target.sh "${{ matrix.gfx_target }}"
+          cmake -S . -B build -G Ninja \
+            -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \
+            -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \
+            -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm --rocm-device-lib-path=${{ env.ROCM_BITCODE_PATH }}" \
+            -DCMAKE_PREFIX_PATH=/opt/rocm \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
+            -DGPU_TARGETS="$MAPPED_GPU_TARGET" \
+            -DGGML_HIP=ON \
+            -DWHISPER_BUILD_SERVER=ON \
+            -DWHISPER_SDL2=${{ matrix.sdl2 }}
 
       - name: Build
-        id: cmake_build
         run: |
-          source /opt/intel/oneapi/setvars.sh
-          mkdir build
-          cd build
-          cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
-          cmake --build . --config Release -j $(nproc)
+          exit_code=0  # this step uses the default 'bash -e' shell: a bare failing command would abort before the log summary below
+          cmake --build build --config ${{ matrix.build }} -j$(nproc) > build.log 2>&1 || exit_code=$?
+          grep -E "error:|FAILED|Linking|Built target|warning:" build.log || true
+          if [ $exit_code -ne 0 ]; then
+            echo "--- Last 100 lines of build log ---"
+            tail -n 100 build.log
+            echo "Build failed with exit code $exit_code"
+            exit $exit_code
+          fi
+          echo "Build succeeded."
 
-  windows-msys2:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: windows-latest
+      - name: Verify build output
+        run: |
+          # Both executables must exist; the first one missing is reported as a
+          # workflow error, the bin directory is listed, and the step fails.
+          for exe in whisper-cli whisper-server; do
+            if [ ! -f "build/bin/$exe" ]; then
+              echo "::error::$exe not found - build likely truncated"
+              ls -lh build/bin/ 2>/dev/null || true
+              exit 1
+            fi
+          done
+          echo "Build output:"
+          ls -lh build/bin/whisper-cli build/bin/whisper-server
+
+      - name: Inspect shared library dependencies
+        run: |
+          echo "--- ldd whisper-cli ---"
+          ldd build/bin/whisper-cli || true  # diagnostic output only: an ldd failure does not fail the step
+          echo "--- ldd whisper-server ---"
+          ldd build/bin/whisper-server || true
+          echo "--- missing libs ---"
+          MISSING_CLI=$(ldd build/bin/whisper-cli 2>/dev/null | grep "not found" || true)
+          MISSING_SRV=$(ldd build/bin/whisper-server 2>/dev/null | grep "not found" || true)
+          if [ -z "$MISSING_CLI" ] && [ -z "$MISSING_SRV" ]; then
+            echo "All dependencies resolved."
+          else  # unresolved libraries are printed, but this step does not exit on them
+            [ -n "$MISSING_CLI" ] && echo "whisper-cli missing:" && echo "$MISSING_CLI"
+            [ -n "$MISSING_SRV" ] && echo "whisper-server missing:" && echo "$MISSING_SRV"
+          fi
+          echo "--- .so* files in build/bin/ ---"
+          ls -lh build/bin/*.so* 2>/dev/null || echo "(none)"
+
+      - name: Copy ROCm runtime libs
+        run: |
+          BIN="build/bin"
+          mkdir -p "$BIN"
+          # Stage the project's own shared libraries next to the executables (best effort: missing files are ignored).
+          cp -a build/src/libwhisper.so*                "$BIN"/ 2>/dev/null || true
+          cp -a build/ggml/src/libggml.so*              "$BIN"/ 2>/dev/null || true
+          cp -a build/ggml/src/libggml-base.so*         "$BIN"/ 2>/dev/null || true
+          cp -a build/ggml/src/libggml-cpu.so*          "$BIN"/ 2>/dev/null || true
+          cp -a build/ggml/src/ggml-hip/libggml-hip.so* "$BIN"/ 2>/dev/null || true
+          export LD_LIBRARY_PATH="$PWD/$BIN:/opt/rocm/lib:/opt/rocm/lib/llvm/lib:/opt/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}"
+
+          # Direct deps: with LD_LIBRARY_PATH set above, copy every library ldd resolves under /opt/rocm for each executable ELF file in $BIN.
+
+          for binary in "$BIN"/*; do
+            [ -f "$binary" ] && [ -x "$binary" ] && file "$binary" | grep -q ELF || continue
+            ldd "$binary" 2>/dev/null | awk '{print $3}' | grep -E "^/opt/rocm" | while read -r lib; do
+              [ -f "$lib" ] || continue
+              cp -n "$lib" "$BIN/" 2>/dev/null || true
+              # Also copy the soname symlink if it exists alongside the real file
+              dir=$(dirname "$lib")
+              base=$(basename "$lib")
+              find "$dir" -maxdepth 1 -name "${base%%.*}.so*" -exec cp -Pn {} "$BIN/" \; 2>/dev/null || true
+            done
+          done
+
+          # Transitive deps: repeat ldd over any newly copied ROCm .so to catch indirect deps
+          for pass in 1 2; do
+            for lib in "$BIN"/lib*.so*; do
+              [ -f "$lib" ] && [ ! -L "$lib" ] || continue
+              ldd "$lib" 2>/dev/null | awk '{print $3}' | grep -E "^/opt/rocm" | while read -r dep; do
+                [ -f "$dep" ] || continue
+                cp -n "$dep" "$BIN/" 2>/dev/null || true
+              done
+            done
+          done
+
+          # Kernel library data dirs (loaded at runtime by path, not via soname); 'if' form so an absent dir leaves exit status 0 for the step
+          if [ -d /opt/rocm/lib/rocblas/library ];   then mkdir -p "$BIN/rocblas";   cp -r /opt/rocm/lib/rocblas/library   "$BIN/rocblas/";   fi
+          if [ -d /opt/rocm/lib/hipblaslt/library ]; then mkdir -p "$BIN/hipblaslt"; cp -r /opt/rocm/lib/hipblaslt/library "$BIN/hipblaslt/"; fi
+
+      - name: Set portable RPATH
+        run: |
+          cd build/bin
+          for f in *.so* whisper-*; do  # every regular (non-symlink) ELF file gets rpath '$ORIGIN', i.e. the directory the file itself lives in
+            [ -f "$f" ] && [ ! -L "$f" ] && file "$f" | grep -q ELF && patchelf --set-rpath '$ORIGIN' "$f" 2>/dev/null || true
+          done
+
+      - name: Package
+        run: |
+          # Copy build/bin into a versioned, per-target directory, tar it, and publish the archive name as ARCHIVE.
+          pkg_dir="whisper-${{ needs.determine-tag.outputs.version }}-linux-rocm-${{ matrix.gfx_target }}"
+          tarball="${pkg_dir}.tar.gz"
+          mkdir -p "$pkg_dir" && cp -r build/bin/* "$pkg_dir/"
+          tar -czf "$tarball" "$pkg_dir"
+          echo "ARCHIVE=$tarball" >> $GITHUB_ENV
+
+      # Upload the packaged tarball; ARCHIVE is the env value written by the Package step,
+      # used here both as the artifact name and as the file path.
+      - uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.ARCHIVE }}
+          path: ${{ env.ARCHIVE }}
 
+# ════════════════════════════════════════════════════════════════════════════════
+# 3. ROCm — Windows
+# ════════════════════════════════════════════════════════════════════════════════
+  # ROCm/HIP build on a hosted Windows runner; waits for the tag and matrix jobs.
+  windows-rocm:
+    runs-on: windows-2022
+    needs: [determine-tag, prepare-rocm-matrix]
     strategy:
+      # The matrix is not static: it is parsed from the JSON output "windows_matrix"
+      # of the prepare-rocm-matrix job.
+      matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.windows_matrix) }}
       fail-fast: false
-      matrix:
-        include:
-          - { sys: UCRT64,  env: ucrt-x86_64,  build: Release }
-          - { sys: CLANG64, env: clang-x86_64, build: Release }
 
     steps:
-      - name: Clone
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
 
-      - name: Setup ${{ matrix.sys }}
-        uses: msys2/setup-msys2@v2
-        with:
-          update: true
-          msystem: ${{matrix.sys}}
-          install: >-
-            base-devel
-            git
-            mingw-w64-${{matrix.env}}-toolchain
-            mingw-w64-${{matrix.env}}-cmake
-            mingw-w64-${{matrix.env}}-SDL2
-            mingw-w64-${{matrix.env}}-openblas
+      - name: Install Ninja
+        shell: powershell
+        run: choco install ninja -y
 
-      - name: Build using CMake
-        shell: msys2 {0}
+      - name: Fetch SDL2 and patch header
+        if: matrix.sdl2 == 'ON'
+        shell: powershell
         run: |
-            cmake -B build -DWHISPER_SDL2=ON
-            cmake --build build --config ${{ matrix.build }} -j $(nproc)
+          # Download and unpack the SDL2 VC development package for the matrix version.
+          $url = "https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip"
+          Invoke-WebRequest -Uri $url -OutFile sdl2.zip
+          7z x sdl2.zip
+          # Locate the extracted CMake package config; its directory becomes SDL2_DIR.
+          $cmake = Get-ChildItem -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1
+          if (-not $cmake) { Write-Error "sdl2-config.cmake not found"; exit 1 }
+          # Windows PowerShell 5.1 (`shell: powershell`) writes UTF-16 when appending with `>>`,
+          # while the runner reads GITHUB_ENV as UTF-8. Use Out-File with an explicit encoding,
+          # as the other steps of this workflow already do.
+          "SDL2_DIR=$($cmake.DirectoryName)" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
+          # Comment out the `_m_prefetch` prototype in SDL_endian.h when it is present.
+          # NOTE(review): presumably avoids a clash with the compiler's own declaration
+          # (the configure step also defines __PRFCHWINTRIN_H) - confirm.
+          $hdr = Get-ChildItem -Recurse -Filter "SDL_endian.h" | Select-Object -First 1
+          if (-not $hdr) { Write-Error "SDL_endian.h not found"; exit 1 }
+          $c = Get-Content $hdr.FullName -Raw
+          if ($c -match 'extern void _m_prefetch') {
+            $c = $c -replace 'extern void _m_prefetch\(void \*__P\);','// extern void _m_prefetch(void *__P);'
+            Set-Content $hdr.FullName $c
+          }
+
+      - name: Download and extract ROCm tarball
+        shell: powershell
+        run: |
+          $gfx  = "${{ matrix.gfx_target }}"
+          $ver  = "${{ env.ROCM_VERSION }}"
+          # Family targets use a "<family>-all" tarball name; every other target
+          # (gfx1150, gfx1151, gfx1152, ...) uses its own name unchanged.
+          $base = switch ($gfx) {
+            "gfx110X" { "gfx110X-all" }
+            "gfx120X" { "gfx120X-all" }
+            default   { $gfx          }
+          }
+          $url  = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${base}-${ver}.tar.gz"
+          Write-Host "ROCm URL: $url"
+          # Recorded for later steps via the job environment.
+          "DETECTED_ROCM_VERSION=$ver" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
+          # --fail: make HTTP errors (e.g. 404 for an unknown target/version) a curl failure
+          # instead of saving the server's error page as rocm.tar.gz and failing later in tar.
+          curl.exe -L --fail --retry 3 --retry-delay 5 -o rocm.tar.gz $url
+          if ($LASTEXITCODE -ne 0) { Write-Error "curl failed with exit code $LASTEXITCODE"; exit 1 }
+          New-Item -ItemType Directory -Force -Path "C:\opt\rocm" | Out-Null
+          # --strip-components=1 drops the archive's top-level directory so the tree lands directly in C:\opt\rocm.
+          tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1
+          if ($LASTEXITCODE -ne 0) { Write-Error "tar extraction failed with exit code $LASTEXITCODE"; exit 1 }
+
+      - name: Map GPU target
+        id: gpu
+        shell: powershell
+        run: |
+          # Expand a GPU family name into its semicolon-separated architecture list;
+          # a name without an entry in the table is passed through unchanged.
+          $families = @{
+            "gfx110X" = "gfx1100;gfx1101;gfx1102"
+            "gfx120X" = "gfx1200;gfx1201"
+          }
+          $gfx = "${{ matrix.gfx_target }}"
+          if ($families.ContainsKey($gfx)) { $mapped = $families[$gfx] } else { $mapped = $gfx }
+          Write-Host "Mapped GPU target: $gfx -> $mapped"
+          # Publish the result as the step output "mapped".
+          "mapped=$mapped" | Out-File $env:GITHUB_OUTPUT -Append -Encoding utf8
+
+      - name: Configure CMake
+        shell: powershell
+        run: |
+          # Use the ROCm tree at C:\opt\rocm: set HIP_PATH/HIP_PLATFORM and put its bin and
+          # LLVM bin directories first on PATH for this step only.
+          $env:HIP_PATH   = "C:\opt\rocm"
+          $env:HIP_PLATFORM = "amd"
+          $env:PATH = "$env:HIP_PATH\bin;$env:HIP_PATH\lib\llvm\bin;$env:PATH"
+          # C, C++ and HIP are all compiled with amdclang from that tree; GPU_TARGETS comes from
+          # the "Map GPU target" step output. Both C and C++ flags predefine __PRFCHWINTRIN_H.
+          # NOTE(review): "Ninja Multi-Config" is a multi-config generator, so CMAKE_BUILD_TYPE
+          # presumably has no effect here; the configuration is chosen by --config at build time.
+          cmake -S . -B build `
+            -G "Ninja Multi-Config" `
+            -DGPU_TARGETS="${{ steps.gpu.outputs.mapped }}" `
+            -DGGML_HIP=ON `
+            -DCMAKE_C_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang.exe" `
+            -DCMAKE_CXX_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang++.exe" `
+            -DCMAKE_HIP_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang++.exe" `
+            "-DCMAKE_C_FLAGS='-D__PRFCHWINTRIN_H'" `
+            "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'" `
+            "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm" `
+            -DCMAKE_PREFIX_PATH="$env:HIP_PATH" `
+            -DCMAKE_BUILD_TYPE=${{ matrix.build }} `
+            -DWHISPER_BUILD_SERVER=ON `
+            -DWHISPER_SDL2=${{ matrix.sdl2 }}
 
-      - name: Clean after building using CMake
-        shell: msys2 {0}
-        run: |
-            rm -rf build
+      - name: Build
+        shell: powershell
+        run: |
+          # The runner executes this script with $ErrorActionPreference = 'stop'. In Windows
+          # PowerShell 5.1, stderr of a native command redirected with 2>&1 is converted to
+          # error records, and under 'stop' the first stderr line aborts the script - before
+          # the exit code is captured and before the log summary below is printed.
+          # Relax the preference for the build command only, then restore it.
+          $prevEap = $ErrorActionPreference
+          $ErrorActionPreference = 'Continue'
+          cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS > build.log 2>&1
+          $exit = $LASTEXITCODE
+          $ErrorActionPreference = $prevEap
+          # Show only errors and link steps - keeps log under GitHub's line limit
+          Get-Content build.log | Select-String -Pattern "error:|FAILED|Linking|Built target|warning: " | Write-Host
+          if ($exit -ne 0) {
+            # On failure also print the unfiltered tail of the log for context.
+            Write-Host "--- Last 100 lines of build log ---"
+            Get-Content build.log -Tail 100
+            Write-Error "Build failed with exit code $exit"
+            exit $exit
+          }
+          Write-Host "Build succeeded."
+
+      - name: Copy ROCm DLLs
+        shell: powershell
+        run: |
+          # Bundle ROCm runtime DLLs from C:\opt\rocm\bin next to the built executables.
+          $bin    = "build/bin/${{ matrix.build }}"
+          $rocBin = "C:\opt\rocm\bin"
+          # Each entry is a file-name pattern looked up in $rocBin; lookup errors are
+          # suppressed, so a pattern that matches nothing simply copies nothing.
+          @("amdhip64_*.dll","amd_comgr*.dll","libhipblas.dll","rocblas.dll",
+            "rocsolver.dll","hipblaslt.dll","libhipblaslt.dll","hipblas.dll") | ForEach-Object {
+            Get-ChildItem $rocBin -Name $_ -ErrorAction SilentlyContinue |
+              ForEach-Object { Copy-Item (Join-Path $rocBin $_) (Join-Path $bin $_) }
+          }
+          # rocblas / hipblaslt "library" data directories are copied only when they exist.
+          $rocLib = Join-Path $rocBin "rocblas\library"
+          if (Test-Path $rocLib) { Copy-Item $rocLib -Destination (Join-Path $bin "rocblas\library") -Recurse -Force }
+          $hipLib = Join-Path $rocBin "hipblaslt\library"
+          if (Test-Path $hipLib) { Copy-Item $hipLib -Destination (Join-Path $bin "hipblaslt\library") -Recurse -Force }
 
-      - name: Build using CMake w/ OpenBLAS
-        shell: msys2 {0}
-        run: |
-            cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-            cmake --build build --config ${{ matrix.build }} -j $(nproc)
+      - name: Copy SDL2.dll
+        if: matrix.sdl2 == 'ON'
+        shell: powershell
+        # SDL2_DIR is the directory holding sdl2-config.cmake (exported by the SDL2 fetch step);
+        # the DLL is taken from ../lib/<s2arc> relative to it and placed beside the executables.
+        run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" "build/bin/${{ matrix.build }}"
+
+      - name: Verify build output
+        shell: powershell
+        run: |
+          $bin = "build/bin/${{ matrix.build }}"
+          # Both executables must exist before packaging.
+          foreach ($exe in @("whisper-cli.exe", "whisper-server.exe")) {
+            if (-not (Test-Path "$bin/$exe")) {
+              # Print the directory listing BEFORE Write-Error: the runner sets
+              # $ErrorActionPreference = 'stop', so Write-Error ends the script and
+              # anything placed after it would never run. Out-Host renders the table
+              # immediately instead of leaving it queued in the output pipeline.
+              Get-ChildItem $bin -ErrorAction SilentlyContinue | Format-Table Name, Length | Out-Host
+              Write-Error "$exe not found in $bin - build likely truncated"
+              exit 1
+            }
+          }
+          Write-Host "Build output:"
+          Get-ChildItem $bin -Filter "*.exe" | Format-Table Name, Length
+
+      - name: Package
+        shell: powershell
+        run: |
+          # Zip the contents of the build output directory (not the directory itself) as
+          # whisper-<version>-windows-rocm-<gfx target>.zip; -Force overwrites an existing file.
+          $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-${{ matrix.gfx_target }}.zip"
+          Compress-Archive -Path "build/bin/${{ matrix.build }}/*" -DestinationPath $a -Force
+          # Export the archive name for the upload step.
+          "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
+
+      # Upload the zip; ARCHIVE (set by the Package step) is both artifact name and path.
+      - uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.ARCHIVE }}
+          path: ${{ env.ARCHIVE }}
 
-  windows:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: windows-latest
+# ════════════════════════════════════════════════════════════════════════════════
+# 4. Vulkan — Linux
+# ════════════════════════════════════════════════════════════════════════════════
+  linux-vulkan:
+    runs-on: ubuntu-latest
     needs: determine-tag
 
-    strategy:
-      matrix:
-        build: [Release]
-        arch: [Win32, x64]
-        sdl2: [ON]
-        include:
-          - arch: Win32
-            s2arc: x86
-            jnaPath: win32-x86
-          - arch: x64
-            s2arc: x64
-            jnaPath: win32-x86-64
-          - sdl2: ON
-            s2ver: 2.28.5
-
     steps:
-      - name: Clone
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
 
-      - name: Add msbuild to PATH
-        uses: microsoft/setup-msbuild@v2
-
-      - name: Fetch SDL2 and set SDL2_DIR
-        if: matrix.sdl2 == 'ON'
+      - name: Install dependencies
         run: |
-          C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
-          7z x sdl2.zip
-          echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
-
-      - name: Configure
-        run: >
-          cmake -S . -B ./build -A ${{ matrix.arch }}
-          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-          -DBUILD_SHARED_LIBS=ON
-          -DWHISPER_SDL2=${{ matrix.sdl2 }}
+          # Toolchain, SDL2 and Vulkan development packages.
+          sudo apt-get update
+          sudo apt-get install -y build-essential cmake git libsdl2-dev pkg-config libvulkan-dev vulkan-tools
+          # GLSL compiler: try the glslc package first, fall back to a package named shaderc.
+          sudo apt-get install -y glslc || sudo apt-get install -y shaderc
 
-      - name: Build
+      - name: Check Vulkan availability
         run: |
-          cd ./build
-          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
-
-      - name: Copy SDL2.dll
-        if: matrix.sdl2 == 'ON'
-        run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
-
-      - name: Upload SDL2.dll
-        if: matrix.sdl2 == 'ON'
-        uses: actions/upload-artifact@v6
-        with:
-          name: ${{ matrix.s2arc }}_SDL2.dll
-          path: build/bin/${{ matrix.build }}/SDL2.dll
-
-      - name: Upload whisper dll
-        uses: actions/upload-artifact@v6
-        with:
-          name: whisper_${{ matrix.arch }}.dll
-          path: build/bin/${{ matrix.build }}/whisper.dll
+          # Fail early unless at least one of glslc or glslangValidator is on PATH.
+          # NOTE(review): the error text names "shaderc" while the check tests glslangValidator.
+          if ! command -v glslc >/dev/null 2>&1 && ! command -v glslangValidator >/dev/null 2>&1; then
+            echo "::error::No GLSL compiler found (glslc / shaderc)"; exit 1
+          fi
 
-      - name: Upload ggml dll
-        uses: actions/upload-artifact@v6
-        with:
-          name: ggml_${{ matrix.arch }}.dll
-          path: build/bin/${{ matrix.build }}/ggml.dll
+      - name: Configure CMake
+        run: |
+          # Release build of the Vulkan backend with examples and server, tests off.
+          # CPU code targets a fixed x86-64-v3 baseline (AVX/AVX2/FMA on, AVX-512 off)
+          # rather than the build machine's native instruction set (GGML_NATIVE=OFF).
+          cmake -B build \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DGGML_NATIVE=OFF \
+            -DGGML_AVX=ON \
+            -DGGML_AVX2=ON \
+            -DGGML_FMA=ON \
+            -DGGML_AVX512=OFF \
+            -DCMAKE_C_FLAGS="-march=x86-64-v3" \
+            -DCMAKE_CXX_FLAGS="-march=x86-64-v3" \
+            -DGGML_VULKAN=ON \
+            -DWHISPER_BUILD_EXAMPLES=ON \
+            -DWHISPER_BUILD_TESTS=OFF \
+            -DWHISPER_BUILD_SERVER=ON
 
-      - name: Upload ggml base dll
-        uses: actions/upload-artifact@v6
-        with:
-          name: ggml_base_${{ matrix.arch }}.dll
-          path: build/bin/${{ matrix.build }}/ggml-base.dll
+      - name: Build
+        run: cmake --build build --config Release -j$(nproc)
 
-      - name: Upload ggml cpu dll
-        uses: actions/upload-artifact@v6
-        with:
-          name: ggml_cpu_${{ matrix.arch }}.dll
-          path: build/bin/${{ matrix.build }}/ggml-cpu.dll
+      - name: Validate Vulkan artifacts
+        run: |
+          # Count files under build/ whose name mentions "vulkan" (case-insensitive) and
+          # report the result; an empty result is a warning annotation, not an error.
+          count=$(find build -type f \( -iname "*vulkan*.so*" -o -iname "*vulkan*" \) 2>/dev/null | wc -l)
+          if [ "$count" -gt 0 ]; then
+            echo "Vulkan artifacts found: $count file(s)"
+          else
+            echo "::warning::No Vulkan-related artifacts found"
+          fi
 
-      - name: Pack bin artifacts
-        shell: pwsh
+      - name: Package
         run: |
-              Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-bin-${{ matrix.arch }}.zip"
+          # Stage directory and tarball are both named whisper-<version>-linux-vulkan-x86_64.
+          VER="${{ needs.determine-tag.outputs.version }}"
+          ARCHIVE="whisper-${VER}-linux-vulkan-x86_64.tar.gz"
+          STAGE="whisper-${VER}-linux-vulkan-x86_64"
+          mkdir -p "$STAGE"
+          # Best-effort copies: errors are discarded, so an empty build/bin still yields an archive.
+          cp -r build/bin/* "$STAGE/" 2>/dev/null || true
+          # Also collect every *.so* found anywhere under build/, flattened into the stage root.
+          find build -name "*.so*" -exec cp {} "$STAGE/" \; 2>/dev/null || true
+          tar -czf "$ARCHIVE" "$STAGE"
+          # Export the archive name for the upload step.
+          echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV
 
-      - name: Upload binaries
-        if: matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }}
-        uses: actions/upload-artifact@v6
+      - uses: actions/upload-artifact@v4
         with:
-          name: whisper-bin-${{ matrix.arch }}.zip
-          path: whisper-bin-${{ matrix.arch }}.zip
+          name: ${{ env.ARCHIVE }}
+          path: ${{ env.ARCHIVE }}
 
-  windows-blas:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
+# ════════════════════════════════════════════════════════════════════════════════
+# 5. Vulkan — Windows
+# ════════════════════════════════════════════════════════════════════════════════
+  windows-vulkan:
     runs-on: windows-latest
-
-    strategy:
-      matrix:
-        build: [Release]
-        arch: [Win32, x64]
-        blas: [ON]
-        sdl2: [ON]
-        blasver: [0.3.29]
-        include:
-          - arch: Win32
-            s2arc: x86
-            blasfile: x86
-          - arch: x64
-            s2arc: x64
-            blasfile: x64_64
-          - sdl2: ON
-            s2ver: 2.28.5
+    needs: determine-tag
 
     steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: Export GitHub Actions cache environment variables
-        uses: actions/github-script@v8
-        with:
-          script: |
-            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
-            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
+      - uses: actions/checkout@v4
 
-      - name: Add msbuild to PATH
-        uses: microsoft/setup-msbuild@v2
+      - uses: microsoft/setup-msbuild@v2
 
-      - name: Install OpenBLAS and pkgconfiglite
-        if: matrix.blas == 'ON'
+      - name: Install Vulkan SDK
+        shell: pwsh
         run: |
-          Invoke-WebRequest "https://github.com/OpenMathLib/OpenBLAS/releases/download/v${{matrix.blasver}}/OpenBLAS-${{matrix.blasver}}_${{matrix.blasfile}}.zip" -OutFile "OpenBLAS-${{matrix.blasver}}.zip"
-          Expand-Archive "OpenBLAS-${{matrix.blasver}}.zip" -DestinationPath "OpenBLAS-${{matrix.blasver}}"
-          choco install pkgconfiglite
+          # Install the SDK silently with winget.
+          # NOTE(review): winget's exit code is not checked here; a failed install only
+          # surfaces through the directory lookup below.
+          winget install --id KhronosGroup.VulkanSDK -e --silent --accept-package-agreements --accept-source-agreements
+          # Export the first entry found under C:\VulkanSDK as VULKAN_SDK for later steps.
+          $sdk = Get-ChildItem "C:\VulkanSDK" -ErrorAction SilentlyContinue | Select-Object -First 1
+          if (-not $sdk) { throw "Vulkan SDK not found under C:\VulkanSDK" }
+          "VULKAN_SDK=$($sdk.FullName)" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
 
-      - name: Fetch SDL2 and set SDL2_DIR
-        if: matrix.sdl2 == 'ON'
+      - name: Fetch SDL2
+        shell: pwsh
         run: |
-          C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
+          C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-2.28.5/SDL2-devel-2.28.5-VC.zip
           7z x sdl2.zip
-          echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
-
-      - name: Configure
-        run: >
-          cmake -S . -B ./build -A ${{ matrix.arch }}
-          -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
-          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-          -DGGML_BLAS=${{ matrix.blas }}
-          -DGGML_BLAS_VENDOR=OpenBLAS
-          -DBLAS_LIBRARIES="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/lib/libopenblas.lib"
-          -DBLAS_INCLUDE_DIRS="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/include"
-          -DWHISPER_SDL2=${{ matrix.sdl2 }}
+          "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-2.28.5/cmake" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
 
-      - name: Build
+      - name: Configure CMake
+        shell: pwsh
         run: |
-          cd ./build
-          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+          # x64 shared-library build with the Vulkan backend, server and SDL2 enabled;
+          # VULKAN_SDK is forwarded from the environment set by the SDK install step.
+          # NOTE(review): the Build step drives msbuild with configuration=Release, so
+          # CMAKE_BUILD_TYPE is presumably not what selects the configuration here - confirm.
+          cmake -S . -B ./build -A x64 `
+            -DCMAKE_BUILD_TYPE=Release `
+            -DBUILD_SHARED_LIBS=ON `
+            -DGGML_VULKAN=ON `
+            -DWHISPER_BUILD_SERVER=ON `
+            -DWHISPER_SDL2=ON `
+            -DVULKAN_SDK="$env:VULKAN_SDK"
 
-      - name: Copy openblas.dll
-        if: matrix.blas == 'ON'
-        run: copy "$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/bin/libopenblas.dll" build/bin/${{ matrix.build }}
+      - name: Build
+        run: cd ./build && msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64
 
       - name: Copy SDL2.dll
-        if: matrix.sdl2 == 'ON'
-        run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
+        shell: pwsh
+        run: copy "$env:SDL2_DIR/../lib/x64/SDL2.dll" build/bin/Release
 
-      - name: Pack bin artifacts
+      - name: Package
         shell: pwsh
         run: |
-              Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-blas-bin-${{ matrix.arch }}.zip"
+          # Zip the contents of build/bin/Release as whisper-<version>-windows-vulkan-x64.zip
+          # (-Force overwrites an existing file) and export the name for the upload step.
+          $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-vulkan-x64.zip"
+          Compress-Archive -Path "build/bin/Release/*" -DestinationPath $a -Force
+          "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
 
-      - name: Upload binaries
-        if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }}
-        uses: actions/upload-artifact@v6
+      - uses: actions/upload-artifact@v4
         with:
-          name: whisper-blas-bin-${{ matrix.arch }}.zip
-          path: whisper-blas-bin-${{ matrix.arch }}.zip
-
-  windows-cublas:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: windows-2022
+          name: ${{ env.ARCHIVE }}
+          path: ${{ env.ARCHIVE }}
+
+# ════════════════════════════════════════════════════════════════════════════════
+# 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner)
+# ════════════════════════════════════════════════════════════════════════════════
+  windows-npu:
+    runs-on: [self-hosted, Windows, stx, rai300_400]
     needs: determine-tag
-    strategy:
-      fail-fast: false
-      matrix:
-        build: [Release]
-        arch: [x64]
-        cublas: [ON]
-        sdl2: [ON]
-        cuda-toolkit: [12.4.0, 11.8.0]
-        include:
-          - arch: x64
-            sdl2: ON
-            sdl2_ver: 2.28.5
-    steps:
-      - name: Clone repository
-        uses: actions/checkout@v6
-
-      - name: Install Ninja
-        id: install_ninja
-        run: |
-          choco install ninja
-
-      - name: Install ccache
-        uses: hendrikmuhs/ccache-action@v1.2.16
-        with:
-          key: ${{ github.job }}-${{ matrix.cuda-toolkit }}-${{ matrix.build }}
-          variant: sccache
-          evict-old-files: 5d
-
-      - name: Install Cuda Toolkit 11.8.0
-        if: ${{ matrix.cuda-toolkit == '11.8.0' }}
-        run: |
-          $CUDA_VERSION = ${{ matrix.cuda-toolkit }}
-          $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
-          $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
-
-          # Components versions
-          $CUDART_VER = "11.8.89"
-          $NVCC_VER   = "11.8.89"
-          $NVRTC_VER  = "11.8.89"
-          $CUBLAS_VER = "11.8.1.74"
-          $NVTX_VER   = "11.8.86"
-          $VS_VER     = "11.8.86"
-          $NVPROF_VER = "11.8.87"
-          $CCCL_VER   = "11.8.89"
-
-          # Create the directory where the CUDA Toolkit will be installed
-          mkdir -p $CUDA_TOOLKIT_DIR
-
-          # Install unzip to extract the downloaded files
-          choco install unzip -y
-
-          # Download all the required components
-          curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
-
-          # Extract all the downloaded files to the CUDA Toolkit directory
-          unzip '*.zip' -d $CUDA_TOOLKIT_DIR
-
-          # Copy all the extracted files to the main CUDA Toolkit directory
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-
-          # Visual Studio integration
-          xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y
-
-          # Set environment variables
-          echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
-          echo "CUDA_PATH_V11_8=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
-
-      - name: Install Cuda Toolkit 12.4.0
-        if: ${{ matrix.cuda-toolkit == '12.4.0' }}
-        run: |
-          $CUDA_VERSION = ${{ matrix.cuda-toolkit }}
-          $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION"
-          $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist"
-
-          # Components versions
-          $CUDART_VER   = "12.4.127"
-          $NVCC_VER     = "12.4.131"
-          $NVRTC_VER    = "12.4.127"
-          $CUBLAS_VER   = "12.4.5.8"
-          $NVTX_VER     = "12.4.127"
-          $PROFILER_VER = "12.4.127"
-          $VS_VER       = "12.4.127"
-          $NVPROF_VER   = "12.4.128"
-          $CCCL_VER     = "12.4.127"
-
-          # Create the directory where the CUDA Toolkit will be installed
-          mkdir -p $CUDA_TOOLKIT_DIR
-
-          # Install unzip to extract the downloaded files
-          choco install unzip -y
-
-          # Download all the required components
-          curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip"
-          curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip"
-
-          # Extract all the downloaded files to the CUDA Toolkit directory
-          unzip -q '*.zip' -d $CUDA_TOOLKIT_DIR
-
-          # Copy all the extracted files to the main CUDA Toolkit directory
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*"     "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*"   "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*"   "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*"     "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-          xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y
-
-          # Visual Studio integration
-          xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y
-
-          # Set environment variables
-          echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
-          echo "CUDA_PATH_V12_2=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
-
-      - name: Add msbuild to PATH
-        uses: microsoft/setup-msbuild@v2
-
-      - name: Install 7-Zip
-        run: choco install 7zip -y
-
-      - name: Fetch SDL2 and set SDL2_DIR
-        if: matrix.sdl2 == 'ON'
-        run: |
-          Invoke-WebRequest -Uri https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.sdl2_ver }}/SDL2-devel-${{ matrix.sdl2_ver }}-VC.zip -OutFile sdl2.zip
-          7z x sdl2.zip
-          echo "SDL2_DIR=${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" | Out-File -FilePath $env:GITHUB_ENV -Append
-          echo "${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" > SDL2_PATH.txt
-
-      - name: Install cmake
-        run: choco install cmake
+    continue-on-error: true   # runner may be offline; don't block release
 
-      - name: Build Project
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: microsoft/setup-msbuild@v2
+
+      - name: Install CMake if not available
+        shell: powershell
+        run: |
+          # Reuse a cmake already on PATH; otherwise install a pinned release from its MSI.
+          $installed = Get-Command cmake -ErrorAction SilentlyContinue
+          if (-not $installed) {
+            $ver = "3.28.1"
+            $url = "https://github.com/Kitware/CMake/releases/download/v$ver/cmake-$ver-windows-x86_64.msi"
+            Invoke-WebRequest -Uri $url -OutFile cmake.msi
+            Start-Process msiexec.exe -ArgumentList "/i cmake.msi /quiet /norestart" -Wait
+            $p = "C:\Program Files\CMake\bin"
+            # Current step: make cmake resolvable immediately for the version check below.
+            $env:PATH = "$p;$env:PATH"
+            # Later steps: append to GITHUB_PATH. Windows PowerShell 5.1 (`shell: powershell`)
+            # writes UTF-16 with `>>`, but the runner reads this file as UTF-8, so write it
+            # through Out-File with an explicit encoding.
+            $p | Out-File -FilePath $env:GITHUB_PATH -Append -Encoding utf8
+            cmake --version
+            if ($LASTEXITCODE -ne 0) { Write-Error "CMake install failed"; exit 1 }
+          } else { cmake --version }
+
+      - name: Download FlexML Runtime
+        shell: powershell
+        run: |
+          # Fetch the runtime archive from FLEXML_URL, then sanity-check that a
+          # non-empty file was actually written before reporting its size.
+          $archive = "flexmlrt.zip"
+          Invoke-WebRequest -Uri "${{ env.FLEXML_URL }}" -OutFile $archive
+          if (-not (Test-Path $archive)) { Write-Error "flexmlrt.zip not downloaded"; exit 1 }
+          $size = (Get-Item $archive).Length
+          if ($size -eq 0) { Write-Error "flexmlrt.zip is empty"; exit 1 }
+          Write-Host "FlexML: $([math]::Round($size/1MB,2)) MB downloaded"
+
+      - name: Extract FlexML Runtime
+        shell: powershell
+        run: |
+          # Unpack the downloaded archive into the workspace with tar.
+          tar xvf flexmlrt.zip
+          if ($LASTEXITCODE -ne 0) { Write-Error "Extraction failed"; exit 1 }
+          # Require at least one top-level directory whose name starts with "flexmlrt".
+          $dirs = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" }
+          if (-not $dirs) { Write-Error "No flexmlrt directory found after extraction"; exit 1 }
+          Write-Host "Extracted: $($dirs.Name)"
+
+      - name: Setup FlexML, configure and build
         shell: cmd
         run: |
-          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
-          cmake --version
-          where cmake
-          if "${{ matrix.cuda-toolkit }}" == "11.8.0" (
-            set CUDA_FLAGS=-allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR
-          ) else (
-            set CUDA_FLAGS=
-          )
-          cmake -S . -B build -G "Ninja Multi-Config" ^
-            -DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
-            -DGGML_CUDA=${{ matrix.cublas }} ^
-            -DWHISPER_SDL2=${{ matrix.sdl2 }} ^
-            -DSDL2_DIR="%SDL2_DIR%" ^
-            -DCMAKE_POLICY_VERSION_MINIMUM=3.5 ^
-            -DCMAKE_CUDA_FLAGS="%CUDA_FLAGS%"
-          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
-          cmake --build build --config ${{ matrix.build }} -j %NUMBER_OF_PROCESSORS%
-
-      - name: Check sccache status after build
-        run: |
-          sccache --show-stats
-
-      - name: Copy CUDA DLLs
-        run: |
-          Get-ChildItem "$env:CUDA_PATH\bin\" -Filter "*.dll" |
-          Copy-Item -Destination "build/bin/${{ matrix.build }}"
-
-      - name: Copy SDL2.dll
-        if: matrix.sdl2 == 'ON'
-        run: copy "$env:SDL2_DIR/../lib/${{ matrix.arch }}/SDL2.dll" build/bin/${{ matrix.build }}
-
-      - name: Pack bin artifacts
-        shell: pwsh
-        run: |
-              Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip"
-
-      - name: Upload binaries
-        if: ${{ needs.determine-tag.outputs.should_release }}
-        uses: actions/upload-artifact@v6
+          cd flexmlrt
+          call setup.bat
+          if errorlevel 1 ( echo ERROR: FlexML setup.bat failed! & exit /b 1 )
+          cd ..
+          cmake -B build -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_VITISAI=ON -DWHISPER_BUILD_SERVER=ON
+          if errorlevel 1 ( echo ERROR: CMake configure failed! & exit /b 1 )
+          cmake --build build --config Release -j
+          if errorlevel 1 ( echo ERROR: Build failed! & exit /b 1 )
+
+      - name: List build output  # Prints every file under build/bin/Release; fails the job when that directory is missing.
+        shell: powershell
+        run: |
+          $releaseDir = "build/bin/Release"
+          if (-not (Test-Path $releaseDir)) { Write-Error "build/bin/Release not found"; exit 1 }
+          Get-ChildItem -Path $releaseDir -Recurse | Format-Table Name, Length
+
+      - name: Copy FlexML DLLs to build output  # Copies *.dll from flexmlrt/bin and flexmlrt/lib (when present) next to the built binaries.
+        shell: powershell
+        run: |
+          $copied = 0
+          foreach ($srcDir in @("flexmlrt/bin", "flexmlrt/lib")) {
+            if (-not (Test-Path $srcDir)) { continue }
+            $dlls = Get-ChildItem -Path "$srcDir/*.dll" -ErrorAction SilentlyContinue
+            if ($dlls) {
+              Copy-Item "$srcDir/*.dll" "build/bin/Release/" -Force
+              $copied += $dlls.Count
+            }
+          }
+          Write-Host "FlexML DLLs copied: $copied"
+
+      - name: Package  # Zips build/bin/Release/* and exports the archive file name as ARCHIVE for later steps.
+        shell: powershell
+        run: |
+          $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-npu-x64.zip"
+          Compress-Archive -Path "build/bin/Release/*" -DestinationPath $a -Force  # -Force overwrites an existing archive of the same name
+          if (-not (Test-Path $a)) { Write-Error "Package creation failed"; exit 1 }
+          $mb = [math]::Round((Get-Item $a).Length/1MB,2)
+          Write-Host "Package: $a ($mb MB)"
+          "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8  # appended to GITHUB_ENV so env.ARCHIVE is set for the following steps
+
+      - name: Build summary  # Logs the archive name that the Package step exported as ARCHIVE.
+        shell: powershell
+        run: |
+          Write-Host "NPU build complete. Artifact: $env:ARCHIVE"
+
+      - uses: actions/upload-artifact@v4
         with:
-          name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip
-          path: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip
-
-  emscripten:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
+          name: ${{ env.ARCHIVE }}
+          path: ${{ env.ARCHIVE }}
 
-    strategy:
-      matrix:
-        build: [Release]
+# ════════════════════════════════════════════════════════════════════════════════
+# 7. Metal — macOS (arm64)
+# ════════════════════════════════════════════════════════════════════════════════
+  macos-metal:
+    runs-on: macos-latest
+    needs: determine-tag
 
     steps:
-      - name: Clone
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
 
-      - name: Setup emsdk
-        uses: mymindstorm/setup-emsdk@v14
+      - name: Install dependencies
+        run: brew install cmake ninja
 
-      - name: Verify
-        run: emcc -v
+      - name: Configure CMake  # Release configure with Metal, examples and server enabled, tests off, and @loader_path as the rpath used at build time.
+        run: |
+          cmake -B build \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_MACOSX_RPATH=ON \
+            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
+            -DCMAKE_INSTALL_RPATH="@loader_path" \
+            -DGGML_METAL=ON \
+            -DWHISPER_BUILD_EXAMPLES=ON \
+            -DWHISPER_BUILD_TESTS=OFF \
+            -DWHISPER_BUILD_SERVER=ON
 
       - name: Build
         run: |
-          emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
-          make
+          exit_code=0  # the default `bash -e` run shell aborts on a bare failing command, so the status is captured via `||` on the next line
+          cmake --build build --config Release -j$(sysctl -n hw.logicalcpu) > build.log 2>&1 || exit_code=$?
+          grep -E "error:|FAILED|Linking|Built target" build.log || true  # condensed view of the log; grep matching nothing is not a failure
+          if [ $exit_code -ne 0 ]; then
+            tail -100 build.log  # show the end of the full log so the failure is visible in the job output
+            exit $exit_code
+          fi
+          echo "Build succeeded."
 
-  ios-xcode-build:
-    runs-on: macos-latest
-    needs: determine-tag
+      - name: Verify build output
+        run: |
+          if [ ! -f build/bin/whisper-cli ]; then
+            echo "::error::whisper-cli not found"
+            ls -lh build/bin/ 2>/dev/null || true
+            exit 1
+          fi
 
-    strategy:
-      matrix:
-        build: [Release]
+          if [ ! -f build/bin/whisper-server ]; then
+            echo "::error::whisper-server not found"
+            ls -lh build/bin/ 2>/dev/null || true
+            exit 1
+          fi
 
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v6
+          echo "--- build/bin ---"
+          ls -lh build/bin/
 
-      - name: Configure
-        run: |
-          cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
-          mkdir models/ggml-base.en-encoder.mlmodelc
+          echo "--- macOS dylibs produced by build ---"
+          find build -name "*.dylib" -print | sort
 
-      - name: Build
-        id: cmake_build
-        run: |
-          sysctl -a
-          mkdir build
-          cd build
-          cmake -G Xcode .. \
-            -DGGML_METAL_USE_BF16=ON \
-            -DGGML_METAL_EMBED_LIBRARY=ON \
-            -DWHISPER_BUILD_EXAMPLES=OFF \
-            -DWHISPER_BUILD_TESTS=OFF \
-            -DWHISPER_BUILD_SERVER=OFF \
-            -DCMAKE_SYSTEM_NAME=iOS \
-            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
-          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+          echo "--- whisper-server dependencies before packaging ---"
+          otool -L build/bin/whisper-server
 
-      - name: xcodebuild for swift package
-        id: xcodebuild
+      - name: Package
         run: |
-          ./build-xcframework.sh
+          set -euo pipefail
 
-      - name: Build objc example
-        run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO FRAMEWORK_FOLDER_PATH=./build-ios build
+          VER="${{ needs.determine-tag.outputs.version }}"
+          ARCHIVE="whisper-${VER}-darwin-metal-arm64.tar.gz"
+          STAGE="whisper-${VER}-darwin-metal-arm64"
 
-      - name: Build swiftui example
-        run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
+          rm -rf "$STAGE" "$ARCHIVE"
+          mkdir -p "$STAGE"
 
-      - name: Pack artifacts
-        id: pack_artifacts
-        run: |
-          zip --symlinks -r whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip build-apple/whisper.xcframework
+          cp -R build/bin/* "$STAGE/" 2>/dev/null || true
 
-      - name: Upload artifacts
-        if: ${{ needs.determine-tag.outputs.should_release }}
-        uses: actions/upload-artifact@v6
-        with:
-          path: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip
-          name: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip
+          # whisper-server depends on libwhisper / ggml dylibs that CMake may
+          # leave under build/src and build/ggml/src rather than build/bin.
+          # Package all produced dylibs next to the executables so @loader_path
+          # can resolve them on downstream machines and GitHub macOS runners.
+          while IFS= read -r lib; do
+            cp -P "$lib" "$STAGE/"
+          done < <(find build -name "*.dylib" -print | sort)
 
-  android:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
+          # Make dylib lookup portable inside the extracted archive.
+          for target in "$STAGE"/whisper-* "$STAGE"/*.dylib; do
+            [ -e "$target" ] || continue
 
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-        with:
-          path: whisper
+            install_name_tool -add_rpath "@loader_path" "$target" 2>/dev/null || true
 
-      - name: Install Java
-        uses: actions/setup-java@v5
-        with:
-          distribution: zulu
-          java-version: 21
+            if [ "${target##*.}" = "dylib" ] && [ ! -L "$target" ]; then
+              install_name_tool -id "@rpath/$(basename "$target")" "$target" 2>/dev/null || true
+            fi
 
-      - name: Setup Android SDK
-        uses: android-actions/setup-android@v3
+            while IFS= read -r dep; do  # each dependency path reported by otool for this target
+              case "$dep" in
+                "$PWD"/build/*|"${GITHUB_WORKSPACE:-$PWD}"/*)  # absolute paths into this checkout, whatever the repository is called
+                  install_name_tool -change "$dep" "@rpath/$(basename "$dep")" "$target" 2>/dev/null || true
+                  ;;
+              esac
+            done < <(otool -L "$target" 2>/dev/null | awk 'NR > 1 {print $1}')  # NR > 1 skips the header line otool prints first
+          done
 
-      - name: Build
-        run: |
-          cd whisper/examples/whisper.android
-          ./gradlew assembleRelease --no-daemon
+          echo "--- packaged files ---"
+          find "$STAGE" -maxdepth 1 -type f -o -type l | sort
 
-      - name: Build with external ggml
-        run: |
-          export PATH_TO_GGML=$PWD/ggml
-          cd whisper/examples/whisper.android
-          ./gradlew assembleRelease --no-daemon
+          echo "--- whisper-server dependencies after packaging ---"
+          otool -L "$STAGE/whisper-server"
+          otool -l "$STAGE/whisper-server" | grep -A2 LC_RPATH || true
 
-  android_java:
-    runs-on: ubuntu-22.04
+          if otool -L "$STAGE/whisper-server" | grep -qF "${GITHUB_WORKSPACE:-$PWD}/"; then  # any path into the checkout is non-portable, regardless of repository name
+            echo "::error::whisper-server still references non-portable build paths"
+            exit 1
+          fi
 
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
+          if ! find "$STAGE" -maxdepth 1 \( -type f -o -type l \) -name "libwhisper*.dylib" | grep -q .; then
+            echo "::error::packaged archive is missing libwhisper dylib"
+            exit 1
+          fi
 
-      - name: set up JDK 11
-        uses: actions/setup-java@v5
-        with:
-          java-version: '11'
-          distribution: 'temurin'
-          cache: gradle
+          set +e  # tolerate a non-zero exit just long enough to capture the smoke status
+          DYLD_LIBRARY_PATH="$PWD/$STAGE" "$STAGE/whisper-server" --help > whisper-server-smoke.log 2>&1
+          smoke_status=$?
+          set -e  # NOTE(review): DYLD_LIBRARY_PATH above presumably lets dyld find the staged dylibs even if the @rpath rewrite is wrong - confirm this is intended
 
-      - name: Setup Android SDK
-        uses: android-actions/setup-android@v3
-        with:
-          cmdline-tools-version: 9.0
+          cat whisper-server-smoke.log
 
-      - name: Build
-        run: |
-          cd examples/whisper.android.java
-          chmod +x ./gradlew
-          ./gradlew assembleRelease
+          if grep -q "Library not loaded" whisper-server-smoke.log; then
+            echo "::error::whisper-server has unresolved dylib dependencies"
+            exit 1
+          fi
 
-  bindings-java:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    needs: ['windows']
-    runs-on: windows-latest
-    steps:
-      - uses: actions/checkout@v6
+          echo "whisper-server smoke command exited with status ${smoke_status}"
 
-      - name: Install Java
-        uses: actions/setup-java@v5
-        with:
-          distribution: zulu
-          java-version: 20
+          tar -czf "$ARCHIVE" "$STAGE"
+          echo "ARCHIVE=$ARCHIVE" >> "$GITHUB_ENV"
 
-      - name: Download Whisper Windows lib
-        uses: actions/download-artifact@v7
+      - uses: actions/upload-artifact@v4
         with:
-          name: whisper_x64.dll
+          name: ${{ env.ARCHIVE }}
+          path: ${{ env.ARCHIVE }}
 
-      - name: Download GGML Windows lib
-        uses: actions/download-artifact@v7
-        with:
-          name: ggml_x64.dll
+# ════════════════════════════════════════════════════════════════════════════════
+# 8. CPU — Linux
+# ════════════════════════════════════════════════════════════════════════════════
+  linux-cpu:
+    runs-on: ubuntu-latest
+    needs: determine-tag
 
-      - name: Download GGML Base Windows lib
-        uses: actions/download-artifact@v7
-        with:
-          name: ggml_base_x64.dll
+    steps:
+      - uses: actions/checkout@v4
 
-      - name: Download GGML CPU Windows lib
-        uses: actions/download-artifact@v7
-        with:
-          name: ggml_cpu_x64.dll
+      - name: Install dependencies  # Installs the compiler toolchain, CMake, git, SDL2 headers and pkg-config via apt, then logs tool versions.
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential cmake git libsdl2-dev pkg-config
+          echo "cmake $(cmake --version | head -1)"  # only the first line of the --version output
+          echo "gcc $(gcc --version | head -1)"
+
+      - name: Configure CMake  # Release configure with native tuning off: AVX/AVX2/FMA on, AVX-512 off, and -march=x86-64-v3 for C and C++.
+        run: |
+          cmake -B build \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DGGML_NATIVE=OFF \
+            -DGGML_AVX=ON \
+            -DGGML_AVX2=ON \
+            -DGGML_FMA=ON \
+            -DGGML_AVX512=OFF \
+            -DCMAKE_C_FLAGS="-march=x86-64-v3" \
+            -DCMAKE_CXX_FLAGS="-march=x86-64-v3" \
+            -DWHISPER_BUILD_EXAMPLES=ON \
+            -DWHISPER_BUILD_TESTS=OFF \
+            -DWHISPER_BUILD_SERVER=ON
 
-      - name: Download SDL2.dll
-        uses: actions/download-artifact@v7
-        with:
-          name: x64_SDL2.dll
+      - name: Build
+        run: cmake --build build --config Release -j$(nproc)
 
-      - name: List downloaded files
-        shell: pwsh
+      - name: List build output
         run: |
-          Get-ChildItem -Path "." -Recurse -Filter "*.dll"
+          find build/bin -type f | sort  # every regular file produced under build/bin
+          find build/bin -type f -executable | while IFS= read -r f; do ls -lh "$f"; done  # IFS= and -r keep paths verbatim (no backslash or whitespace mangling)
 
-      - name: Move DLL to correct location
-        shell: pwsh
+      - name: Package
         run: |
-          New-Item -Path "build\bin\Release" -ItemType Directory -Force
-
-          Copy-Item -Path "whisper.dll" -Destination "build\bin\Release\whisper.dll" -Force
-          Write-Host "Copied whisper.dll to build\bin\Release\whisper.dll directory"
+          VER="${{ needs.determine-tag.outputs.version }}"
+          ARCHIVE="whisper-${VER}-linux-cpu-x86_64.tar.gz"
+          STAGE="whisper-${VER}-linux-cpu-x86_64"
+          mkdir -p "$STAGE"
+          cp -r build/bin/* "$STAGE/" 2>/dev/null || true  # best effort: executables and anything else CMake placed in build/bin
+          find build -name "*.so*" -exec cp -P {} "$STAGE/" \; 2>/dev/null || true  # -P keeps versioned .so symlinks as links instead of storing duplicate copies
+          printf "whisper.cpp CPU build for Linux\nDate: %s\nArch: %s\n" \
+            "$(date -u +"%Y-%m-%d %H:%M:%S UTC")" "$(uname -m)" > "$STAGE/README.txt"
+          tar -czf "$ARCHIVE" "$STAGE"
+          echo "ARCHIVE=$ARCHIVE" >> "$GITHUB_ENV"  # exported so the upload step can read env.ARCHIVE
 
-          Copy-Item -Path "ggml.dll" -Destination "build\bin\Release\ggml.dll" -Force
-          Write-Host "Copied ggml.dll to build\bin\Release\ggml.dll directory"
+      - uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.ARCHIVE }}
+          path: ${{ env.ARCHIVE }}
 
-          Copy-Item -Path "ggml-base.dll" -Destination "build\bin\Release\ggml-base.dll" -Force
-          Write-Host "Copied ggml-base.dll to build\bin\Release\ggml-base.dll directory"
+# ════════════════════════════════════════════════════════════════════════════════
+# 8. CPU — Windows
+# ════════════════════════════════════════════════════════════════════════════════
+  windows-cpu:
+    runs-on: windows-latest
+    needs: determine-tag
 
-          Copy-Item -Path "ggml-cpu.dll" -Destination "build\bin\Release\ggml-cpu.dll" -Force
-          Write-Host "Copied ggml-cpu.dll to build\bin\Release\ggml-cpu.dll directory"
+    steps:
+      - uses: actions/checkout@v4
 
-          Copy-Item -Path "SDL2.dll" -Destination "build\bin\Release\SDL2.dll" -Force
-          Write-Host "Copied SDL2.dll to build\bin\Release\SDL2.dll directory"
+      - uses: microsoft/setup-msbuild@v2
 
-      - name: List build release files
+      - name: Fetch SDL2
         shell: pwsh
         run: |
-          Get-ChildItem -Path "build\Release" -Recurse -Filter "*.dll"
-
-      - name: Build
-        run: |
-          models\download-ggml-model.cmd tiny.en models/
-          cd bindings/java
-          chmod +x ./gradlew
-          ./gradlew build --info
+          C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-2.28.5/SDL2-devel-2.28.5-VC.zip  # relies on wget.exe being present under C:/msys64 on the runner
+          7z x sdl2.zip
+          "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-2.28.5/cmake" | Out-File $env:GITHUB_ENV -Append -Encoding utf8  # SDL2_DIR is read again by the Copy SDL2.dll step
 
-      - name: Pack jar artifacts
+      - name: Configure CMake
         shell: pwsh
         run: |
-              Compress-Archive -Path "bindings/java/build/libs/whispercpp-*.jar" -DestinationPath "whispercpp.jar.zip"
+          cmake -S . -B ./build -A x64 `
+            -DCMAKE_BUILD_TYPE=Release `
+            -DGGML_NATIVE=OFF `
+            -DGGML_AVX=ON `
+            -DGGML_AVX2=ON `
+            -DGGML_FMA=ON `
+            -DGGML_AVX512=OFF `
+            -DBUILD_SHARED_LIBS=ON `
+            -DWHISPER_BUILD_SERVER=ON `
+            -DWHISPER_SDL2=ON
 
-      - name: Upload jar
-        uses: actions/upload-artifact@v6
-        with:
-          name: whispercpp.jar.zip
-          path: whispercpp.jar.zip
-
-#      - name: Publish package
-#        if: ${{ github.ref == 'refs/heads/master' }}
-#        uses: gradle/gradle-build-action@v2.4.2
-#        with:
-#          arguments: publish
-#          build-root-directory: bindings/java
-#        env:
-#          MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
-#          MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
-#          PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
-#          PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
-
-  quantize:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
-    runs-on: ubuntu-22.04
+      - name: Build
+        run: cd ./build && msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64
 
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
+      - name: Copy SDL2.dll
+        shell: pwsh
+        run: copy "$env:SDL2_DIR/../lib/x64/SDL2.dll" build/bin/Release
 
-      - name: Test quantize
+      - name: Package
+        shell: pwsh
         run: |
-          ./models/download-ggml-model.sh tiny.en
-          cmake -B build
-          cmake --build build --config Release
-          ./build/bin/whisper-quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
-
-  release:
-    if: ${{ github.event.inputs.create_release == 'true' || github.event.inputs.pre_release_tag != '' || startsWith(github.ref, 'refs/tags/v') }}
-
-    runs-on: ubuntu-latest
-
-    needs:
-      - determine-tag
-      - ios-xcode-build
-      - windows
-      - windows-blas
-      - windows-cublas
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
+          $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-cpu-x64.zip"
+          Compress-Archive -Path "build/bin/Release/*" -DestinationPath $a -Force  # archive holds the contents of Release, not the Release folder itself
+          "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8  # exported so the upload step can read env.ARCHIVE
 
-      - name: ccache
-        uses: hendrikmuhs/ccache-action@v1.2.16
+      - uses: actions/upload-artifact@v4
         with:
-          key: release
-          evict-old-files: 1d
+          name: ${{ env.ARCHIVE }}
+          path: ${{ env.ARCHIVE }}
+
+# ════════════════════════════════════════════════════════════════════════════════
+# Test jobs — run each packaged artifact against samples/jfk.wav
+# ════════════════════════════════════════════════════════════════════════════════
+# Model download is repeated inline in each test job (there is no shared step)
+# Models: ggml-tiny.bin from HuggingFace ggerganov/whisper.cpp
+#         ggml-tiny-encoder-vitisai.rai from amd/whisper-tiny-onnx-npu
+# ════════════════════════════════════════════════════════════════════════════════
+
+# ════════════════════════════════════════════════════════════════════════════════
+# 9. Test — CPU Windows (GitHub-hosted, no GPU needed)
+# ════════════════════════════════════════════════════════════════════════════════
+  test-cpu-windows:
+    runs-on: windows-latest
+    needs: [determine-tag, windows-cpu]
+    if: needs.windows-cpu.result == 'success'
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@v4
 
-      # Downloads all the artifacts from the previous jobs
-      - name: Download artifacts
-        id: download-artifact
-        uses: actions/download-artifact@v7
+      - name: Download artifact
+        uses: actions/download-artifact@v4
         with:
+          name: whisper-${{ needs.determine-tag.outputs.version }}-windows-cpu-x64.zip
           path: ./artifact
 
-      - name: Move artifacts
-        id: move_artifacts
-        run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
-
-      - name: Create release
-        id: create_release
-        uses: ggml-org/action-create-release@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          tag_name: ${{ needs.determine-tag.outputs.tag_name }}
-          prerelease: ${{ github.event.inputs.pre_release_tag != '' }}
-          draft: true
-
-      - name: Upload release
-        id: upload_release
-        uses: actions/github-script@v3
-        with:
-          github-token: ${{secrets.GITHUB_TOKEN}}
-          script: |
-            const path = require('path');
-            const fs = require('fs');
-            const release_id = '${{ steps.create_release.outputs.id }}';
-            for (let file of await fs.readdirSync('./artifact/release')) {
-              if (path.extname(file) === '.zip') {
-                console.log('uploadReleaseAsset', file);
-                await github.repos.uploadReleaseAsset({
-                  owner: context.repo.owner,
-                  repo: context.repo.repo,
-                  release_id: release_id,
-                  name: file,
-                  data: await fs.readFileSync(`./artifact/release/${file}`)
-                });
-              }
-            }
-
-  coreml-base-en:
-    if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') ||
-            github.event.inputs.create_release == 'true' ||
-            github.event.inputs.pre_release_tag != '' ||
-            startsWith(github.ref, 'refs/tags/v') }}
-    runs-on: macos-latest
-    needs: determine-tag
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v6
-
-      - name: Set environment variables
-        id: set_vars
+      - name: Extract
+        shell: pwsh
         run: |
-          echo "MODEL_NAME=base.en" >> $GITHUB_ENV
-          echo "GEN_MODEL_NAME=whisper-${{ needs.determine-tag.outputs.tag_name }}-ggml-base.en-encoder.mlmodelc" >> $GITHUB_ENV
+          New-Item -ItemType Directory -Force -Path bin | Out-Null
+          Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force
 
-      - name: Download model
+      - name: Download tiny model
+        shell: pwsh
         run: |
-          ./models/download-ggml-model.sh ${{ env.MODEL_NAME }}
+          New-Item -ItemType Directory -Force -Path models | Out-Null
+          Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin"
 
-      - name: Generate CoreML model
+      - name: Run and verify
+        shell: pwsh
         run: |
-          python3.11 -m venv venv
-          source venv/bin/activate
-          pip install ane_transformers openai-whisper coremltools
-          ./models/generate-coreml-model.sh ${{ env.MODEL_NAME }}
+          .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result
+          $transcript = Get-Content "jfk-result.txt" -Raw
+          Write-Host $transcript
+          if ($transcript -notmatch "country|ask not|nation|kennedy") { Write-Error "FAIL: expected words not found"; exit 1 }
+          Write-Host "PASS" -ForegroundColor Green
 
-  vad:
-    if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' ||
-            github.event.inputs.run_type == 'full-ci' }}
+# ════════════════════════════════════════════════════════════════════════════════
+# 10. Test — CPU Linux (GitHub-hosted, no GPU needed)
+# ════════════════════════════════════════════════════════════════════════════════
+  test-cpu-linux:
     runs-on: ubuntu-latest
-
+    needs: [determine-tag, linux-cpu]
+    if: needs.linux-cpu.result == 'success'
+    continue-on-error: true
     steps:
-      - name: Checkout
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
 
-      - name: Build
-        shell: bash
+      - name: Download artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: whisper-${{ needs.determine-tag.outputs.version }}-linux-cpu-x86_64.tar.gz
+          path: ./artifact
+
+      - name: Extract
         run: |
-          cmake -B build
-          cmake --build build --config Release
+          mkdir -p bin
+          tar -xzf artifact/*.tar.gz --strip-components=1 -C bin
+          chmod +x bin/whisper-cli
 
-      - name: Test
-        shell: bash
+      - name: Download tiny model
         run: |
-          ctest -R ^test-vad$ --test-dir build --output-on-failure -VV
+          mkdir -p models
+          curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
 
-# TODO: simplify the following workflows using a matrix
-  ggml-ci-x64-cpu-low-perf:
-    runs-on: ubuntu-22.04
+      - name: Run and verify
+        run: |
+          LD_LIBRARY_PATH="$(pwd)/bin${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result  # no trailing ':' when the variable is unset (an empty entry means the current directory)
+          cat jfk-result.txt
+          grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; }  # case-insensitive match on any expected word
 
+# ════════════════════════════════════════════════════════════════════════════════
+# 11. Test — Vulkan Windows (stx-halo, has Vulkan driver)
+# ════════════════════════════════════════════════════════════════════════════════
+  test-vulkan-windows:
+    runs-on: [self-hosted, Windows, stx-halo]
+    needs: [determine-tag, windows-vulkan]
+    if: needs.windows-vulkan.result == 'success'
+    continue-on-error: true
     steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
 
-      - name: ccache
-        uses: ggml-org/ccache-action@v1.2.16
+      - name: Download artifact
+        uses: actions/download-artifact@v4
         with:
-          key: ggml-ci-x64-cpu-low-perf
-          evict-old-files: 1d
+          name: whisper-${{ needs.determine-tag.outputs.version }}-windows-vulkan-x64.zip
+          path: ./artifact
 
-      - name: Dependencies
-        id: depends
+      - name: Extract
+        shell: powershell
         run: |
-          sudo apt-get update
-          sudo apt-get install build-essential libcurl4-openssl-dev
+          New-Item -ItemType Directory -Force -Path bin | Out-Null
+          Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force
 
-      - name: Test
-        id: ggml-ci
+      - name: Download tiny model
+        shell: powershell
         run: |
-          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+          New-Item -ItemType Directory -Force -Path models | Out-Null
+          Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin"
 
-  ggml-ci-arm64-cpu-low-perf:
-    runs-on: ubuntu-22.04-arm
+      - name: Run and verify
+        shell: powershell
+        run: |
+          .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result
+          $transcript = Get-Content "jfk-result.txt" -Raw
+          Write-Host $transcript
+          if ($transcript -notmatch "country|ask not|nation|kennedy") { Write-Error "FAIL: expected words not found"; exit 1 }
+          Write-Host "PASS"
 
+# ════════════════════════════════════════════════════════════════════════════════
+# 12. Test — Vulkan Linux (stx-halo, has Vulkan driver)
+# ════════════════════════════════════════════════════════════════════════════════
+  test-vulkan-linux:
+    runs-on: [self-hosted, Linux, stx-halo]
+    needs: [determine-tag, linux-vulkan]
+    if: needs.linux-vulkan.result == 'success'
+    continue-on-error: true
     steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
 
-      - name: ccache
-        uses: ggml-org/ccache-action@v1.2.16
+      - name: Download artifact
+        uses: actions/download-artifact@v4
         with:
-          key: ggml-ci-arm64-cpu-low-perf
-          evict-old-files: 1d
+          name: whisper-${{ needs.determine-tag.outputs.version }}-linux-vulkan-x86_64.tar.gz
+          path: ./artifact
 
-      - name: Dependencies
-        id: depends
+      - name: Extract
         run: |
-          sudo apt-get update
-          sudo apt-get install build-essential libcurl4-openssl-dev
+          mkdir -p bin
+          tar -xzf artifact/*.tar.gz --strip-components=1 -C bin
+          chmod +x bin/whisper-cli
 
-      - name: Test
-        id: ggml-ci
+      - name: Download tiny model
         run: |
-          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+          mkdir -p models
+          curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
 
-  ggml-ci-x64-cpu-high-perf:
-    runs-on: ubuntu-22.04
+      - name: Run and verify
+        run: |
+          LD_LIBRARY_PATH="$(pwd)/bin${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result  # no trailing ':' when the variable is unset (an empty entry means the current directory)
+          cat jfk-result.txt
+          grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; }  # case-insensitive match on any expected word
 
+# ════════════════════════════════════════════════════════════════════════════════
+# 13. Test — ROCm Windows (stx-halo, gfx1151)
+# ════════════════════════════════════════════════════════════════════════════════
+  test-rocm-windows:
+    runs-on: [self-hosted, Windows, stx-halo]
+    needs: [determine-tag, windows-rocm]
+    if: needs.windows-rocm.result == 'success'
+    continue-on-error: true
     steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
 
-      - name: ccache
-        uses: ggml-org/ccache-action@v1.2.16
+      - name: Download ROCm artifact (gfx1151 - stx-halo GPU target)
+        uses: actions/download-artifact@v4
         with:
-          key: ggml-ci-x64-cpu-high-perf
-          evict-old-files: 1d
+          name: whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-gfx1151.zip
+          path: ./artifact
 
-      - name: Dependencies
-        id: depends
+      - name: Extract
+        shell: powershell
         run: |
-          sudo apt-get update
-          sudo apt-get install build-essential libcurl4-openssl-dev
+          New-Item -ItemType Directory -Force -Path bin | Out-Null
+          Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force
 
-      - name: Test
-        id: ggml-ci
+      - name: Download tiny model
+        shell: powershell
         run: |
-          LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt
+          New-Item -ItemType Directory -Force -Path models | Out-Null
+          Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin"
 
-  ggml-ci-arm64-cpu-high-perf:
-    runs-on: ubuntu-22.04-arm
+      - name: Run and verify
+        shell: powershell
+        run: |
+          .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result
+          # Native executables do not throw on failure; check the exit code here so a crash
+          # is reported as such rather than as a missing jfk-result.txt further down.
+          if ($LASTEXITCODE -ne 0) { Write-Error "FAIL: whisper-cli exited with code $LASTEXITCODE"; exit 1 }
+          $text = Get-Content "jfk-result.txt" -Raw
+          Write-Host $text
+          # -match is case-insensitive: any one expected phrase is enough to pass.
+          if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" }
+          else { Write-Error "FAIL: expected words not found"; exit 1 }
 
+# ════════════════════════════════════════════════════════════════════════════════
+# 14. Test — ROCm Linux (stx-halo, gfx1151)
+# ════════════════════════════════════════════════════════════════════════════════
+  test-rocm-linux:
+    runs-on: [self-hosted, Linux, stx-halo]
+    needs: [determine-tag, linux-rocm]
+    if: needs.linux-rocm.result == 'success'
+    continue-on-error: true
     steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
 
-      - name: ccache
-        uses: ggml-org/ccache-action@v1.2.16
+      - name: Download ROCm artifact (gfx1151)
+        uses: actions/download-artifact@v4
         with:
-          key: ggml-ci-arm64-cpu-high-perf
-          evict-old-files: 1d
+          name: whisper-${{ needs.determine-tag.outputs.version }}-linux-rocm-gfx1151.tar.gz
+          path: ./artifact
 
-      - name: Dependencies
-        id: depends
+      - name: Extract
         run: |
-          sudo apt-get update
-          sudo apt-get install build-essential libcurl4-openssl-dev
+          mkdir -p bin
+          tar -xzf artifact/*.tar.gz --strip-components=1 -C bin
+          chmod +x bin/whisper-cli
 
-      - name: Test
-        id: ggml-ci
+      - name: Download tiny model
         run: |
-          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+          mkdir -p models
+          # --fail aborts on HTTP errors instead of saving the error page as the model;
+          # --retry smooths over transient download failures.
+          curl -fL --retry 3 -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
 
-  ggml-ci-arm64-cpu-high-perf-sve:
-    runs-on: ubuntu-22.04-arm
+      - name: Run and verify
+        run: |
+          # Put the bundled shared libraries first on the loader path. ${VAR:+...} avoids a
+          # trailing ':' (an empty entry means "current directory") when the variable is unset.
+          LD_LIBRARY_PATH="$(pwd)/bin${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result
+          cat jfk-result.txt
+          # The transcript must contain at least one expected phrase (case-insensitive).
+          # -E gives portable alternation instead of the GNU-only "\|" escape.
+          if grep -qiE "country|ask not|nation|kennedy" jfk-result.txt; then
+            echo "PASS"
+          else
+            echo "FAIL"
+            exit 1
+          fi
 
+# ════════════════════════════════════════════════════════════════════════════════
+# 15. Test — NPU Windows (rai300_400 runner, needs FlexML + .rai model)
+# ════════════════════════════════════════════════════════════════════════════════
+  test-npu-windows:
+    runs-on: [self-hosted, Windows, stx, rai300_400]
+    needs: [determine-tag, windows-npu]
+    if: needs.windows-npu.result == 'success'
+    continue-on-error: true
     steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
 
-      - name: ccache
-        uses: ggml-org/ccache-action@v1.2.16
+      - name: Download NPU artifact
+        uses: actions/download-artifact@v4
         with:
-          key: ggml-ci-arm64-cpu-high-perf-sve
-          evict-old-files: 1d
+          name: whisper-${{ needs.determine-tag.outputs.version }}-windows-npu-x64.zip
+          path: ./artifact
 
-      - name: Dependencies
-        id: depends
+      - name: Extract
+        shell: powershell
         run: |
-          sudo apt-get update
-          sudo apt-get install build-essential libcurl4-openssl-dev
+          New-Item -ItemType Directory -Force -Path bin | Out-Null
+          Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force
 
-      - name: Test
-        id: ggml-ci
+      - name: Download FlexML Runtime and setup environment
+        shell: powershell
         run: |
-          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
-
-  ggml-ci-x64-nvidia-cuda:
-    runs-on: [self-hosted, Linux, mnt-root, NVIDIA]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+          Invoke-WebRequest -Uri "${{ env.FLEXML_URL }}" -OutFile flexmlrt.zip
+          tar xvf flexmlrt.zip
+          Remove-Item flexmlrt.zip
 
-      - name: Test
-        id: ggml-ci
+      - name: Setup FlexML environment
+        shell: cmd
         run: |
-          nvidia-smi
-          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
-
-  ggml-ci-x64-nvidia-vulkan-cm:
-    runs-on: [self-hosted, Linux, mnt-root, NVIDIA]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+          cd flexmlrt
+          call setup.bat
+          if errorlevel 1 ( echo ERROR: FlexML setup failed! & exit /b 1 )
 
-      - name: Test
-        id: ggml-ci
+      - name: Copy FlexML DLLs to bin
+        shell: powershell
         run: |
-          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
-
-  ggml-ci-x64-nvidia-vulkan-cm2:
-    runs-on: [self-hosted, Linux, mnt-root, NVIDIA, COOPMAT2]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+          $flexml = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1
+          foreach ($sub in @("bin","lib")) {
+            $path = Join-Path $flexml.FullName $sub
+            if (Test-Path $path) {
+              Get-ChildItem "$path\*.dll" -ErrorAction SilentlyContinue |
+                ForEach-Object { Copy-Item $_.FullName "bin\" -Force }
+            }
+          }
 
-      - name: Test
-        id: ggml-ci
+      - name: Download models (ggml weights + .rai NPU encoder)
+        shell: powershell
         run: |
-          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
-
-  #ggml-ci-x64-cpu-amx:
-  #  runs-on: [self-hosted, Linux, X64, CPU, AMX]
+          New-Item -ItemType Directory -Force -Path models | Out-Null
+          Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin"
+          Invoke-WebRequest -Uri "https://huggingface.co/amd/whisper-tiny-onnx-npu/resolve/main/ggml-tiny-encoder-vitisai.rai" -OutFile "models\ggml-tiny-encoder-vitisai.rai"
+          Write-Host "Models:"
+          Get-ChildItem models | Format-Table Name, Length
 
-  #  steps:
-  #    - name: Clone
-  #      id: checkout
-  #      uses: actions/checkout@v6
-
-  #    - name: Test
-  #      id: ggml-ci
-  #      run: |
-  #        bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp
+      - name: Run and verify (NPU encoder + CPU decoder)
+        shell: powershell
+        run: |
+          .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result
+          # Native executables do not throw on failure; check the exit code here so a crash
+          # is reported as such rather than as a missing jfk-result.txt further down.
+          if ($LASTEXITCODE -ne 0) { Write-Error "FAIL: whisper-cli exited with code $LASTEXITCODE"; exit 1 }
+          $text = Get-Content "jfk-result.txt" -Raw
+          Write-Host $text
+          # -match is case-insensitive: any one expected phrase is enough to pass.
+          if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS: transcription correct" -ForegroundColor Green }
+          else { Write-Error "FAIL: expected words not found"; exit 1 }
 
-  ggml-ci-mac-metal:
-    runs-on: [self-hosted, macOS, ARM64]
+# ════════════════════════════════════════════════════════════════════════════════
+# 16. Publish GitHub Release (only after all tests pass or are skipped)
+# ════════════════════════════════════════════════════════════════════════════════
+  release:
+    # always() keeps this job from being skipped automatically when a test job is skipped.
+    # The two wildcard checks cover every job listed in `needs`, including the build jobs:
+    # a failed build only makes its test job "skipped", which the per-test checks accept,
+    # so without them a release could be published with assets missing.
+    # NOTE(review): test jobs that set continue-on-error: true (e.g. test-rocm-*,
+    # test-npu-windows) may report their failures as success here — confirm that is intended.
+    if: |
+      always() &&
+      needs.determine-tag.outputs.should_release == 'true' &&
+      !contains(needs.*.result, 'failure') &&
+      !contains(needs.*.result, 'cancelled') &&
+      (needs.test-cpu-windows.result == 'success' || needs.test-cpu-windows.result == 'skipped') &&
+      (needs.test-cpu-linux.result == 'success' || needs.test-cpu-linux.result == 'skipped') &&
+      (needs.test-vulkan-windows.result == 'success' || needs.test-vulkan-windows.result == 'skipped') &&
+      (needs.test-vulkan-linux.result == 'success' || needs.test-vulkan-linux.result == 'skipped') &&
+      (needs.test-rocm-windows.result == 'success' || needs.test-rocm-windows.result == 'skipped') &&
+      (needs.test-rocm-linux.result == 'success' || needs.test-rocm-linux.result == 'skipped') &&
+      (needs.test-npu-windows.result == 'success' || needs.test-npu-windows.result == 'skipped') &&
+      (needs.macos-metal.result == 'success' || needs.macos-metal.result == 'skipped')
+    runs-on: ubuntu-latest
+    needs:
+      - determine-tag
+      - linux-rocm
+      - windows-rocm
+      - linux-vulkan
+      - windows-vulkan
+      - windows-npu
+      - macos-metal
+      - linux-cpu
+      - windows-cpu
+      - test-cpu-windows
+      - test-cpu-linux
+      - test-vulkan-windows
+      - test-vulkan-linux
+      - test-rocm-windows
+      - test-rocm-linux
+      - test-npu-windows
 
     steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
 
-      - name: Test
-        id: ggml-ci
+      - name: Download all artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: ./artifacts
+
+      - name: Flatten artifacts into release/
         run: |
-          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp
+          mkdir -p release
+          find ./artifacts -mindepth 2 \( -name '*.zip' -o -name '*.tar.gz' \) -exec mv {} release/ \;
+          echo "Release assets:"
+          ls -lh release/
 
-  ggml-ci-mac-vulkan:
-    runs-on: [self-hosted, macOS, ARM64]
+      - name: Delete existing release/tag if present
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Passed through the environment so the values are never parsed as shell code.
+          TAG: ${{ needs.determine-tag.outputs.tag_name }}
+          REPO: ${{ github.repository }}
+        run: |
+          # When no release exists, `gh api` exits non-zero but still prints the JSON error
+          # body on stdout, so the captured text alone cannot be trusted: gate on exit status.
+          if RELEASE_ID=$(gh api "repos/${REPO}/releases/tags/${TAG}" --jq '.id' 2>/dev/null) && [ -n "$RELEASE_ID" ]; then
+            echo "Deleting existing release $RELEASE_ID for tag $TAG"
+            gh api -X DELETE "repos/${REPO}/releases/${RELEASE_ID}"
+          fi
+          # Best effort: the tag may not exist on the remote yet.
+          git push --delete origin "refs/tags/$TAG" 2>/dev/null || true
 
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
+      - name: Create release
+        id: create_release
+        uses: ggml-org/action-create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name:     ${{ needs.determine-tag.outputs.tag_name }}
+          release_name: "whisper.cpp ${{ needs.determine-tag.outputs.tag_name }} - AMD Builds"
+          prerelease:   ${{ github.event.inputs.pre_release_tag != '' }}
+          draft: false
+          body: |
+            ## AMD whisper.cpp ${{ needs.determine-tag.outputs.tag_name }}
+
+            AMD-based pre-built binaries of [whisper.cpp ${{ needs.determine-tag.outputs.tag_name }}](https://github.com/ggerganov/whisper.cpp/releases/tag/${{ needs.determine-tag.outputs.tag_name }}) with full hardware acceleration across ROCm GPU (iGPU and dGPU), NPU (RyzenAI), and CPU — for Linux and Windows.
+            All ROCm runtime libraries (ROCm ${{ env.ROCM_VERSION }}) are bundled. No drivers or separate installs required — download, extract, and run.
+
+            ### Packages
+
+            | Target | Linux | Windows |
+            |---|---|---|
+            | ROCm gfx1151 (Ryzen AI MAX+ Pro 395) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx1151.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx1151.zip` |
+            | ROCm gfx1150 (Ryzen AI 300) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx1150.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx1150.zip` |
+            | ROCm gfx120X (RDNA4 dGPU) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx120X.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx120X.zip` |
+            | ROCm gfx110X (RDNA3 dGPU & iGPU) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx110X.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx110X.zip` |
+            | Vulkan (cross-vendor) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-vulkan-x86_64.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-vulkan-x64.zip` |
+            | NPU (RyzenAI) | — | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-npu-x64.zip` |
+            | Metal (Apple Silicon) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-darwin-metal-arm64.tar.gz` | — |
+            | CPU only | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-cpu-x86_64.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-cpu-x64.zip` |
+
+      - name: Upload release assets
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const fs   = require('fs');
+            const path = require('path');
+            const id   = '${{ steps.create_release.outputs.id }}';
+            for (const file of fs.readdirSync('./release')) {
+              if (!file.endsWith('.zip') && !file.endsWith('.tar.gz')) continue;
+              console.log('Uploading:', file);
+              await github.rest.repos.uploadReleaseAsset({
+                owner: context.repo.owner,
+                repo:  context.repo.repo,
+                release_id: id,
+                name: file,
+                data: fs.readFileSync(`./release/${file}`),
+              });
+            }
 
-      - name: Test
-        id: ggml-ci
+      - name: Update README download links
         run: |
-          vulkaninfo --summary
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp
+          TAG="${{ needs.determine-tag.outputs.tag_name }}"
+          # A tag-triggered run checks out a detached HEAD, where a bare `git push` has no
+          # branch to update and fails. In that case commit on the default branch instead.
+          if ! git symbolic-ref -q HEAD >/dev/null; then
+            DEFAULT_BRANCH="${{ github.event.repository.default_branch }}"
+            git fetch origin "$DEFAULT_BRANCH"
+            git checkout -B "$DEFAULT_BRANCH" "origin/$DEFAULT_BRANCH"
+          fi
+          BRANCH="$(git symbolic-ref --short HEAD)"
+          # Replace the placeholder tag in all download URLs with the actual release tag
+          sed -i "s|/releases/download/[^/]*/whisper-[^-]*-|/releases/download/${TAG}/whisper-${TAG}-|g" README.md
+          git config user.name  "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add README.md
+          # Commit and push only when the links actually changed.
+          if ! git diff --cached --quiet; then
+            git commit -m "docs: update download links to ${TAG}"
+            git push origin "HEAD:refs/heads/${BRANCH}"
+          fi
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
deleted file mode 100644
index 6c0de0ece70..00000000000
--- a/.github/workflows/docker.yml
+++ /dev/null
@@ -1,77 +0,0 @@
-name: Publish Docker image
-
-on:
-  pull_request:
-  push:
-    branches:
-      - master
-
-jobs:
-  push_to_registry:
-    name: Push Docker image to Docker Hub
-    if: github.event.pull_request.draft == false
-
-    runs-on: ubuntu-22.04
-    env:
-      COMMIT_SHA: ${{ github.sha }}
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
-          - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
-          - { tag: "main-intel", dockerfile: ".devops/main-intel.Dockerfile", platform: "linux/amd64" }
-          - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
-          - { tag: "main-vulkan", dockerfile: ".devops/main-vulkan.Dockerfile", platform: "linux/amd64" }
-
-    steps:
-      - name: Check out the repo
-        uses: actions/checkout@v6
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-        with:
-          image: tonistiigi/binfmt:qemu-v7.0.0-28
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Log in to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.repository_owner }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Free up disk space
-        run: |
-          sudo apt-get remove -y '^dotnet-.*' '^llvm-.*' '^mysql-.*' '^postgresql-.*'
-          sudo apt-get autoremove -y
-          sudo apt-get autoclean
-
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /opt/ghc
-          sudo rm -rf /opt/hostedtoolcache/CodeQL
-
-          docker system prune -af
-
-          df -h
-
-      - name: Generate tags
-        id: tags
-        run: |
-          TAGS="ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
-          if [ "${{ github.event_name }}" == "push" ]; then
-            TAGS="$TAGS,ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
-          fi
-          echo "tags=$TAGS" >> $GITHUB_OUTPUT
-
-      - name: Build and push Docker image (tagged)
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          push: ${{ github.event_name == 'push' }}
-          platforms: ${{ matrix.config.platform }}
-          tags: ${{ steps.tags.outputs.tags }}
-          file: ${{ matrix.config.dockerfile }}
diff --git a/.github/workflows/examples-wasm.yml b/.github/workflows/examples-wasm.yml
deleted file mode 100644
index 927438cdad8..00000000000
--- a/.github/workflows/examples-wasm.yml
+++ /dev/null
@@ -1,97 +0,0 @@
-name: Examples WASM
-on:
-  push:
-    branches: ["master"]
-
-  workflow_dispatch:
-
-permissions:
-  contents: read
-  pages: write
-  id-token: write
-
-concurrency:
-  group: "pages"
-  cancel-in-progress: false
-
-jobs:
-  deploy-wasm-github-pages:
-    environment:
-      name: github-pages
-      url: ${{ steps.deployment.outputs.page_url }}
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v6
-
-      - name: Setup Pages
-        uses: actions/configure-pages@v5
-
-      - name: Setup emsdk
-        uses: mymindstorm/setup-emsdk@v14
-
-      - name: Build WASM Examples
-        # Enable for real build later in whisper.cpp
-        run: |
-          mkdir -p build-em && cd build-em
-          emcmake cmake .. -DCMAKE_BUILD_TYPE=Release
-          make -j
-
-      - name: Create staging directory
-        run: mkdir -p staging
-
-      - name: Create .nojekyll file in staging directory
-        run: touch staging/.nojekyll
-
-      - name: Copy application files
-        run: |
-          build_dir=build-em/bin
-
-          ls ${build_dir}
-
-          # command.wasm
-          target_dir=staging/command.wasm
-          mkdir -p ${target_dir}
-          cp ${build_dir}/command.wasm/{index.html,command.js,helpers.js} ${target_dir}
-          cp ${build_dir}/libcommand.js ${target_dir}
-
-          # bench.wasm
-          target_dir=staging/bench.wasm
-          mkdir -p ${target_dir}
-          cp ${build_dir}/bench.wasm/{index.html,bench.js,helpers.js} ${target_dir}
-          cp ${build_dir}/libbench.js ${target_dir}
-
-          # stream.wasm
-          target_dir=staging/stream.wasm
-          mkdir -p ${target_dir}
-          cp ${build_dir}/stream.wasm/{index.html,stream.js,helpers.js} ${target_dir}
-          cp ${build_dir}/libstream.js ${target_dir}
-
-          # wchess.wasm
-          target_dir=staging/wchess.wasm
-          mkdir -p ${target_dir}
-          cp -r ${build_dir}/wchess.wasm/{index.html,css,img,js} ${target_dir}
-          cp ${build_dir}/wchess.wasm.js ${target_dir}
-
-          # whisper.wasm (this will be the main example page)
-          target_dir=staging
-          mkdir -p ${target_dir}
-          cp ${build_dir}/whisper.wasm/{index.html,main.js,helpers.js} ${target_dir}
-          cp ${build_dir}/libmain.js ${target_dir}
-
-          # Copy Cross-Origin Isolation service worker
-          cp -v examples/coi-serviceworker.js staging/
-
-      - name: List files in staging directory (for debugging)
-        run: |
-          echo "Files in staging directory:"
-          find staging -type f | sort
-
-      - name: Upload artifact
-        uses: actions/upload-pages-artifact@v4
-        with:
-          path: ./staging
-
-      - name: Deploy to GitHub Pages
-        id: deployment
-        uses: actions/deploy-pages@v4
diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
deleted file mode 100644
index 1c9ade5a300..00000000000
--- a/.github/workflows/examples.yml
+++ /dev/null
@@ -1,48 +0,0 @@
-name: Examples Tests
-on:
-  push:
-    paths:
-      - examples/addon.node/**
-      - whisper.h
-  pull_request:
-    paths:
-      - examples/addon.node/**
-      - whisper.h
-
-jobs:
-  addon_node-ubuntu-22:
-    runs-on: ubuntu-22.04
-    strategy:
-      matrix:
-        node-version: [ 16.x, 18.x ]
-    steps:
-      - name: Clone
-        uses: actions/checkout@v6
-
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential git
-          sudo apt-get install cmake
-          sudo apt-get install libsdl2-dev
-
-      - name: Use Node.js ${{ matrix.node-version }}
-        uses: actions/setup-node@v6
-        with:
-          node-version: ${{ matrix.node-version }}
-          cache: 'npm'
-
-      - name: Install package.json dependencies
-        working-directory: ./examples/addon.node
-        run: npm install
-
-      - name: Compile addon.node
-        run: npx cmake-js compile -T addon.node -B Release
-
-      - name: Download test model
-        run: |
-          bash ./models/download-ggml-model.sh base.en
-      - name: Test
-        run: |
-          cd examples/addon.node
-          npm run test
diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml
new file mode 100644
index 00000000000..47693822a04
--- /dev/null
+++ b/.github/workflows/sync.yml
@@ -0,0 +1,146 @@
+name: Sync Upstream & Auto-Release
+
+# Detects new upstream whisper.cpp releases. Currently started manually only (workflow_dispatch); add a `schedule` trigger to run it daily.
+# When a new release is found:
+#   - clean merge  → pushes main + creates tag vX.Y.Z → triggers build.yml
+#   - conflict     → opens a PR for manual resolution, does NOT tag
+
+on:
+  workflow_dispatch:
+    inputs:
+      upstream_tag:
+        description: 'Force a specific upstream tag (e.g. v1.8.5). Leave blank to auto-detect latest.'
+        required: false
+        type: string
+      dry_run:
+        description: 'Dry run — merge locally but do not push or tag'
+        required: false
+        type: boolean
+        default: false
+
+permissions:
+  # actions: write lets this workflow start other workflows via workflow_dispatch. It is
+  # needed because refs pushed with GITHUB_TOKEN do not trigger push-based workflows
+  # (such as build.yml), so the release build has to be dispatched explicitly.
+  actions: write
+  contents: write
+  pull-requests: write
+
+jobs:
+  sync-and-tag:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout (full history + tags)
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Configure git identity
+        run: |
+          git config user.name  "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+      - name: Add upstream remote
+        run: git remote add upstream https://github.com/ggerganov/whisper.cpp || true
+
+      # ── Detect which upstream release to target ──────────────────────────
+      - name: Detect upstream release
+        id: upstream
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Passed through the environment instead of being interpolated into the script,
+          # so the user-supplied value is never parsed as shell code.
+          INPUT_TAG: ${{ github.event.inputs.upstream_tag }}
+        run: |
+          if [[ -n "$INPUT_TAG" ]]; then
+            UPSTREAM_TAG="$INPUT_TAG"
+            echo "Using manually specified tag: $UPSTREAM_TAG"
+          else
+            UPSTREAM_TAG=$(gh api repos/ggerganov/whisper.cpp/releases/latest --jq '.tag_name')
+            echo "Latest upstream release: $UPSTREAM_TAG"
+          fi
+
+          # Stop early on an empty or "null" tag; later steps would otherwise run
+          # `git tag -l ""` and `git merge ""` against it.
+          if [[ -z "$UPSTREAM_TAG" || "$UPSTREAM_TAG" == "null" ]]; then
+            echo "::error::Could not determine the upstream release tag."
+            exit 1
+          fi
+
+          # Strip leading 'v' for use in artifact filenames
+          VERSION="${UPSTREAM_TAG#v}"
+
+          echo "tag=$UPSTREAM_TAG"   >> "$GITHUB_OUTPUT"
+          echo "version=$VERSION"    >> "$GITHUB_OUTPUT"
+
+      # ── Check if we already have a release for this upstream version ──────
+      - name: Check if already released
+        id: check
+        run: |
+          git fetch --tags
+          # Our tags match the upstream tag exactly (e.g. v1.8.4)
+          EXISTING=$(git tag -l "${{ steps.upstream.outputs.tag }}" | head -1)
+          if [[ -n "$EXISTING" ]]; then
+            echo "already_released=true"  >> $GITHUB_OUTPUT
+            echo "::notice::Already have release $EXISTING — nothing to do."
+          else
+            echo "already_released=false" >> $GITHUB_OUTPUT
+            echo "New upstream release detected: ${{ steps.upstream.outputs.tag }}"
+          fi
+
+      # ── Merge upstream tag into main ─────────────────────────────────────
+      - name: Fetch upstream tags
+        if: steps.check.outputs.already_released == 'false'
+        run: git fetch upstream --tags
+
+      - name: Attempt merge
+        if: steps.check.outputs.already_released == 'false'
+        run: |
+          git merge "${{ steps.upstream.outputs.tag }}" --no-edit || echo "CONFLICT=true" >> $GITHUB_ENV
+
+      # ── Conflict path: open PR, do NOT tag ───────────────────────────────
+      - name: Open conflict PR
+        if: steps.check.outputs.already_released == 'false' && env.CONFLICT == 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          TAG: ${{ steps.upstream.outputs.tag }}
+          DRY_RUN: ${{ github.event.inputs.dry_run }}
+        run: |
+          # Record which files conflicted, then return the work tree to a clean state.
+          CONFLICT_FILES=$(git diff --name-only --diff-filter=U | tr '\n' ', ')
+          git merge --abort
+
+          # dry_run promises "do not push or tag": report the conflict and stop here.
+          if [[ "$DRY_RUN" == "true" ]]; then
+            echo "::warning::DRY RUN - merging ${TAG} conflicts in: ${CONFLICT_FILES} No branch pushed, no PR opened."
+            exit 0
+          fi
+
+          BRANCH="sync/${TAG}"
+          # A previous run may already have pushed this branch; pushing a fresh merge on top
+          # of it would be rejected as non-fast-forward, so leave the existing one alone.
+          if git ls-remote --exit-code --heads origin "$BRANCH" >/dev/null; then
+            echo "::notice::Branch $BRANCH already exists on origin - assuming a sync PR is already open."
+            exit 0
+          fi
+
+          git checkout -b "$BRANCH"
+          # Resolve conflicts by preferring upstream (theirs) so the branch is pushable
+          git merge "$TAG" --no-edit --strategy-option=theirs || true
+          git add -A
+          git commit -m "chore: merge upstream ${TAG} (auto-resolved via theirs)" --allow-empty
+          git push origin "$BRANCH"
+
+          # PR description; the indented lines render as Markdown code blocks.
+          cat > /tmp/pr-body.md <<EOF
+          ## Upstream sync: ${TAG}
+
+          Conflicts were detected during automatic merge. Files affected:
+
+              ${CONFLICT_FILES}
+
+          This PR was auto-resolved using upstream (theirs) as a baseline.
+          Please review the diff carefully before merging.
+
+          Once merged, manually create the release tag on main to trigger the build:
+
+              git tag ${TAG}
+              git push origin ${TAG}
+          EOF
+
+          gh pr create \
+            --title "Sync upstream ${TAG} - conflict resolution needed" \
+            --body-file /tmp/pr-body.md \
+            --base main \
+            --head "$BRANCH"
+
+          echo "::warning::Merge conflict detected - PR opened for manual resolution. Release build NOT triggered."
+
+      # ── Clean merge path: push main + tag → triggers build.yml ───────────
+      - name: Push merged main
+        if: steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true' && github.event.inputs.dry_run != 'true'
+        run: git push origin HEAD:main
+
+      - name: Create and push release tag
+        if: steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true' && github.event.inputs.dry_run != 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          TAG="${{ steps.upstream.outputs.tag }}"
+          # `git fetch upstream --tags` already created a local tag with this name pointing
+          # at the upstream commit, so a plain `git tag` would fail with "already exists".
+          # -f re-points it at the merged HEAD that was just pushed to main.
+          git tag -f -a "$TAG" -m "AMD builds for upstream $TAG"
+          git push origin "$TAG"
+          # Refs pushed with GITHUB_TOKEN do not start push-triggered workflows, so request
+          # the build explicitly through workflow_dispatch.
+          if gh workflow run build.yml --ref "$TAG"; then
+            echo "::notice::Pushed tag $TAG and dispatched build.yml to publish the release."
+          else
+            echo "::warning::Pushed tag $TAG but could not dispatch build.yml (requires 'actions: write' permission and a workflow_dispatch trigger). Start the build manually."
+          fi
+
+      - name: Dry-run summary
+        if: github.event.inputs.dry_run == 'true' && steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true'
+        run: |
+          echo "DRY RUN — merge was clean. Would have pushed main and tagged ${{ steps.upstream.outputs.tag }}."
+          echo "Re-run with dry_run=false to publish."
diff --git a/.github/workflows/test-whisper.yml b/.github/workflows/test-whisper.yml
new file mode 100644
index 00000000000..3a9d2047ff3
--- /dev/null
+++ b/.github/workflows/test-whisper.yml
@@ -0,0 +1,282 @@
+name: Test whisper-cli
+
+# Downloads published release artifacts and runs whisper-cli against samples/jfk.wav.
+# The ROCm jobs run on self-hosted GPU hardware (stx-halo runners); the CPU job
+# runs on a GitHub-hosted runner. Trigger manually after a release, or dispatch it
+# from build.yml (workflow_dispatch) once artifacts are published.
+
+on:
+  workflow_dispatch:
+    inputs:
+      # Release whose artifacts are downloaded and tested.
+      release_tag:
+        description: 'Release tag to test (e.g. v1.8.4) or "latest"'
+        type: string
+        required: false
+        default: 'latest'
+      # GPU architecture suffix used to select the ROCm artifact.
+      gfx_target:
+        description: 'ROCm GPU target to test'
+        type: string
+        required: false
+        default: 'gfx1151'
+
+# Workflow-wide values; the `||` fallback supplies the default whenever the
+# corresponding input is empty.
+env:
+  RELEASE_TAG: ${{ github.event.inputs.release_tag || 'latest' }}
+  GFX_TARGET: ${{ github.event.inputs.gfx_target || 'gfx1151' }}
+
+jobs:
+
+  # ---------------------------------------------------------------------------
+  # Resolve release tag (latest or specific)
+  # ---------------------------------------------------------------------------
+  prepare:
+    runs-on: ubuntu-latest
+    outputs:
+      release_tag: ${{ steps.resolve.outputs.release_tag }}
+    steps:
+      # Turns the requested tag ("latest" or an explicit tag) into a concrete,
+      # verified release tag and exposes it as the job output `release_tag`.
+      - name: Resolve release tag
+        id: resolve
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+        run: |
+          # RELEASE_TAG comes from the workflow-level env block. Reading it as a
+          # shell variable (instead of interpolating the expression) keeps the
+          # user-supplied input out of the script text.
+          set -uo pipefail
+          API="https://api.github.com/repos/$REPO/releases"
+
+          if [ "$RELEASE_TAG" = "latest" ]; then
+            # -f makes curl fail on HTTP errors; `// empty` maps a missing
+            # tag_name to an empty string instead of the literal "null".
+            TAG=$(curl -fsS -H "Authorization: token $GITHUB_TOKEN" "$API/latest" \
+              | jq -r '.tag_name // empty') || TAG=""
+            if [ -z "$TAG" ]; then
+              echo "Error: could not resolve the latest release of $REPO"
+              exit 1
+            fi
+            echo "Resolved latest release: $TAG"
+          else
+            TAG="$RELEASE_TAG"
+            # On a transport failure curl still prints "000", which the status
+            # check below rejects with a readable message.
+            STATUS=$(curl -s -o /dev/null -w "%{http_code}" \
+              -H "Authorization: token $GITHUB_TOKEN" \
+              "$API/tags/$TAG") || true
+            if [ "$STATUS" != "200" ]; then
+              echo "Error: Release $TAG not found (HTTP $STATUS)"
+              exit 1
+            fi
+            echo "Using specified release: $TAG"
+          fi
+          echo "release_tag=$TAG" >> "$GITHUB_OUTPUT"
+
+  # ---------------------------------------------------------------------------
+  # Test ROCm artifact on Windows (self-hosted stx-halo GPU runner)
+  # ---------------------------------------------------------------------------
+  test-windows-rocm:
+    runs-on: [self-hosted, Windows, stx-halo]
+    needs: prepare
+
+    steps:
+      - name: Checkout (for samples/jfk.wav and models/ scripts)
+        uses: actions/checkout@v4
+
+      # Looks up the ROCm zip for the requested GPU target among the release
+      # assets, downloads it and unpacks it into whisper-bin/.
+      - name: Download ROCm Windows artifact
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Expression values are passed as environment variables so they are
+          # never parsed as PowerShell code.
+          TAG: ${{ needs.prepare.outputs.release_tag }}
+          REPO: ${{ github.repository }}
+        shell: pwsh
+        run: |
+          $tag    = $env:TAG
+          $target = $env:GFX_TARGET
+          # strip leading 'v' for artifact filename (e.g. v1.8.4 -> 1.8.4)
+          # NOTE(review): the README download links in this change use
+          # "whisper-v<ver>-release-windows-rocm-<target>.zip" — confirm which
+          # naming build.yml actually publishes.
+          $ver    = $tag.TrimStart('v')
+          $asset  = "whisper-${ver}-windows-rocm-${target}.zip"
+          $repo   = $env:REPO
+
+          Write-Host "Downloading: $asset from release $tag"
+
+          $headers  = @{ "Authorization" = "token $env:GITHUB_TOKEN" }
+          $release  = Invoke-RestMethod -Uri "https://api.github.com/repos/$repo/releases/tags/$tag" -Headers $headers
+          $found    = $release.assets | Where-Object { $_.name -eq $asset }
+
+          if (-not $found) {
+            # Diagnostics go first: GitHub runs pwsh steps with
+            # $ErrorActionPreference = 'stop', so Write-Error ends the script.
+            Write-Host "Available assets:"
+            $release.assets | ForEach-Object { Write-Host "  $($_.name)" }
+            Write-Error "Asset '$asset' not found in release '$tag'"
+            exit 1
+          }
+
+          Write-Host "Found: $($found.name) ($([math]::Round($found.size/1MB,2)) MB)"
+          Invoke-WebRequest -Uri $found.browser_download_url -OutFile $asset -Headers $headers
+
+          Write-Host "Extracting..."
+          Expand-Archive -Path $asset -DestinationPath whisper-bin -Force
+          Write-Host "Binaries:"
+          Get-ChildItem whisper-bin -Filter "*.exe" | Format-Table Name, Length
+
+      - name: Download tiny model
+        shell: pwsh
+        run: |
+          New-Item -ItemType Directory -Force -Path models | Out-Null
+          $url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
+          Write-Host "Downloading ggml-tiny.bin..."
+          Invoke-WebRequest -Uri $url -OutFile "models\ggml-tiny.bin"
+          $mb = [math]::Round((Get-Item "models\ggml-tiny.bin").Length/1MB,2)
+          Write-Host "Downloaded: $mb MB"
+
+      # Transcribes the sample clip; -otxt/-of write the text to jfk-result.txt.
+      - name: Run transcription
+        shell: pwsh
+        run: |
+          $cli = "whisper-bin\whisper-cli.exe"
+          if (-not (Test-Path $cli)) {
+            # List the directory before raising the (terminating) error.
+            Write-Host "Contents of whisper-bin:"
+            Get-ChildItem -Recurse whisper-bin | Format-Table Name, Length | Out-Host
+            Write-Error "whisper-cli.exe not found"
+            exit 1
+          }
+          Write-Host "Running whisper-cli against samples\jfk.wav ..."
+          & $cli -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result
+          # Native commands do not raise PowerShell errors; check the exit code.
+          if ($LASTEXITCODE -ne 0) {
+            Write-Host "whisper-cli exited with code $LASTEXITCODE"
+            exit $LASTEXITCODE
+          }
+          Write-Host "--- Transcription output ---"
+          Get-Content jfk-result.txt
+
+      # Passes when the transcript contains any expected phrase (-match is
+      # case-insensitive).
+      - name: Verify transcription
+        shell: pwsh
+        run: |
+          $text = Get-Content "jfk-result.txt" -Raw -ErrorAction SilentlyContinue
+          if (-not $text) { Write-Error "jfk-result.txt is empty or missing"; exit 1 }
+          if ($text -match "country|ask not|nation|kennedy") {
+            Write-Host "PASS: transcription contains expected words" -ForegroundColor Green
+          } else {
+            # Show the transcript before the terminating Write-Error.
+            Write-Host $text
+            Write-Error "FAIL: expected words not found in transcription"
+            exit 1
+          }
+
+  # ---------------------------------------------------------------------------
+  # Test ROCm artifact on Linux (self-hosted stx-halo GPU runner)
+  # ---------------------------------------------------------------------------
+  test-linux-rocm:
+    runs-on: [self-hosted, Linux, stx-halo]
+    needs: prepare
+
+    steps:
+      - name: Checkout (for samples/jfk.wav)
+        uses: actions/checkout@v4
+
+      # Fetches a pinned jq binary into ~/bin when the runner has none on PATH.
+      - name: Install jq if needed
+        run: |
+          if ! command -v jq &>/dev/null; then
+            mkdir -p ~/bin
+            # -f: fail on an HTTP error instead of saving the error page as "jq".
+            curl -fL -o ~/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64
+            chmod +x ~/bin/jq
+            echo "$HOME/bin" >> "$GITHUB_PATH"
+          fi
+
+      # Looks up the ROCm tarball for the requested GPU target among the release
+      # assets, downloads it and unpacks it into whisper-bin/.
+      - name: Download ROCm Linux artifact
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Expression values are passed as environment variables so they are
+          # never parsed as shell code.
+          TAG: ${{ needs.prepare.outputs.release_tag }}
+          REPO: ${{ github.repository }}
+        run: |
+          set -euo pipefail
+          # NOTE(review): the README download links in this change use
+          # "whisper-v<ver>-release-linux-rocm-<target>.tar.gz" — confirm which
+          # naming build.yml actually publishes.
+          VER="${TAG#v}"
+          ASSET="whisper-${VER}-linux-rocm-${GFX_TARGET}.tar.gz"
+
+          echo "Downloading: $ASSET from release $TAG"
+
+          # -f turns an HTTP error into a curl failure, so a bad tag or token is
+          # reported here rather than as an opaque jq error further down.
+          if ! RELEASE=$(curl -fsS -H "Authorization: token $GITHUB_TOKEN" \
+            "https://api.github.com/repos/$REPO/releases/tags/$TAG"); then
+            echo "Could not fetch release '$TAG' from $REPO"
+            exit 1
+          fi
+
+          # --arg passes the asset name as data instead of splicing it into the filter.
+          URL=$(jq -r --arg name "$ASSET" \
+            '.assets[] | select(.name == $name) | .browser_download_url' <<<"$RELEASE")
+
+          if [ -z "$URL" ] || [ "$URL" = "null" ]; then
+            echo "Asset '$ASSET' not found in release '$TAG'"
+            echo "Available assets:"
+            jq -r '.assets[].name' <<<"$RELEASE"
+            exit 1
+          fi
+
+          SIZE=$(jq -r --arg name "$ASSET" \
+            '.assets[] | select(.name == $name) | .size' <<<"$RELEASE")
+          echo "Found: $ASSET ($SIZE bytes)"
+
+          curl -fL -H "Authorization: token $GITHUB_TOKEN" -o "$ASSET" "$URL"
+
+          echo "Extracting..."
+          mkdir -p whisper-bin
+          tar -xzf "$ASSET" --strip-components=1 -C whisper-bin
+          chmod +x whisper-bin/whisper-cli
+          echo "Binaries:"
+          ls -lh whisper-bin/whisper-cli
+
+      - name: Download tiny model
+        run: |
+          mkdir -p models
+          # -f: fail on an HTTP error instead of saving the error page as the model.
+          curl -fL -o models/ggml-tiny.bin \
+            "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
+          echo "Downloaded: $(du -h models/ggml-tiny.bin | cut -f1)"
+
+      # Puts whisper-bin/ on the loader path for the following steps. The
+      # ${VAR:+...} form avoids a trailing ':' (an implicit current-directory
+      # entry) when LD_LIBRARY_PATH was previously unset.
+      - name: Set library path
+        run: echo "LD_LIBRARY_PATH=$(pwd)/whisper-bin${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
+
+      # Transcribes the sample clip; -otxt/-of write the text to jfk-result.txt.
+      - name: Run transcription
+        run: |
+          echo "Running whisper-cli against samples/jfk.wav ..."
+          ./whisper-bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result
+          echo "--- Transcription output ---"
+          cat jfk-result.txt
+
+      # Passes when the transcript contains any expected phrase (case-insensitive).
+      - name: Verify transcription
+        run: |
+          if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then
+            echo "PASS: transcription contains expected words"
+          else
+            echo "FAIL: expected words not found in transcription"
+            cat jfk-result.txt
+            exit 1
+          fi
+
+  # ---------------------------------------------------------------------------
+  # Test CPU artifact on Linux (GitHub-hosted runner - no GPU needed)
+  # ---------------------------------------------------------------------------
+  test-linux-cpu:
+    runs-on: ubuntu-latest
+    needs: prepare
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      # Looks up the CPU tarball among the release assets, downloads it and
+      # unpacks it into whisper-bin/.
+      - name: Download CPU Linux artifact
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Expression values are passed as environment variables so they are
+          # never parsed as shell code.
+          TAG: ${{ needs.prepare.outputs.release_tag }}
+          REPO: ${{ github.repository }}
+        run: |
+          set -euo pipefail
+          # NOTE(review): the README download links in this change use
+          # "whisper-v<ver>-release-linux-cpu-x86_64.tar.gz" — confirm which
+          # naming build.yml actually publishes.
+          VER="${TAG#v}"
+          ASSET="whisper-${VER}-linux-cpu-x86_64.tar.gz"
+
+          echo "Downloading: $ASSET from release $TAG"
+
+          # -f turns an HTTP error into a curl failure, so a bad tag or token is
+          # reported here rather than as an opaque jq error further down.
+          if ! RELEASE=$(curl -fsS -H "Authorization: token $GITHUB_TOKEN" \
+            "https://api.github.com/repos/$REPO/releases/tags/$TAG"); then
+            echo "Could not fetch release '$TAG' from $REPO"
+            exit 1
+          fi
+
+          # --arg passes the asset name as data instead of splicing it into the filter.
+          URL=$(jq -r --arg name "$ASSET" \
+            '.assets[] | select(.name == $name) | .browser_download_url' <<<"$RELEASE")
+
+          if [ -z "$URL" ] || [ "$URL" = "null" ]; then
+            echo "Asset '$ASSET' not found. Available:"
+            jq -r '.assets[].name' <<<"$RELEASE"
+            exit 1
+          fi
+
+          curl -fL -H "Authorization: token $GITHUB_TOKEN" -o "$ASSET" "$URL"
+          mkdir -p whisper-bin
+          tar -xzf "$ASSET" --strip-components=1 -C whisper-bin
+          chmod +x whisper-bin/whisper-cli
+          ls -lh whisper-bin/whisper-cli
+
+      - name: Download tiny model
+        run: |
+          mkdir -p models
+          # -f: fail on an HTTP error instead of saving the error page as the model.
+          curl -fL -o models/ggml-tiny.bin \
+            "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin"
+
+      # Transcribes the sample clip; -otxt/-of write the text to jfk-result.txt.
+      - name: Run transcription
+        run: |
+          ./whisper-bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result
+          echo "--- Transcription output ---"
+          cat jfk-result.txt
+
+      # Passes when the transcript contains any expected phrase (case-insensitive).
+      - name: Verify transcription
+        run: |
+          if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then
+            echo "PASS: transcription contains expected words"
+          else
+            echo "FAIL: expected words not found"
+            cat jfk-result.txt
+            exit 1
+          fi
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a0f74041321..d4dc318056a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -91,6 +91,7 @@ endif()
 option(WHISPER_COREML                "whisper: enable Core ML framework"  OFF)
 option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
 option(WHISPER_OPENVINO              "whisper: support for OpenVINO"      OFF)
+option(WHISPER_VITISAI               "whisper: support for AMD Vitis AI"  OFF)
 
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
diff --git a/README.md b/README.md
index 474a1301da7..ef7dd801b24 100644
--- a/README.md
+++ b/README.md
@@ -1,862 +1,283 @@
-# whisper.cpp
+# whisper.cpp-rocm
+
+<a href="https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest" title="Download the latest release">
+  <img src="https://img.shields.io/github/v/release/lemonade-sdk/whisper.cpp-rocm?logo=github&logoColor=white" alt="GitHub release (latest by date)" />
+</a>
+<a href="https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest" title="View latest release date">
+  <img src="https://img.shields.io/github/release-date/lemonade-sdk/whisper.cpp-rocm?logo=github&logoColor=white" alt="Latest release date" />
+</a>
+<a href="LICENSE" title="View license">
+  <img src="https://img.shields.io/github/license/lemonade-sdk/whisper.cpp-rocm?logo=opensourceinitiative&logoColor=white" alt="License" />
+</a>
+<a href="https://github.com/ROCm/ROCm" title="Powered by ROCm">
+  <img src="https://img.shields.io/badge/ROCm-7.12-blue?logo=amd&logoColor=white" alt="ROCm 7.12" />
+</a>
+<a href="https://github.com/ggerganov/whisper.cpp" title="Powered by whisper.cpp">
+  <img src="https://img.shields.io/badge/🎤%20Powered%20by-whisper.cpp-blue" alt="Powered by whisper.cpp" />
+</a>
+<a href="#-supported-devices" title="Platform support">
+  <img src="https://img.shields.io/badge/OS-Windows%20%7C%20Linux%20%7C%20macOS-0078D6?logo=windows&logoColor=white" alt="Platform: Windows | Linux | macOS" />
+</a>
+<a href="#-supported-devices" title="GPU targets">
+  <img src="https://img.shields.io/badge/GPU-gfx110X%20%7C%20gfx1150%20%7C%20gfx1151%20%7C%20gfx120X-00B04F?logo=amd&logoColor=white" alt="GPU Targets" />
+</a>
+<a href="#npu--ryzenai" title="NPU support">
+  <img src="https://img.shields.io/badge/NPU-Ryzen%20AI%20300-ED1C24?logo=amd&logoColor=white" alt="NPU: Ryzen AI 300" />
+</a>
+
+Pre-built releases of **[whisper.cpp](https://github.com/ggerganov/whisper.cpp)** with full AMD hardware acceleration — **ROCm™ GPU**, **Vulkan GPU**, **RyzenAI NPU**, and optimised **CPU** builds — for Windows and Linux.
+
+Releases track upstream whisper.cpp exactly: every time upstream publishes a new version, our automated pipeline syncs, builds all backends, and publishes a matching release within 24 hours. No manual steps. No lag.
+
+> [!IMPORTANT]
+> **No ROCm installation required.** All ROCm and Vulkan runtime libraries are bundled inside every release archive. Download, extract, and run.
+
+> [!NOTE]
+> This project is maintained by the [Lemonade SDK](https://github.com/lemonade-sdk/lemonade) team. Our primary focus is seamless integration with Lemonade and similar AMD-optimised AI applications. We welcome collaborations and contributions that advance AMD whisper.cpp support.
 
-![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg)
-
-[![Actions Status](https://github.com/ggml-org/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggml-org/whisper.cpp/actions)
-[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
-[![Conan Center](https://shields.io/conan/v/whisper-cpp)](https://conan.io/center/whisper-cpp)
-[![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
-
-Stable: [v1.8.1](https://github.com/ggml-org/whisper.cpp/releases/tag/v1.8.1) / [Roadmap](https://github.com/orgs/ggml-org/projects/4/)
-
-High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
-
-- Plain C/C++ implementation without dependencies
-- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
-- AVX intrinsics support for x86 architectures
-- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics)
-- Mixed F16 / F32 precision
-- [Integer quantization support](#quantization)
-- Zero memory allocations at runtime
-- [Vulkan support](#vulkan-gpu-support)
-- Support for CPU-only inference
-- [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
-- [OpenVINO Support](#openvino-support)
-- [Ascend NPU Support](#ascend-npu-support)
-- [Moore Threads GPU Support](#moore-threads-gpu-support)
-- [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
-- [Voice Activity Detection (VAD)](#voice-activity-detection-vad)
-
-Supported platforms:
-
-- [x] Mac OS (Intel and Arm)
-- [x] [iOS](examples/whisper.objc)
-- [x] [Android](examples/whisper.android)
-- [x] [Java](bindings/java/README.md)
-- [x] Linux / [FreeBSD](https://github.com/ggml-org/whisper.cpp/issues/56#issuecomment-1350920264)
-- [x] [WebAssembly](examples/whisper.wasm)
-- [x] Windows ([MSVC](https://github.com/ggml-org/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggml-org/whisper.cpp/issues/168))
-- [x] [Raspberry Pi](https://github.com/ggml-org/whisper.cpp/discussions/166)
-- [x] [Docker](https://github.com/ggml-org/whisper.cpp/pkgs/container/whisper.cpp)
-
-The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
-The rest of the code is part of the [`ggml`](https://github.com/ggml-org/ggml) machine learning library.
-
-Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
-As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
-
-https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
-
-You can also easily make your own offline voice assistant application: [command](examples/command)
-
-https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
-
-On Apple Silicon, the inference runs fully on the GPU via Metal:
-
-https://github.com/ggml-org/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
-
-## Quick start
+---
 
-First clone the repository:
+## 🎯 Supported Devices
 
-```bash
-git clone https://github.com/ggml-org/whisper.cpp.git
-```
+### ROCm GPU
 
-Navigate into the directory:
+| Architecture | Devices |
+|---|---|
+| **gfx1151** — RDNA3.5 APU | Ryzen AI MAX+ Pro 395 (Strix Halo) |
+| **gfx1150** — RDNA3.5 APU | Ryzen AI 300 series (Strix Point) |
+| **gfx120X** — RDNA4 dGPU | Radeon RX 9070 XT / 9070 / 9060 XT / 9060 |
+| **gfx110X** — RDNA3 dGPU & iGPU | RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT, RX 7600 XT/7600; iGPU Radeon 780M / 760M / 740M |
 
-```
-cd whisper.cpp
-```
+### Vulkan GPU
 
-Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
+Any GPU with a Vulkan 1.3-capable driver — AMD, NVIDIA, Intel. Covers iGPUs on all platforms where a Vulkan driver is present.
 
-```bash
-sh ./models/download-ggml-model.sh base.en
-```
+### NPU — RyzenAI
 
-Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this:
+| Device | OS | Requirement |
+|---|---|---|
+| Ryzen AI 300 series (Strix Point / Strix Halo) | Windows only | NPU driver ≥ `.280` |
 
-```bash
-# build the project
-cmake -B build
-cmake --build build -j --config Release
+### CPU
 
-# transcribe an audio file
-./build/bin/whisper-cli -f samples/jfk.wav
-```
+Optimised CPU-only builds for x86-64. Windows and Linux. No GPU required.
 
 ---
 
-For a quick demo, simply run `make base.en`.
+## 📦 Downloads
 
-The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
+All builds are self-contained — no separate driver or runtime installation needed (except the NPU driver for the NPU build).
 
-For detailed usage instructions, run: `./build/bin/whisper-cli -h`
+### ROCm — GPU Accelerated
 
-Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
-For example, you can use `ffmpeg` like this:
+| GPU Target | Linux | Windows |
+|---|---|---|
+| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx1151.tar.gz) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx1151.zip) |
+| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx1150.tar.gz) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx1150.zip) |
+| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx120X.tar.gz) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx120X.zip) |
+| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx110X.tar.gz) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx110X.zip) |
 
-```bash
-ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
-```
-
-## More audio samples
-
-If you want some extra audio samples to play with, simply run:
-
-```
-make -j samples
-```
+### Vulkan — Cross-Vendor GPU
 
-This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
+| Linux | Windows |
+|---|---|
+| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-vulkan-x86_64.tar.gz) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-vulkan-x64.zip) |
 
-You can download and run the other models as follows:
+### NPU — RyzenAI (Windows only)
 
-```
-make -j tiny.en
-make -j tiny
-make -j base.en
-make -j base
-make -j small.en
-make -j small
-make -j medium.en
-make -j medium
-make -j large-v1
-make -j large-v2
-make -j large-v3
-make -j large-v3-turbo
-```
+| Windows |
+|---|
+| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-npu-x64.zip) |
 
-## Memory usage
+> Requires NPU driver ≥ `.280` and a pre-compiled `.rai` encoder model from [AMD's Hugging Face collection](https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models). Place the `.rai` file alongside your `ggml-*.bin` model — whisper-cli picks it up automatically.
 
-| Model  | Disk    | Mem     |
-| ------ | ------- | ------- |
-| tiny   | 75 MiB  | ~273 MB |
-| base   | 142 MiB | ~388 MB |
-| small  | 466 MiB | ~852 MB |
-| medium | 1.5 GiB | ~2.1 GB |
-| large  | 2.9 GiB | ~3.9 GB |
+### macOS — Metal GPU
 
-## POWER VSX Intrinsics
+| macOS (Apple Silicon) |
+|---|
+| [![macOS Metal](https://img.shields.io/badge/Download-macOS%20Metal%20(arm64)-lightgrey?logo=apple&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-darwin-metal-arm64.tar.gz) |
 
-`whisper.cpp` supports POWER architectures and includes code which
-significantly speeds operation on Linux running on POWER9/10, making it
-capable of faster-than-realtime transcription on underclocked Raptor
-Talos II. Ensure you have a BLAS package installed, and replace the
-standard cmake setup with:
+### CPU — No GPU Required
 
-```bash
-# build with GGML_BLAS defined
-cmake -B build -DGGML_BLAS=1
-cmake --build build -j --config Release
-./build/bin/whisper-cli [ .. etc .. ]
-```
+| Linux | Windows |
+|---|---|
+| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-cpu-x86_64.tar.gz) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-cpu-x64.zip) |
 
-## Quantization
+---
 
-`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
-Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently.
+## 🧪 Quick Smoketest
 
-Here are the steps for creating and using a quantized model:
+### 1. Get a model
 
 ```bash
-# quantize a model with Q5_0 method
-cmake -B build
-cmake --build build -j --config Release
-./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
-
-# run the examples as usual, specifying the quantized model file
-./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
-```
-
-## Core ML support
-
-On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant
-speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`:
-
-- Install Python dependencies needed for the creation of the Core ML model:
-
-  ```bash
-  pip install ane_transformers
-  pip install openai-whisper
-  pip install coremltools
-  ```
-
-  - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
-  - Python 3.11 is recommended.
-  - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
-  - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
-    - To create an environment, use: `conda create -n py311-whisper python=3.11 -y`
-    - To activate the environment, use: `conda activate py311-whisper`
-
-- Generate a Core ML model. For example, to generate a `base.en` model, use:
-
-  ```bash
-  ./models/generate-coreml-model.sh base.en
-  ```
-
-  This will generate the folder `models/ggml-base.en-encoder.mlmodelc`
-
-- Build `whisper.cpp` with Core ML support:
-
-  ```bash
-  # using CMake
-  cmake -B build -DWHISPER_COREML=1
-  cmake --build build -j --config Release
-  ```
-
-- Run the examples as usual. For example:
-
-  ```text
-  $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
-
-  ...
-
-  whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc'
-  whisper_init_state: first run on a device may take a while ...
-  whisper_init_state: Core ML model loaded
-
-  system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 |
-
-  ...
-  ```
-
-  The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format.
-  Next runs are faster.
-
-For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggml-org/whisper.cpp/pull/566).
-
-## OpenVINO support
-
-On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed
-on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete).
-
-This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`:
-
-- First, setup python virtual env. and install python dependencies. Python 3.10 is recommended.
-
-  Windows:
-
-  ```powershell
-  cd models
-  python -m venv openvino_conv_env
-  openvino_conv_env\Scripts\activate
-  python -m pip install --upgrade pip
-  pip install -r requirements-openvino.txt
-  ```
-
-  Linux and macOS:
-
-  ```bash
-  cd models
-  python3 -m venv openvino_conv_env
-  source openvino_conv_env/bin/activate
-  python -m pip install --upgrade pip
-  pip install -r requirements-openvino.txt
-  ```
-
-- Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use:
-
-  ```
-  python convert-whisper-to-openvino.py --model base.en
-  ```
-
-  This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
-  is the default location that the OpenVINO extension will search at runtime.
-
-- Build `whisper.cpp` with OpenVINO support:
-
-  Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2024.6.0](https://github.com/openvinotoolkit/openvino/releases/tag/2024.6.0). Ready to use Binaries of the required libraries can be found in the [OpenVino Archives](https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/)
-
-  After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example:
-
-  Linux:
-
-  ```bash
-  source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
-  ```
-
-  Windows (cmd):
-
-  ```powershell
-  C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
-  ```
-
-  And then build the project using cmake:
-
-  ```bash
-  cmake -B build -DWHISPER_OPENVINO=1
-  cmake --build build -j --config Release
-  ```
-
-- Run the examples as usual. For example:
-
-  ```text
-  $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
-
-  ...
-
-  whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml'
-  whisper_ctx_init_openvino_encoder: first run on a device may take a while ...
-  whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache
-  whisper_ctx_init_openvino_encoder: OpenVINO model loaded
-
-  system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 |
-
-  ...
-  ```
-
-  The first time run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
-  cached for the next run.
-
-For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggml-org/whisper.cpp/pull/1037).
-
-## NVIDIA GPU support
-
-With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
-First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
-
-Now build `whisper.cpp` with CUDA support:
-
-```
-cmake -B build -DGGML_CUDA=1
-cmake --build build -j --config Release
-```
-
-or for newer NVIDIA GPU's (RTX 5000 series):
-```
-cmake -B build -DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="86"
-cmake --build build -j --config Release
-```
-
-## Vulkan GPU support
-Cross-vendor solution which allows you to accelerate workload on your GPU.
-First, make sure your graphics card driver provides support for Vulkan API.
-
-Now build `whisper.cpp` with Vulkan support:
-```
-cmake -B build -DGGML_VULKAN=1
-cmake --build build -j --config Release
-```
-
-## BLAS CPU support via OpenBLAS
-
-Encoder processing can be accelerated on the CPU via OpenBLAS.
-First, make sure you have installed `openblas`: https://www.openblas.net/
-
-Now build `whisper.cpp` with OpenBLAS support:
-
-```
-cmake -B build -DGGML_BLAS=1
-cmake --build build -j --config Release
-```
-
-## Ascend NPU support
-
-Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
-
-First, check if your Ascend NPU device is supported:
-
-**Verified devices**
-| Ascend NPU                    | Status  |
-|:-----------------------------:|:-------:|
-| Atlas 300T A2                 | Support |
-| Atlas 300I Duo                | Support |
-
-Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community) . The lasted version of CANN is recommanded.
-
-Now build `whisper.cpp` with CANN support:
-
-```
-cmake -B build -DGGML_CANN=1
-cmake --build build -j --config Release
-```
-
-Run the inference examples as usual, for example:
+# Download the tiny.en model (~75 MB) for a fast smoke test
+./models/download-ggml-model.sh tiny.en
 
+# Or grab any ggml-*.bin from https://huggingface.co/ggerganov/whisper.cpp
 ```
-./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
-```
-
-*Notes:*
-
-- If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag.
-- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
 
-## Moore Threads GPU support
-
-With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
-First, make sure you have installed `MUSA SDK rc4.2.0`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=4.2.0
-
-Now build `whisper.cpp` with MUSA support:
-
-```
-cmake -B build -DGGML_MUSA=1
-cmake --build build -j --config Release
-```
-
-or specify the architecture for your Moore Threads GPU. For example, if you have a MTT S80 GPU, you can specify the architecture as follows:
-
-```
-cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
-cmake --build build -j --config Release
-```
-
-## FFmpeg support (Linux only)
-
-If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
-
-First, you need to install required libraries:
+### 2. Transcribe the bundled sample
 
 ```bash
-# Debian/Ubuntu
-sudo apt install libavcodec-dev libavformat-dev libavutil-dev
+# Linux
+./whisper-cli -m models/ggml-tiny.en.bin -f samples/jfk.wav
 
-# RHEL/Fedora
-sudo dnf install libavcodec-free-devel libavformat-free-devel libavutil-free-devel
+# Windows
+whisper-cli.exe -m models\ggml-tiny.en.bin -f samples\jfk.wav
 ```
 
-Then you can build the project as follows:
-
-```bash
-cmake -B build -D WHISPER_FFMPEG=yes
-cmake --build build
-```
+Expected: a transcription of the JFK "Ask not what your country can do for you" excerpt.
 
-Run the following example to confirm it's working:
+### 3. Verify GPU is active (ROCm)
 
 ```bash
-# Convert an audio file to Opus format
-ffmpeg -i samples/jfk.wav jfk.opus
-
-# Transcribe the audio file
-./build/bin/whisper-cli --model models/ggml-base.en.bin --file jfk.opus
-```
-
-## Docker
-
-### Prerequisites
-
-- Docker must be installed and running on your system.
-- Create a folder to store big models & intermediate files (ex. /whisper/models)
-
-### Images
-
-We have multiple Docker images available for this project:
-
-1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
-2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
-3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
-4. `ghcr.io/ggml-org/whisper.cpp:main-vulkan`: Same as `main` but compiled with Vulkan support. (platforms: `linux/amd64`)
-
-### Usage
-
-```shell
-# download model and persist it in a local folder
-docker run -it --rm \
-  -v path/to/models:/models \
-  whisper.cpp:main "./models/download-ggml-model.sh base /models"
-
-# transcribe an audio file
-docker run -it --rm \
-  -v path/to/models:/models \
-  -v path/to/audios:/audios \
-  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
-
-# transcribe an audio file in samples folder
-docker run -it --rm \
-  -v path/to/models:/models \
-  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
-
-# run the web server
-docker run -it --rm -p "8080:8080" \
-  -v path/to/models:/models \
-  whisper.cpp:main "whisper-server --host 127.0.0.1 -m /models/ggml-base.bin"
-  
-# run the bench too on the small.en model using 4 threads
-docker run -it --rm \
-  -v path/to/models:/models \
-  whisper.cpp:main "whisper-bench -m /models/ggml-small.en.bin -t 4"
+# At startup whisper-cli prints the backend in use — look for:
+#   ggml_hip: using device ...
+./whisper-cli -m models/ggml-tiny.en.bin -f samples/jfk.wav 2>&1 | grep -i "hip\|rocm\|device"
 ```
 
-## Installing with Conan
-
-You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
+### 4. Verify NPU is active (VitisAI)
 
 ```
-conan install --requires="whisper-cpp/[*]" --build=missing
+# Place the .rai encoder alongside the .bin model, then run normally.
+# Look for this line in stdout:
+#   whisper_vitisai_encode: Vitis AI model inference completed.
+whisper-cli.exe -m models\ggml-tiny.en.bin -f samples\jfk.wav
 ```
 
-For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/).
-
-## Limitations
-
-- Inference only
-
-## Real-time audio input example
-
-This is a naive example of performing real-time inference on audio from your microphone.
-The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
-More info is available in [issue #10](https://github.com/ggml-org/whisper.cpp/issues/10).
-You will need to have [sdl2](https://wiki.libsdl.org/SDL2/Installation) installed for it to work properly.
+### 5. Verify portability (Linux ROCm)
 
 ```bash
-cmake -B build -DWHISPER_SDL2=ON
-cmake --build build -j --config Release
-./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
+# ROCm runtime libs are bundled — RPATH should point to $ORIGIN (same dir as binary)
+readelf -d whisper-cli | grep -E 'RPATH|RUNPATH'    # -> $ORIGIN
+ldd whisper-cli | grep "not found"     # -> (empty — all deps resolved locally)
 ```
 
-https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
+---
 
-## Confidence color-coding
+## 🔄 Release Cadence
 
-Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
-to highlight words with high or low confidence:
+Releases are fully automated and mirror upstream whisper.cpp releases with no manual steps:
 
-```bash
-./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
 ```
-
-<img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
-
-## Controlling the length of the generated text segments (experimental)
-
-For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
-
-```text
-$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
-
-whisper_model_load: loading model from './models/ggml-base.en.bin'
-...
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
-
-main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
-
-[00:00:00.000 --> 00:00:00.850]   And so my
-[00:00:00.850 --> 00:00:01.590]   fellow
-[00:00:01.590 --> 00:00:04.140]   Americans, ask
-[00:00:04.140 --> 00:00:05.660]   not what your
-[00:00:05.660 --> 00:00:06.840]   country can do
-[00:00:06.840 --> 00:00:08.430]   for you, ask
-[00:00:08.430 --> 00:00:09.440]   what you can do
-[00:00:09.440 --> 00:00:10.020]   for your
-[00:00:10.020 --> 00:00:11.000]   country.
+upstream whisper.cpp releases vX.Y.Z
+            |
+            v  (detected within 24 h by daily sync job)
+  sync.yml merges upstream into main, pushes tag vX.Y.Z
+            |
+            v  (tag push triggers build pipeline)
+  build.yml builds all backend/OS combinations in parallel
+            |
+            v
+  GitHub Release: "whisper.cpp vX.Y.Z — AMD Builds"
+  with up to 14 artifacts across all backends and OS targets
 ```
 
-## Word-level timestamp (experimental)
-
-The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
-
-```text
-$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
-
-whisper_model_load: loading model from './models/ggml-base.en.bin'
-...
-system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
-
-main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
-
-[00:00:00.000 --> 00:00:00.320]
-[00:00:00.320 --> 00:00:00.370]   And
-[00:00:00.370 --> 00:00:00.690]   so
-[00:00:00.690 --> 00:00:00.850]   my
-[00:00:00.850 --> 00:00:01.590]   fellow
-[00:00:01.590 --> 00:00:02.850]   Americans
-[00:00:02.850 --> 00:00:03.300]  ,
-[00:00:03.300 --> 00:00:04.140]   ask
-[00:00:04.140 --> 00:00:04.990]   not
-[00:00:04.990 --> 00:00:05.410]   what
-[00:00:05.410 --> 00:00:05.660]   your
-[00:00:05.660 --> 00:00:06.260]   country
-[00:00:06.260 --> 00:00:06.600]   can
-[00:00:06.600 --> 00:00:06.840]   do
-[00:00:06.840 --> 00:00:07.010]   for
-[00:00:07.010 --> 00:00:08.170]   you
-[00:00:08.170 --> 00:00:08.190]  ,
-[00:00:08.190 --> 00:00:08.430]   ask
-[00:00:08.430 --> 00:00:08.910]   what
-[00:00:08.910 --> 00:00:09.040]   you
-[00:00:09.040 --> 00:00:09.320]   can
-[00:00:09.320 --> 00:00:09.440]   do
-[00:00:09.440 --> 00:00:09.760]   for
-[00:00:09.760 --> 00:00:10.020]   your
-[00:00:10.020 --> 00:00:10.510]   country
-[00:00:10.510 --> 00:00:11.000]  .
-```
+**Every release ships up to 14 artifacts:**
 
-## Speaker segmentation via tinydiarize (experimental)
-
-More information about this approach is available here: https://github.com/ggml-org/whisper.cpp/pull/1058
-
-Sample usage:
-
-```py
-# download a tinydiarize compatible model
-./models/download-ggml-model.sh small.en-tdrz
-
-# run as usual, adding the "-tdrz" command-line argument
-./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
-...
-main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
-...
-[00:00:00.000 --> 00:00:03.800]   Okay Houston, we've had a problem here. [SPEAKER_TURN]
-[00:00:03.800 --> 00:00:06.200]   This is Houston. Say again please. [SPEAKER_TURN]
-[00:00:06.200 --> 00:00:08.260]   Uh Houston we've had a problem.
-[00:00:08.260 --> 00:00:11.320]   We've had a main beam up on a volt. [SPEAKER_TURN]
-[00:00:11.320 --> 00:00:13.820]   Roger main beam interval. [SPEAKER_TURN]
-[00:00:13.820 --> 00:00:15.100]   Uh uh [SPEAKER_TURN]
-[00:00:15.100 --> 00:00:18.020]   So okay stand, by thirteen we're looking at it. [SPEAKER_TURN]
-[00:00:18.020 --> 00:00:25.740]   Okay uh right now uh Houston the uh voltage is uh is looking good um.
-[00:00:27.620 --> 00:00:29.940]   And we had a a pretty large bank or so.
 ```
-
-## Karaoke-style movie generation (experimental)
-
-The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the
-currently pronounced word is highlighted. Use the `-owts` argument and run the generated bash script.
-This requires to have `ffmpeg` installed.
-
-Here are a few _"typical"_ examples:
-
-```bash
-./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
-source ./samples/jfk.wav.wts
-ffplay ./samples/jfk.wav.mp4
+whisper-{version}-linux-rocm-gfx1151.tar.gz
+whisper-{version}-linux-rocm-gfx1150.tar.gz
+whisper-{version}-linux-rocm-gfx120X.tar.gz
+whisper-{version}-linux-rocm-gfx110X.tar.gz
+whisper-{version}-windows-rocm-gfx1151.zip
+whisper-{version}-windows-rocm-gfx1150.zip
+whisper-{version}-windows-rocm-gfx120X.zip
+whisper-{version}-windows-rocm-gfx110X.zip
+whisper-{version}-linux-vulkan-x86_64.tar.gz
+whisper-{version}-windows-vulkan-x64.zip
+whisper-{version}-windows-npu-x64.zip         (may be absent if NPU runner offline)
+whisper-{version}-linux-cpu-x86_64.tar.gz
+whisper-{version}-windows-cpu-x64.zip
+whisper-{version}-darwin-metal-arm64.tar.gz
 ```
 
-https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4
+> [!TIP]
+> **Linux APU out of VRAM despite free memory (gfx1150 / gfx1151)?**
+> Add `ttm.pages_limit=12582912` to your kernel command line (e.g. in GRUB), run `update-grub`, and reboot.
+> See the [TheRock FAQ](https://github.com/ROCm/TheRock/blob/main/docs/faq.md#gfx1151-strix-halo-specific-questions) for details.
 
 ---
 
-```bash
-./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
-source ./samples/mm0.wav.wts
-ffplay ./samples/mm0.wav.mp4
-```
+## 🖥️ Local Builds (Windows)
 
-https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4
+Reproduce any CI build locally using the bundled PowerShell script. Produces identical artifacts to what CI publishes.
 
----
-
-```bash
-./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
-source ./samples/gb0.wav.wts
-ffplay ./samples/gb0.wav.mp4
-```
+```powershell
+# Prerequisites: CMake, VS Build Tools 2022, 7-Zip, internet access
 
-https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4
+# CPU only (~2 min, no GPU needed)
+.\scripts\local-build.ps1 -Backend cpu
 
----
+# Vulkan — requires Vulkan SDK from https://vulkan.lunarg.com
+.\scripts\local-build.ps1 -Backend vulkan
 
-## Video comparison of different models
+# ROCm for RDNA3 iGPU — downloads ROCm tarball (~2-4 GB, cached after first run)
+.\scripts\local-build.ps1 -Backend rocm -GfxTarget gfx1151
 
-Use the [scripts/bench-wts.sh](https://github.com/ggml-org/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
+# NPU — requires RyzenAI hardware + NPU driver >= .280
+.\scripts\local-build.ps1 -Backend npu
 
-```bash
-./scripts/bench-wts.sh samples/jfk.wav
-ffplay ./samples/jfk.wav.all.mp4
+# All backends, version-stamped artifacts placed in .\dist\
+.\scripts\local-build.ps1 -Backend all -Version 1.8.4
 ```
 
-https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4
-
 ---
 
-## Benchmarks
-
-In order to have an objective comparison of the performance of the inference across different system configurations,
-use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
-took to execute it. The results are summarized in the following Github issue:
-
-[Benchmark results](https://github.com/ggml-org/whisper.cpp/issues/89)
+## 📦 Dependencies
 
-Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py).
+### Bundled in every release (no installation needed)
 
-You can run it with the following command, by default it will run against any standard model in the models folder.
+| Backend | What is included |
+|---|---|
+| ROCm | `amdhip64`, `rocblas`, `hipblaslt` + library data, LLVM runtime, all system deps; RPATH=`$ORIGIN` on Linux |
+| Vulkan | SPIR-V shaders embedded at build time; links against system Vulkan loader |
+| Metal | Uses macOS system Metal framework; no extra bundling needed |
+| NPU | FlexML Runtime DLLs (`flexmlrt/bin` + `flexmlrt/lib`) |
+| CPU | SDL2.dll included on Windows |
 
-```bash
-python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
-```
+### Build-time only
 
-It is written in python with the intention of being easy to modify and extend for your benchmarking use case.
-
-It outputs a csv file with the results of the benchmarking.
-
-## `ggml` format
-
-The original models are converted to a custom binary format. This allows to pack everything needed into a single file:
-
-- model parameters
-- mel filters
-- vocabulary
-- weights
-
-You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
-or manually from here:
-
-- https://huggingface.co/ggerganov/whisper.cpp
-
-For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
-
-## [Bindings](https://github.com/ggml-org/whisper.cpp/discussions/categories/bindings)
-
-- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggml-org/whisper.cpp/discussions/310)
-- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggml-org/whisper.cpp/discussions/309)
-  - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
-- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggml-org/whisper.cpp/discussions/312)
-- [x] Java:
-  - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
-- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggml-org/whisper.cpp/discussions/507)
-- [x] Objective-C / Swift: [ggml-org/whisper.spm](https://github.com/ggml-org/whisper.spm) | [#313](https://github.com/ggml-org/whisper.cpp/discussions/313)
-  - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
-- [x] .NET: | [#422](https://github.com/ggml-org/whisper.cpp/discussions/422)
-  - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
-  - [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
-- [x] Python: | [#9](https://github.com/ggml-org/whisper.cpp/issues/9)
-  - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
-  - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
-  - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
-  - [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11)
-- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
-- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
-
-## XCFramework
-The XCFramework is a precompiled version of the library for iOS, visionOS, tvOS,
-and macOS. It can be used in Swift projects without the need to compile the
-library from source. For example, the v1.7.5 version of the XCFramework can be
-used as follows:
-
-```swift
-// swift-tools-version: 5.10
-// The swift-tools-version declares the minimum version of Swift required to build this package.
-
-import PackageDescription
-
-let package = Package(
-    name: "Whisper",
-    targets: [
-        .executableTarget(
-            name: "Whisper",
-            dependencies: [
-                "WhisperFramework"
-            ]),
-        .binaryTarget(
-            name: "WhisperFramework",
-            url: "https://github.com/ggml-org/whisper.cpp/releases/download/v1.7.5/whisper-v1.7.5-xcframework.zip",
-            checksum: "c7faeb328620d6012e130f3d705c51a6ea6c995605f2df50f6e1ad68c59c6c4a"
-        )
-    ]
-)
-```
+| Tool | Purpose |
+|---|---|
+| [whisper.cpp](https://github.com/ggerganov/whisper.cpp) | Upstream source |
+| [ROCm / TheRock](https://github.com/ROCm/TheRock) | HIP compiler + GPU runtime (tarball, not installed globally) |
+| [FlexML Runtime](https://github.com/lemonade-sdk/whisper.cpp/releases/tag/deps) | VitisAI NPU inference |
+| [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) | GLSL to SPIR-V shader compilation |
+| [CMake >= 3.21](https://cmake.org/) | Build system |
+| [Ninja](https://ninja-build.org/) | Fast build backend (ROCm builds) |
+| [VS Build Tools 2022](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2022) | Windows MSVC toolchain |
 
-## Voice Activity Detection (VAD)
-Support for Voice Activity Detection (VAD) can be enabled using the `--vad`
-argument to `whisper-cli`. In addition to this option a VAD model is also
-required.
-
-The way this works is that first the audio samples are passed through
-the VAD model which will detect speech segments. Using this information,
-only the speech segments that are detected are extracted from the original audio
-input and passed to whisper for processing. This reduces the amount of audio
-data that needs to be processed by whisper and can significantly speed up the
-transcription process.
-
-The following VAD models are currently supported:
-
-### Silero-VAD
-[Silero-vad](https://github.com/snakers4/silero-vad) is a lightweight VAD model
-written in Python that is fast and accurate.
-
-Models can be downloaded by running the following command on Linux or MacOS:
-```console
-$ ./models/download-vad-model.sh silero-v6.2.0
-Downloading ggml model silero-v6.2.0 from 'https://huggingface.co/ggml-org/whisper-vad' ...
-ggml-silero-v6.2.0.bin        100%[==============================================>] 864.35K  --.-KB/s    in 0.04s
-Done! Model 'silero-v6.2.0' saved in '/path/models/ggml-silero-v6.2.0.bin'
-You can now use it like this:
+---
 
-  $ ./build/bin/whisper-cli -vm /path/models/ggml-silero-v6.2.0.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin
+## 🏗️ Repository Structure
 
 ```
-And the following command on Windows:
-```console
-> .\models\download-vad-model.cmd silero-v6.2.0
-Downloading vad model silero-v6.2.0...
-Done! Model silero-v6.2.0 saved in C:\Users\danie\work\ai\whisper.cpp\ggml-silero-v6.2.0.bin
-You can now use it like this:
-
-C:\path\build\bin\Release\whisper-cli.exe -vm C:\path\ggml-silero-v6.2.0.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav
-
+whisper.cpp-rocm/
+├── .github/
+│   └── workflows/
+│       ├── build.yml           # All AMD backends — builds + publishes releases
+│       └── sync.yml            # Daily upstream sync + auto-tagging
+├── ci/
+│   ├── resolve-rocm-version.sh    # Resolves AMD tarball URL for a given ROCm version
+│   └── map-gpu-target.sh          # Maps gfx110X/gfx120X shorthands to specific arch lists
+├── src/
+│   └── vitisai/
+│       ├── whisper-vitisai-encoder.h    # VitisAI NPU encoder C interface
+│       └── whisper-vitisai-encoder.cpp  # FlexML runtime integration
+├── scripts/
+│   └── local-build.ps1         # Local Windows build script (mirrors CI jobs exactly)
+├── ggml/                       # GGML library (all GPU backends live here)
+├── src/                        # whisper.cpp source (VitisAI hooks added)
+└── CMakeLists.txt              # Adds -DWHISPER_VITISAI option
 ```
 
-To see a list of all available models, run the above commands without any
-arguments.
+---
 
-This model can be also be converted manually to ggml using the following command:
-```console
-$ python3 -m venv venv && source venv/bin/activate
-$ (venv) pip install silero-vad
-$ (venv) $ python models/convert-silero-vad-to-ggml.py --output models/silero.bin
-Saving GGML Silero-VAD model to models/silero-v6.2.0-ggml.bin
-```
-And it can then be used with whisper as follows:
-```console
-$ ./build/bin/whisper-cli \
-   --file ./samples/jfk.wav \
-   --model ./models/ggml-base.en.bin \
-   --vad \
-   --vad-model ./models/silero-v6.2.0-ggml.bin
-```
+## 📄 License
+
+This project is licensed under the MIT License — see [LICENSE](LICENSE) for details.
 
-### VAD Options
-
-* --vad-threshold: Threshold probability for speech detection. A probability
-for a speech segment/frame above this threshold will be considered as speech.
-
-* --vad-min-speech-duration-ms: Minimum speech duration in milliseconds. Speech
-segments shorter than this value will be discarded to filter out brief noise or
-false positives.
-
-* --vad-min-silence-duration-ms: Minimum silence duration in milliseconds. Silence
-periods must be at least this long to end a speech segment. Shorter silence
-periods will be ignored and included as part of the speech.
-
-* --vad-max-speech-duration-s: Maximum speech duration in seconds. Speech segments
-longer than this will be automatically split into multiple segments at silence
-points exceeding 98ms to prevent excessively long segments.
-
-* --vad-speech-pad-ms: Speech padding in milliseconds. Adds this amount of padding
-before and after each detected speech segment to avoid cutting off speech edges.
-
-* --vad-samples-overlap: Amount of audio to extend from each speech segment into
-the next one, in seconds (e.g., 0.10 = 100ms overlap). This ensures speech isn't
-cut off abruptly between segments when they're concatenated together.
-
-## Examples
-
-There are various examples of using the library for different projects in the [examples](examples) folder.
-Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
-
-| Example                                             | Web                                   | Description                                                                                                                     |
-| --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
-| [whisper-cli](examples/cli)                         | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper                                                                       |
-| [whisper-bench](examples/bench)                     | [bench.wasm](examples/bench.wasm)     | Benchmark the performance of Whisper on your machine                                                                            |
-| [whisper-stream](examples/stream)                   | [stream.wasm](examples/stream.wasm)   | Real-time transcription of raw microphone capture                                                                               |
-| [whisper-command](examples/command)                 | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic                                                         |
-| [whisper-server](examples/server)                   |                                       | HTTP transcription server with OAI-like API                                                                                     |
-| [whisper-talk-llama](examples/talk-llama)           |                                       | Talk with a LLaMA bot                                                                                                           |
-| [whisper.objc](examples/whisper.objc)               |                                       | iOS mobile application using whisper.cpp                                                                                        |
-| [whisper.swiftui](examples/whisper.swiftui)         |                                       | SwiftUI iOS / macOS application using whisper.cpp                                                                               |
-| [whisper.android](examples/whisper.android)         |                                       | Android mobile application using whisper.cpp                                                                                    |
-| [whisper.nvim](examples/whisper.nvim)               |                                       | Speech-to-text plugin for Neovim                                                                                                |
-| [generate-karaoke.sh](examples/generate-karaoke.sh) |                                       | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture                           |
-| [livestream.sh](examples/livestream.sh)             |                                       | [Livestream audio transcription](https://github.com/ggml-org/whisper.cpp/issues/185)                                            |
-| [yt-wsp.sh](examples/yt-wsp.sh)                     |                                       | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
-| [wchess](examples/wchess)                           | [wchess.wasm](examples/wchess)        | Voice-controlled chess                                                                                                          |
-
-## [Discussions](https://github.com/ggml-org/whisper.cpp/discussions)
-
-If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic.
-You can use the [Show and tell](https://github.com/ggml-org/whisper.cpp/discussions/categories/show-and-tell) category
-to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the
-[Frequently asked questions (#126)](https://github.com/ggml-org/whisper.cpp/discussions/126) discussion.
+whisper.cpp is copyright Georgi Gerganov and contributors — [ggerganov/whisper.cpp](https://github.com/ggerganov/whisper.cpp).
+ROCm is copyright Advanced Micro Devices, Inc.
+VitisAI encoder copyright 2025 Advanced Micro Devices, Inc.
diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c
index 6e38ead6321..9f326c47a5b 100644
--- a/bindings/ruby/ext/ruby_whisper_context.c
+++ b/bindings/ruby/ext/ruby_whisper_context.c
@@ -308,7 +308,7 @@ check_memory_view(rb_memory_view_t *memview)
     rb_warn("currently only format \"f\" is supported for MemoryView, but given: %s", memview->format);
     return false;
   }
-  if (memview->format != NULL && memview->ndim != 1) {
+  if (memview->ndim != 1) {
     rb_warn("currently only 1 dimensional MemoryView is supported, but given: %zd", memview->ndim);
     return false;
   }
diff --git a/ci/map-gpu-target.sh b/ci/map-gpu-target.sh
new file mode 100755
index 00000000000..1e7de7c9fcf
--- /dev/null
+++ b/ci/map-gpu-target.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Expand a GFX family shorthand (gfx110X, gfx120X) into the concrete GPU architecture list for CMake; any other target is passed through unchanged.
+#
+# Usage:
+#   source ci/map-gpu-target.sh <gfx_target>
+#
+# Arguments:
+#   gfx_target - Required. Family shorthand (gfx110X, gfx120X) or a specific target (e.g. gfx1151, gfx1150)
+#
+# Outputs (exported):
+#   MAPPED_GPU_TARGET - Semicolon-separated list of GPU architectures (a single entry for a specific target)
+
+gfx_target="$1"
+# Fail on a missing argument: 'return' applies when sourced; if it errors (script executed directly), fall back to 'exit'.
+if [ -z "$gfx_target" ]; then
+    echo "Usage: source ci/map-gpu-target.sh <gfx_target>"
+    return 1 2>/dev/null || exit 1
+fi
+# Expand family shorthands into their member architectures; any other value is used as-is.
+case "$gfx_target" in
+    gfx110X)  MAPPED_GPU_TARGET="gfx1100;gfx1101;gfx1102" ;;
+    gfx120X)  MAPPED_GPU_TARGET="gfx1200;gfx1201" ;;
+    *)        MAPPED_GPU_TARGET="$gfx_target" ;;
+esac
+# Export the result for the sourcing shell and print the mapping that was applied.
+export MAPPED_GPU_TARGET
+echo "Mapped GPU target: $gfx_target -> $MAPPED_GPU_TARGET"
diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh
new file mode 100755
index 00000000000..cf3bccbe778
--- /dev/null
+++ b/ci/resolve-rocm-version.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# Resolve the ROCm tarball URL for a given platform, GPU target and version.
+#
+# Uses AMD's official repo tarball distribution:
+#   https://repo.amd.com/rocm/tarball/therock-dist-{platform}-{gfx_target}-{version}.tar.gz
+#
+# Usage:
+#   source ci/resolve-rocm-version.sh <platform> <gfx_target> <rocm_version>
+#
+# Arguments (all three are required; there are no defaults):
+#   platform      - "linux" or "windows"
+#   gfx_target    - GPU target; the family shorthands gfx110X / gfx120X select
+#                   the "-all" family tarball, anything else is used verbatim
+#   rocm_version  - Specific version (e.g. 7.12.0, 7.2.1) - required, no "latest" auto-detection
+#
+# Outputs (exported):
+#   ROCM_RESOLVED_VERSION - The resolved version string
+#   ROCM_TARBALL_URL      - The full URL to download
+#
+# The script is meant to be sourced: on an error it returns 1 to the sourcing
+# shell and only falls back to `exit 1` when executed directly.
+
+platform="$1"
+gfx_target="$2"
+rocm_version="$3"
+
+if [ -z "$platform" ] || [ -z "$gfx_target" ] || [ -z "$rocm_version" ]; then
+    echo "Usage: source ci/resolve-rocm-version.sh <platform> <gfx_target> <rocm_version>"
+    return 1 2>/dev/null || exit 1
+fi
+
+# Validate that a specific version was provided (no "latest" auto-detection)
+if [ "$rocm_version" = "latest" ]; then
+    echo "ERROR: 'latest' auto-detection is not supported."
+    echo "Please specify a concrete ROCm version (e.g., 7.12.0, 7.2.1)."
+    echo "Available versions: https://repo.amd.com/rocm/tarball/"
+    return 1 2>/dev/null || exit 1
+fi
+
+# Validate version format (should be X.Y.Z or X.Y.ZaNNNNNNNN pattern).
+# Only the leading X.Y.Z part is checked, so pre-release suffixes are accepted.
+if ! echo "$rocm_version" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then
+    echo "ERROR: Invalid ROCm version format: '$rocm_version'"
+    echo "Expected format: X.Y.Z (e.g., 7.12.0) or X.Y.ZaNNNNNNNN (e.g., 7.11.0a20251205)"
+    return 1 2>/dev/null || exit 1
+fi
+
+# Exact tarball names published at repo.amd.com/rocm/tarball/ for 7.12.0:
+#   linux:   gfx110X-all, gfx120X-all, gfx1150, gfx1151, gfx1152
+#   windows: gfx110X-all, gfx120X-all, gfx1150, gfx1151, gfx1152
+case "$gfx_target" in
+    gfx110X)  tarball_target="gfx110X-all" ;;
+    gfx120X)  tarball_target="gfx120X-all" ;;
+    gfx1150)  tarball_target="gfx1150"     ;;
+    gfx1151)  tarball_target="gfx1151"     ;;
+    gfx1152)  tarball_target="gfx1152"     ;;
+    *)        tarball_target="$gfx_target" ;;
+esac
+
+# Construct the AMD official repo URL
+ROCM_TARBALL_URL="https://repo.amd.com/rocm/tarball/therock-dist-${platform}-${tarball_target}-${rocm_version}.tar.gz"
+ROCM_RESOLVED_VERSION="$rocm_version"
+
+# Export both documented outputs so child processes (curl, cmake, later CI
+# steps started from this shell) can read them, not just the sourcing shell.
+export ROCM_RESOLVED_VERSION ROCM_TARBALL_URL
+echo "ROCm version: $ROCM_RESOLVED_VERSION"
+echo "ROCm URL: $ROCM_TARBALL_URL"
diff --git a/ci/run.sh b/ci/run.sh
index cbe28442e16..9f6d73d9c04 100644
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -79,6 +79,13 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then
     fi
 
     CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
+
+    # Set HIP environment if not already set
+    export HIP_PLATFORM=${HIP_PLATFORM:-amd}
+    export ROCM_PATH=${ROCM_PATH:-/opt/rocm}
+    export HIP_PATH=${HIP_PATH:-/opt/rocm}
+    export LD_LIBRARY_PATH=${ROCM_PATH}/lib:${LD_LIBRARY_PATH}
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_HIP_COMPILER=${ROCM_PATH}/lib/llvm/bin/clang++"
 fi
 
 if [ ! -z ${GG_BUILD_SYCL} ]; then
@@ -223,7 +230,7 @@ function gg_run_ctest {
     gg_check_build_requirements
 
     (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-    (time make -j$(nproc)                                    ) 2>&1 | tee -a $OUT/${ci}-make.log
+    (time make -j $(nproc)                                   ) 2>&1 | tee -a $OUT/${ci}-make.log
 
     (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
 
diff --git a/scripts/local-build.ps1 b/scripts/local-build.ps1
new file mode 100644
index 00000000000..db90dd5a747
--- /dev/null
+++ b/scripts/local-build.ps1
@@ -0,0 +1,395 @@
+<#
+.SYNOPSIS
+    Local build script for whisper-cpp-amd. Mirrors the GitHub Actions build.yml jobs for Windows.
+
+.DESCRIPTION
+    Builds one or more AMD backends locally, producing the same zip artifacts that CI publishes.
+
+.PARAMETER Backend
+    Which backend to build: cpu, vulkan, rocm, npu, all. Default: cpu
+
+.PARAMETER GfxTarget
+    ROCm GPU target. Default: gfx1151
+    Common: gfx1151, gfx1150, gfx1100, gfx1200
+
+.PARAMETER RocmVersion
+    ROCm version to download. Default: 7.12.0
+
+.PARAMETER OutputDir
+    Directory for final zip artifacts. Default: .\dist
+
+.PARAMETER BuildDir
+    CMake build directory prefix. Default: .\build-local
+
+.PARAMETER Version
+    Version string used in artifact filenames. Default: local
+
+.EXAMPLE
+    .\scripts\local-build.ps1 -Backend cpu
+    .\scripts\local-build.ps1 -Backend vulkan
+    .\scripts\local-build.ps1 -Backend rocm -GfxTarget gfx1151
+    .\scripts\local-build.ps1 -Backend npu
+    .\scripts\local-build.ps1 -Backend all -Version 1.8.4
+#>
+
+param(
+    [ValidateSet("cpu","vulkan","rocm","npu","all")]
+    [string]$Backend      = "cpu",
+    [string]$GfxTarget    = "gfx1151",
+    [string]$RocmVersion  = "7.12.0",
+    [string]$OutputDir    = ".\dist",
+    [string]$BuildDir     = ".\build-local",
+    [string]$Version      = "local"
+)
+
+# Strict mode: referencing an uninitialized variable or a non-existent property is an error.
+Set-StrictMode -Version Latest
+# Make cmdlet errors terminating so a failing step throws and is caught by the
+# try/catch around each backend in the main dispatch loop.
+$ErrorActionPreference = "Stop"
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+# Print a cyan banner announcing the start of a build stage.
+function Write-Step([string]$msg) {
+    $rule = "================================================"
+    Write-Host ""
+    foreach ($line in @($rule, "  $msg", $rule)) {
+        Write-Host $line -ForegroundColor Cyan
+    }
+}
+
+# Status-line helpers: success (green), progress (yellow) and failure (red).
+function Write-Ok([string]$msg) {
+    Write-Host "  [OK] $msg" -ForegroundColor Green
+}
+
+function Write-Info([string]$msg) {
+    Write-Host "  -->  $msg" -ForegroundColor Yellow
+}
+
+function Write-Fail([string]$msg) {
+    Write-Host "  [X]  $msg" -ForegroundColor Red
+}
+
+# Assert that $cmd resolves to something runnable; throws when it does not.
+function Require-Command([string]$cmd) {
+    $resolved = Get-Command $cmd -ErrorAction SilentlyContinue
+    if ($resolved) {
+        Write-Ok "$cmd found"
+        return
+    }
+    Write-Fail "$cmd not found in PATH"
+    throw "Missing requirement: $cmd"
+}
+
+# Ensure the SDL2 development package is extracted in the current directory
+# and return the directory that contains sdl2-config.cmake (for -DSDL2_DIR).
+#
+# -Ver  SDL2 release to fetch (default 2.28.5). The archive is expected to
+#       unpack into "SDL2-<Ver>", which is also used as the "already
+#       downloaded" marker.
+#
+# Throws when extraction fails or the CMake config file cannot be found.
+function Download-SDL2 {
+    param([string]$Ver = "2.28.5")
+    $sdlDir = "SDL2-$Ver"
+    if (Test-Path $sdlDir) {
+        Write-Info "SDL2 already extracted at $sdlDir"
+    } else {
+        Write-Info "Downloading SDL2 $Ver ..."
+        $url = "https://github.com/libsdl-org/SDL/releases/download/release-$Ver/SDL2-devel-$Ver-VC.zip"
+        Invoke-WebRequest -Uri $url -OutFile "sdl2.zip"
+        try {
+            & 7z x sdl2.zip -y | Out-Null
+            if ($LASTEXITCODE -ne 0) {
+                # Do not leave a half-extracted tree behind: the next run would
+                # treat it as a valid cached copy.
+                Remove-Item $sdlDir -Recurse -Force -ErrorAction SilentlyContinue
+                throw "SDL2 extraction failed (7z exit $LASTEXITCODE)"
+            }
+        } finally {
+            Remove-Item sdl2.zip -ErrorAction SilentlyContinue
+        }
+        if (-not (Test-Path $sdlDir)) {
+            throw "Expected directory $sdlDir not found after SDL2 extraction"
+        }
+
+        # Patch SDL_endian.h (needed for AMD clang compatibility).
+        # Search only inside the SDL2 tree so unrelated copies are never touched.
+        $hdr = Get-ChildItem -Path $sdlDir -Recurse -Filter "SDL_endian.h" | Select-Object -First 1
+        if ($hdr) {
+            $content = Get-Content $hdr.FullName -Raw
+            if ($content -match 'extern void _m_prefetch') {
+                $patched = $content -replace 'extern void _m_prefetch\(void \*__P\);', '// extern void _m_prefetch(void *__P);'
+                Set-Content -Path $hdr.FullName -Value $patched -NoNewline
+                Write-Ok "Patched SDL_endian.h"
+            }
+        }
+    }
+    # Restrict the lookup to the requested SDL2 version; a repo-wide search could
+    # return a config file from another SDL2 copy or from a build directory.
+    $cmake = Get-ChildItem -Path $sdlDir -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1
+    if (-not $cmake) { throw "sdl2-config.cmake not found after SDL2 extraction" }
+    return $cmake.DirectoryName
+}
+
+# Zip everything under $BinPath into "<OutputDir>\<Name>.zip" and return the
+# archive path. An existing archive with the same name is overwritten.
+function Package-Build {
+    param([string]$Name, [string]$BinPath)
+    $null = New-Item -ItemType Directory -Force -Path $OutputDir
+    $archive = Join-Path $OutputDir "$Name.zip"
+    Write-Info "Creating $archive ..."
+    Compress-Archive -Path "$BinPath\*" -DestinationPath $archive -Force
+    $sizeBytes = (Get-Item $archive).Length
+    $sizeMb = [math]::Round($sizeBytes / 1MB, 2)
+    Write-Ok "Created $archive ($sizeMb MB)"
+    return $archive
+}
+
+# Configure the repo root into $Dir with the given CMake arguments, then build
+# the requested configuration. Throws on a non-zero exit code from either step.
+# NOTE: $Arch is accepted but not used by the body; callers pass "-A x64"
+# through $ConfigArgs instead.
+function Run-MSBuild {
+    param([string]$Dir, [string[]]$ConfigArgs, [string]$Config = "Release", [string]$Arch = "x64")
+
+    Write-Info "CMake configure ..."
+    & cmake -S . -B $Dir @ConfigArgs
+    $configureExit = $LASTEXITCODE
+    if ($configureExit -ne 0) {
+        throw "CMake configure failed (exit $configureExit)"
+    }
+
+    Write-Info "MSBuild $Config ..."
+    & cmake --build $Dir --config $Config -j $env:NUMBER_OF_PROCESSORS
+    $buildExit = $LASTEXITCODE
+    if ($buildExit -ne 0) {
+        throw "Build failed (exit $buildExit)"
+    }
+}
+
+# ── Preflight ─────────────────────────────────────────────────────────────────
+
+# The relative paths used below only resolve from the repository root, so
+# refuse to run from anywhere else.
+$atRepoRoot = (Test-Path "CMakeLists.txt") -and (Test-Path "src\whisper.cpp")
+if (-not $atRepoRoot) {
+    Write-Fail "Run this script from the whisper-cpp-amd repo root."
+    exit 1
+}
+
+Require-Command cmake
+$null = New-Item -ItemType Directory -Force -Path $OutputDir
+
+# ── Build functions ───────────────────────────────────────────────────────────
+
+# Build the CPU-only backend with the default Visual Studio generator and
+# package the Release binaries (plus SDL2.dll when available) into a zip.
+function Build-CPU {
+    Write-Step "CPU - Windows x64"
+    Require-Command msbuild
+
+    $sdlCmakeDir = Download-SDL2
+    $buildRoot   = "$BuildDir-cpu"
+    $binDir      = "$buildRoot\bin\Release"
+
+    $cmakeArgs = @(
+        "-A", "x64",
+        "-DCMAKE_BUILD_TYPE=Release",
+        "-DBUILD_SHARED_LIBS=ON",
+        "-DWHISPER_SDL2=ON",
+        "-DSDL2_DIR=$sdlCmakeDir"
+    )
+    Run-MSBuild $buildRoot $cmakeArgs
+
+    # Ship the SDL2 runtime next to the executables if the extracted package has one.
+    $runtimeDll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1
+    if ($runtimeDll) {
+        Copy-Item $runtimeDll.FullName "$binDir\" -Force
+    }
+
+    $artifact = Package-Build "whisper-$Version-windows-cpu-x64" $binDir
+    Write-Ok "CPU build done. Artifact: $artifact"
+}
+
+# Build the Vulkan backend with the default Visual Studio generator and package
+# the Release binaries. The SDK comes from $env:VULKAN_SDK, falling back to the
+# last entry (by descending name) under C:\VulkanSDK.
+function Build-Vulkan {
+    Write-Step "Vulkan - Windows x64"
+    Require-Command msbuild
+
+    # Locate Vulkan SDK
+    $vulkanRoot = $env:VULKAN_SDK
+    if ([string]::IsNullOrEmpty($vulkanRoot)) {
+        $candidates = Get-ChildItem "C:\VulkanSDK" -ErrorAction SilentlyContinue
+        $newest = $candidates | Sort-Object Name -Descending | Select-Object -First 1
+        if ($null -eq $newest) {
+            Write-Fail "Vulkan SDK not found. Install from https://vulkan.lunarg.com/sdk/home"
+            throw "Missing Vulkan SDK"
+        }
+        $vulkanRoot = $newest.FullName
+    }
+    Write-Ok "Vulkan SDK: $vulkanRoot"
+
+    $sdlCmakeDir = Download-SDL2
+    $buildRoot   = "$BuildDir-vulkan"
+    $binDir      = "$buildRoot\bin\Release"
+
+    $cmakeArgs = @(
+        "-A", "x64",
+        "-DCMAKE_BUILD_TYPE=Release",
+        "-DBUILD_SHARED_LIBS=ON",
+        "-DGGML_VULKAN=ON",
+        "-DWHISPER_SDL2=ON",
+        "-DSDL2_DIR=$sdlCmakeDir",
+        "-DVULKAN_SDK=$vulkanRoot"
+    )
+    Run-MSBuild $buildRoot $cmakeArgs
+
+    # Ship the SDL2 runtime next to the executables if the extracted package has one.
+    $runtimeDll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1
+    if ($runtimeDll) {
+        Copy-Item $runtimeDll.FullName "$binDir\" -Force
+    }
+
+    $artifact = Package-Build "whisper-$Version-windows-vulkan-x64" $binDir
+    Write-Ok "Vulkan build done. Artifact: $artifact"
+}
+
+# Build the ROCm/HIP backend with Ninja Multi-Config and the amdclang toolchain
+# from a ROCm tarball extracted to C:\opt\rocm, then package the Release
+# binaries together with the ROCm runtime DLLs and SDL2.dll.
+#
+# Reads the script parameters $GfxTarget, $RocmVersion, $BuildDir and $Version.
+# Throws on download, extraction, configure or build failure.
+function Build-ROCm {
+    Write-Step "ROCm - Windows x64 (target: $GfxTarget)"
+    Require-Command ninja
+
+    # ── Download ROCm tarball ──────────────────────────────────────────────
+    # NOTE: an existing C:\opt\rocm is reused as-is, even if it was extracted
+    # for a different GPU target or ROCm version.
+    $rocmRoot = "C:\opt\rocm"
+    if (-not (Test-Path "$rocmRoot\bin\amdclang.exe")) {
+        Write-Info "Downloading ROCm $RocmVersion for $GfxTarget (2-4 GB, takes a few minutes) ..."
+
+        # Pick the tarball the same way ci/resolve-rocm-version.sh does: the
+        # gfx110X / gfx120X families are published as "<family>-all" tarballs,
+        # everything else (gfx1150, gfx1151, gfx1152, ...) under its own name.
+        # Individual family members (e.g. gfx1100) have no tarball of their
+        # own per the list in that script, so they use the family tarball too.
+        $tarballTarget = switch -Regex ($GfxTarget) {
+            '^(gfx110X|gfx110[0-2])$' { "gfx110X-all" }
+            '^(gfx120X|gfx120[01])$'  { "gfx120X-all" }
+            default                   { $GfxTarget }
+        }
+        $tarballUrl = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${tarballTarget}-${RocmVersion}.tar.gz"
+        Write-Info "URL: $tarballUrl"
+
+        Invoke-WebRequest -Uri $tarballUrl -OutFile rocm.tar.gz
+        New-Item -ItemType Directory -Force -Path $rocmRoot | Out-Null
+        & tar -xzf rocm.tar.gz -C $rocmRoot --strip-components=1
+        if ($LASTEXITCODE -ne 0) { throw "ROCm extraction failed" }
+        Remove-Item rocm.tar.gz
+        Write-Ok "ROCm extracted to $rocmRoot"
+    } else {
+        Write-Ok "ROCm already present at $rocmRoot"
+    }
+
+    # ── Map GFX target (mirrors map-gpu-target.sh) ─────────────────────────
+    $mappedTarget = switch ($GfxTarget) {
+        "gfx110X" { "gfx1100;gfx1101;gfx1102" }
+        "gfx120X" { "gfx1200;gfx1201" }
+        default   { $GfxTarget }
+    }
+    Write-Info "GPU target: $GfxTarget -> $mappedTarget"
+
+    $SDL2_DIR = Download-SDL2
+
+    # ── Set ROCm env ──────────────────────────────────────────────────────
+    $env:HIP_PATH     = $rocmRoot
+    $env:HIP_PLATFORM = "amd"
+    $env:PATH         = "$rocmRoot\bin;$rocmRoot\lib\llvm\bin;$env:PATH"
+
+    # ── Configure ─────────────────────────────────────────────────────────
+    $dir = "$BuildDir-rocm-$GfxTarget"
+    Write-Info "CMake configure (Ninja Multi-Config) ..."
+    & cmake -S . -B $dir `
+        -G "Ninja Multi-Config" `
+        "-DGPU_TARGETS=$mappedTarget" `
+        -DGGML_HIP=ON `
+        "-DCMAKE_C_COMPILER=$rocmRoot/lib/llvm/bin/amdclang.exe" `
+        "-DCMAKE_CXX_COMPILER=$rocmRoot/lib/llvm/bin/amdclang++.exe" `
+        "-DCMAKE_HIP_COMPILER=$rocmRoot/lib/llvm/bin/amdclang++.exe" `
+        "-DCMAKE_C_FLAGS=-D__PRFCHWINTRIN_H" `
+        "-DCMAKE_CXX_FLAGS=-D__PRFCHWINTRIN_H" `
+        "-DCMAKE_HIP_FLAGS=--rocm-path=$rocmRoot" `
+        "-DCMAKE_PREFIX_PATH=$rocmRoot" `
+        -DCMAKE_BUILD_TYPE=Release `
+        -DBUILD_SHARED_LIBS=ON `
+        -DWHISPER_SDL2=ON `
+        "-DSDL2_DIR=$SDL2_DIR"
+    if ($LASTEXITCODE -ne 0) { throw "CMake configure failed" }
+
+    Write-Info "Building ..."
+    & cmake --build $dir --config Release -j $env:NUMBER_OF_PROCESSORS
+    if ($LASTEXITCODE -ne 0) { throw "Build failed" }
+
+    # ── Copy ROCm DLLs ────────────────────────────────────────────────────
+    # Patterns that match nothing are skipped silently; not every ROCm
+    # distribution ships every library.
+    $binOut = "$dir\bin\Release"
+    $rocBin = "$rocmRoot\bin"
+    Write-Info "Copying ROCm DLLs ..."
+    @("amdhip64_*.dll","amd_comgr*.dll","libhipblas.dll","rocblas.dll",
+      "rocsolver.dll","hipblaslt.dll","libhipblaslt.dll","hipblas.dll") | ForEach-Object {
+        Get-ChildItem $rocBin -Name $_ -ErrorAction SilentlyContinue |
+            ForEach-Object { Copy-Item (Join-Path $rocBin $_) (Join-Path $binOut $_) -Force }
+    }
+    $rocblasLib = Join-Path $rocBin "rocblas\library"
+    if (Test-Path $rocblasLib) {
+        Copy-Item $rocblasLib -Destination (Join-Path $binOut "rocblas\library") -Recurse -Force
+    }
+    $hipblasltLib = Join-Path $rocBin "hipblaslt\library"
+    if (Test-Path $hipblasltLib) {
+        Copy-Item $hipblasltLib -Destination (Join-Path $binOut "hipblaslt\library") -Recurse -Force
+    }
+
+    $sdl2dll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1
+    if ($sdl2dll) { Copy-Item $sdl2dll.FullName $binOut -Force }
+
+    $zip = Package-Build "whisper-$Version-windows-rocm-$GfxTarget" $binOut
+    Write-Ok "ROCm build done. Artifact: $zip"
+}
+
+# Build the NPU (VitisAI / RyzenAI) backend: fetch the FlexML runtime if it is
+# not already unpacked, run its setup.bat, build with -DWHISPER_VITISAI=ON and
+# package the Release binaries together with the FlexML DLLs.
+#
+# Reads the script parameters $BuildDir and $Version.
+# Throws on download, setup, configure or build failure.
+function Build-NPU {
+    Write-Step "NPU (VitisAI / RyzenAI) - Windows x64"
+    Require-Command msbuild
+
+    # ── FlexML Runtime ────────────────────────────────────────────────────
+    $flexmlDir = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1
+    if (-not $flexmlDir) {
+        Write-Info "Downloading FlexML Runtime ..."
+        $url = "https://github.com/lemonade-sdk/whisper.cpp/releases/download/deps/flexmlrt1.7.0-win.zip"
+        Invoke-WebRequest -Uri $url -OutFile flexmlrt.zip
+        if (-not (Test-Path "flexmlrt.zip") -or (Get-Item "flexmlrt.zip").Length -eq 0) {
+            throw "flexmlrt.zip download failed or is empty"
+        }
+        $mb = [math]::Round((Get-Item "flexmlrt.zip").Length / 1MB, 2)
+        Write-Ok "Downloaded FlexML: $mb MB"
+
+        & tar xvf flexmlrt.zip
+        if ($LASTEXITCODE -ne 0) { throw "FlexML extraction failed" }
+        Remove-Item flexmlrt.zip
+
+        $flexmlDir = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1
+        if (-not $flexmlDir) { throw "No flexmlrt directory found after extraction" }
+    }
+    Write-Ok "FlexML Runtime: $($flexmlDir.FullName)"
+
+    # ── Run setup.bat via a temporary cmd script ───────────────────────────
+    # cmd /c with && is not reliable from PowerShell; use a temp .bat file instead.
+    # setup.bat runs in a child cmd.exe, so anything it sets would be lost when
+    # that process exits. The wrapper therefore dumps the resulting environment
+    # with `set` after a marker line, and the variables are imported below.
+    # stderr is merged inside the batch file so PowerShell never sees native
+    # stderr records (which can become terminating errors under "Stop").
+    $tempBat = Join-Path ([System.IO.Path]::GetTempPath()) ("flexml-setup-{0}.bat" -f [System.Guid]::NewGuid().ToString("N"))
+    $setupPath = Join-Path $flexmlDir.FullName "setup.bat"
+    $batLines = @(
+        "@echo off",
+        "call `"$setupPath`" 2>&1",
+        "if errorlevel 1 exit /b 1",
+        "echo FLEXML_OK",
+        "set"
+    )
+    Set-Content -Path $tempBat -Value $batLines
+    Write-Info "Running FlexML setup.bat ..."
+    try {
+        $setupOut  = & cmd /c $tempBat
+        $setupExit = $LASTEXITCODE
+    } finally {
+        Remove-Item $tempBat -ErrorAction SilentlyContinue
+    }
+
+    # Look for the marker as an exact line. A plain `-notmatch` on an array
+    # returns the non-matching lines, which is truthy whenever setup.bat prints
+    # anything at all.
+    $setupLines = @($setupOut | ForEach-Object { "$_".TrimEnd() })
+    $markerIdx  = [Array]::IndexOf($setupLines, "FLEXML_OK")
+    if ($setupExit -ne 0 -or $markerIdx -lt 0) {
+        Write-Fail "FlexML setup.bat failed. Output:"
+        $setupLines | ForEach-Object { Write-Host "    $_" }
+        throw "FlexML setup failed. Ensure NPU drivers (>= .280) are installed."
+    }
+
+    # Import the environment produced by setup.bat into this process so the
+    # cmake invocations below inherit it.
+    for ($i = $markerIdx + 1; $i -lt $setupLines.Count; $i++) {
+        if ($setupLines[$i] -match '^([^=]+)=(.*)$') {
+            [System.Environment]::SetEnvironmentVariable($Matches[1], $Matches[2], 'Process')
+        }
+    }
+    Write-Ok "FlexML environment configured"
+
+    # ── CMake configure + build ───────────────────────────────────────────
+    $dir = "$BuildDir-npu"
+    Write-Info "CMake configure with -DWHISPER_VITISAI=ON ..."
+    & cmake -B $dir -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_VITISAI=ON
+    if ($LASTEXITCODE -ne 0) { throw "CMake configure failed" }
+
+    Write-Info "Building ..."
+    & cmake --build $dir --config Release -j $env:NUMBER_OF_PROCESSORS
+    if ($LASTEXITCODE -ne 0) { throw "Build failed" }
+
+    # ── List output ───────────────────────────────────────────────────────
+    $binOut = "$dir\bin\Release"
+    if (Test-Path $binOut) {
+        Write-Info "Build output:"
+        Get-ChildItem $binOut | Format-Table Name, Length -AutoSize
+    } else {
+        throw "Expected output directory $binOut not found"
+    }
+
+    # ── Copy FlexML DLLs ─────────────────────────────────────────────────
+    Write-Info "Copying FlexML DLLs ..."
+    $copied = 0
+    foreach ($sub in @("bin", "lib")) {
+        $subPath = Join-Path $flexmlDir.FullName $sub
+        if (Test-Path $subPath) {
+            # @(...) keeps a single match an array, so .Count is valid under strict mode.
+            $dlls = @(Get-ChildItem "$subPath\*.dll" -ErrorAction SilentlyContinue)
+            if ($dlls.Count -gt 0) {
+                $dlls | Copy-Item -Destination $binOut -Force
+                $copied += $dlls.Count
+            }
+        }
+    }
+    Write-Ok "Copied $copied FlexML DLLs"
+
+    $zip = Package-Build "whisper-$Version-windows-npu-x64" $binOut
+    Write-Ok "NPU build done. Artifact: $zip"
+    Write-Info "To run: place the .rai encoder model next to your ggml-*.bin and run whisper-cli.exe normally."
+}
+
+# ── Main dispatch ─────────────────────────────────────────────────────────────
+
+# Build every requested backend. A failure in one backend is recorded and the
+# remaining backends are still attempted.
+$targets = if ($Backend -eq "all") { @("cpu","vulkan","rocm","npu") } else { @($Backend) }
+$results = [ordered]@{}
+$anyFailed = $false
+
+foreach ($t in $targets) {
+    try {
+        switch ($t) {
+            "cpu"    { Build-CPU    }
+            "vulkan" { Build-Vulkan }
+            "rocm"   { Build-ROCm   }
+            "npu"    { Build-NPU    }
+        }
+        $results[$t] = "[OK]    PASSED"
+    } catch {
+        Write-Fail "[$t] failed: $_"
+        $results[$t] = "[FAIL]  $_"
+        $anyFailed = $true
+    }
+}
+
+# ── Summary ───────────────────────────────────────────────────────────────────
+
+Write-Step "Build Summary"
+foreach ($t in $targets) {
+    $color = if ($results[$t].StartsWith("[OK]")) { "Green" } else { "Red" }
+    Write-Host "  $t : $($results[$t])" -ForegroundColor $color
+}
+
+Write-Host ""
+Write-Host "Artifacts in: $(Resolve-Path $OutputDir)" -ForegroundColor Cyan
+if (Test-Path $OutputDir) {
+    Get-ChildItem $OutputDir -Filter "*.zip" | ForEach-Object {
+        $mb = [math]::Round($_.Length / 1MB, 2)
+        Write-Host "  $($_.Name) ($mb MB)"
+    }
+}
+
+# Report failure through the exit code so wrappers and CI-style callers can
+# detect a broken build without parsing the summary text.
+if ($anyFailed) {
+    exit 1
+}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 095a2791de5..fe10876eaf7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -48,6 +48,10 @@ if (WHISPER_OPENVINO)
     find_package(OpenVINO REQUIRED COMPONENTS Runtime)
 endif()
 
+# The Vitis AI encoder links against the FlexML runtime; REQUIRED makes
+# configuration fail when the FlexmlRT package cannot be found.
+if (WHISPER_VITISAI)
+    find_package(FlexmlRT REQUIRED)
+endif()
+
 #
 # libraries
 #
@@ -101,6 +105,30 @@ if (WHISPER_OPENVINO)
     set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
 endif()
 
+if (WHISPER_VITISAI)
+    # Object library with the Vitis AI (FlexML runtime) encoder sources; it is
+    # linked into the whisper library further down in this file.
+    set(TARGET whisper.vitisai)
+
+    add_library(${TARGET} OBJECT
+        vitisai/whisper-vitisai-encoder.h
+        vitisai/whisper-vitisai-encoder.cpp
+        )
+
+    target_include_directories(${TARGET} PUBLIC
+        .
+        )
+
+    set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)
+    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_VITISAI)
+
+    # FlexML headers use C++17 features (structured bindings etc.), so request
+    # the standard on every toolchain instead of relying on compiler defaults.
+    target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+    # Keep the explicit flag for MSVC, which otherwise defaults to C++14.
+    if (MSVC)
+        target_compile_options(${TARGET} PRIVATE /std:c++17)
+    endif()
+
+    target_link_libraries(${TARGET} PRIVATE ggml flexmlrt::flexmlrt)
+    set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
+endif()
+
 # whisper
 
 add_library(whisper
@@ -137,6 +165,10 @@ if (WHISPER_OPENVINO)
     target_link_libraries(whisper PRIVATE whisper.openvino)
 endif()
 
+# Link the Vitis AI encoder object library into the whisper library.
+if (WHISPER_VITISAI)
+    target_link_libraries(whisper PRIVATE whisper.vitisai)
+endif()
+
 if (WHISPER_MKL)
     target_link_libraries(whisper PRIVATE MKL::MKL)
 endif()
diff --git a/src/vitisai/whisper-vitisai-encoder.cpp b/src/vitisai/whisper-vitisai-encoder.cpp
new file mode 100644
index 00000000000..a6d20a88c9a
--- /dev/null
+++ b/src/vitisai/whisper-vitisai-encoder.cpp
@@ -0,0 +1,204 @@
+// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved.
+#include "vitisai/whisper-vitisai-encoder.h"
+#include "FlexMLClient.h"
+#include "ggml.h"
+#include "ggml-backend.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#ifdef _WIN32
+    #include <windows.h>
+#else
+    #include <sys/mman.h>
+    #include <sys/stat.h>
+    #include <fcntl.h>
+#endif
+
+// State owned by one Vitis AI encoder instance.
+struct whisper_vitisai_context {
+    // Path of the model handed to the FlexML runtime.
+    std::string model_path;
+    // FlexML model runner created in whisper_vitisai_init().
+    std::shared_ptr<flexmlrt::client::Model> runner;
+    // Memory-mapped .rai file, or nullptr when nothing is mapped. The default
+    // matters: whisper_vitisai_free() tests this pointer to decide whether to
+    // unmap, and the mapping is only set up for .rai model paths.
+    uint8_t * fbs_buffer = nullptr;
+    // Length of the mapping in bytes (0 when nothing is mapped).
+    size_t fbs_buffer_size = 0;
+};
+
+// Map the file at `path` read-only into memory (mmap on POSIX, MapViewOfFile on
+// Windows).
+//
+//   path   - file to map
+//   buffer - receives the address of the mapping
+//   size   - receives the length of the mapping in bytes
+//
+// Returns true on success; release the mapping with unmap_rai_file().
+// Returns false on any failure (including null arguments), in which case
+// *buffer is nullptr and *size is 0.
+bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size) {
+    if (!path || !buffer || !size) {
+        std::fprintf(stderr, "%s: %d: Invalid arguments\n", __func__, __LINE__);
+        return false;
+    }
+    *buffer = nullptr;
+    *size   = 0;
+
+#ifdef _WIN32
+    // Open the file
+    HANDLE hFile = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (hFile == INVALID_HANDLE_VALUE) {
+        std::fprintf(stderr, "%s: %d: Failed to open rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+
+    // Get the file size
+    LARGE_INTEGER fileSize;
+    if (!GetFileSizeEx(hFile, &fileSize)) {
+        CloseHandle(hFile);
+        std::fprintf(stderr, "%s: %d: Failed to get file size for rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+
+    // Create a file mapping object. The maximum size is passed as separate
+    // high/low DWORDs so files larger than 4 GiB are not truncated.
+    HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, (DWORD) fileSize.HighPart, fileSize.LowPart, NULL);
+    if (hMapping == NULL) {
+        CloseHandle(hFile);
+        std::fprintf(stderr, "%s: %d: Failed to create file mapping for rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+
+    // Map the file
+    void * view = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, (SIZE_T) fileSize.QuadPart);
+
+    // The view keeps the mapping and the file alive until UnmapViewOfFile, so
+    // both handles can be released now instead of being leaked.
+    CloseHandle(hMapping);
+    CloseHandle(hFile);
+
+    if (view == NULL) {
+        std::fprintf(stderr, "%s: %d: Failed to map rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+    *buffer = (uint8_t *) view;
+    *size   = (size_t) fileSize.QuadPart;
+    return true;
+#else
+    // Open the file
+    FILE * fd = fopen(path, "rb");
+    if (!fd) {
+        std::fprintf(stderr, "%s: %d: Failed to open rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+
+    // Get the file size
+    struct stat st;
+    if (fstat(fileno(fd), &st) == -1) {
+        fclose(fd);
+        std::fprintf(stderr, "%s: %d: Failed to get file size for rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+
+    // Mmap the file
+    void * addr = mmap(nullptr, st.st_size, PROT_READ, MAP_SHARED, fileno(fd), 0);
+
+    // The mapping stays valid after the descriptor is closed, so close the
+    // stream on both the success and the failure path.
+    fclose(fd);
+
+    if (addr == MAP_FAILED) {
+        std::fprintf(stderr, "%s: %d: Failed to mmap rai file '%s'\n", __func__, __LINE__, path);
+        return false;
+    }
+    *buffer = (uint8_t *) addr;
+    *size   = (size_t) st.st_size;
+    return true;
+#endif // _WIN32
+}
+
+// Release a mapping created by map_rai_file(). `size` is only used by the
+// POSIX branch; UnmapViewOfFile needs just the base address.
+void unmap_rai_file(uint8_t * buffer, size_t size) {
+#ifndef _WIN32
+    munmap(buffer, size);
+#else
+    (void) size;
+    UnmapViewOfFile(buffer);
+#endif // !_WIN32
+}
+
+// Create a Vitis AI encoder context for the model at `path_model`.
+//
+// The path can be overridden with the OVERRIDE_VITISAI_MODEL_PATH environment
+// variable (ignored when empty). When the resulting path contains ".rai", the
+// file is memory-mapped and handed to the runtime as an in-memory buffer.
+//
+// Returns nullptr when `path_model` is null, when the .rai file cannot be
+// mapped, or when the FlexML runner cannot be created. Release a non-null
+// result with whisper_vitisai_free().
+struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model) {
+    if (!path_model) {
+        std::fprintf(stderr, "%s: path_model is null\n", __func__);
+        return nullptr;
+    }
+
+    auto * ctx = new whisper_vitisai_context;
+    ctx->model_path = path_model;
+    // Start from a known "nothing mapped" state: whisper_vitisai_free() reads
+    // these fields even when the model is not a .rai file.
+    ctx->fbs_buffer = nullptr;
+    ctx->fbs_buffer_size = 0;
+
+    // Override the model path with the environment variable if it is set
+    if (const char * env_model_path = std::getenv("OVERRIDE_VITISAI_MODEL_PATH")) {
+        if (env_model_path[0] != '\0') {
+            ctx->model_path = env_model_path;
+        }
+    }
+
+    // Step 1: Set up the model
+    flexmlrt::client::Options options;
+    options.modelPath = ctx->model_path;
+    options.deviceName = "stx";
+    options.debug = false;
+    options.executeMode = 2;
+    options.extOptions["ai_analyzer_profiling"] = true; // Enable AIA profiling
+    options.extOptions["enable_preemption"] = true;
+
+    // Check if model_path is rai file and if so, add fbs_buffer and fbs_buffer_size to the options
+    if (ctx->model_path.find(".rai") != std::string::npos) {
+        // mmap rai file for both Linux and Windows and pass the buffer to the options
+        if (map_rai_file(ctx->model_path.c_str(), &ctx->fbs_buffer, &ctx->fbs_buffer_size)) {
+            options.extOptions["fbs_buffer"] = ctx->fbs_buffer;
+            options.extOptions["fbs_buffer_size"] = ctx->fbs_buffer_size;
+            options.subgraphName = "vaiml_par_0";
+            options.extOptions["cache_dir"] = std::string(".");
+        } else {
+            std::fprintf(stderr, "%s: Failed to mmap rai file '%s'\n", __func__, ctx->model_path.c_str());
+            delete ctx;
+            return nullptr;
+        }
+    }
+
+    // Step 2: Create the runner. Any failure must also release the mapping
+    // created above, otherwise it leaks for the lifetime of the process.
+    bool ok = false;
+    try {
+        ctx->runner = std::make_shared<flexmlrt::client::Model>(options);
+
+        if (!ctx->runner->good()) {
+            throw std::runtime_error("Runner creation ran into an error");
+        }
+        ok = true;
+    } catch (const std::exception & e) {
+        std::fprintf(stderr, "%s: Exception during Vitis AI runner creation: %s\n", __func__, e.what());
+    } catch (...) {
+        std::fprintf(stderr, "%s: Unknown exception during Vitis AI runner creation\n", __func__);
+    }
+
+    if (!ok) {
+        // Destroy the runner first: it was given the mapped buffer and may
+        // still reference it while being torn down.
+        ctx->runner.reset();
+        if (ctx->fbs_buffer) {
+            unmap_rai_file(ctx->fbs_buffer, ctx->fbs_buffer_size);
+        }
+        delete ctx;
+        return nullptr;
+    }
+    return ctx;
+}
+
+// Release a context created by whisper_vitisai_init(). A null `ctx` is ignored.
+void whisper_vitisai_free(struct whisper_vitisai_context * ctx) {
+    if (!ctx) {
+        return;
+    }
+
+    std::fprintf(stderr, "%s: releasing Vitis AI encoder context for model '%s'\n", __func__, ctx->model_path.c_str());
+
+    // Drop the runner before unmapping: it was configured with a pointer into
+    // the mapping and may still reference it during its own teardown.
+    ctx->runner.reset();
+
+    if (ctx->fbs_buffer) {
+        unmap_rai_file(ctx->fbs_buffer, ctx->fbs_buffer_size);
+        ctx->fbs_buffer = nullptr;
+        ctx->fbs_buffer_size = 0;
+    }
+    delete ctx;
+}
+
+// Run the Vitis AI encoder.
+//
+//   ctx - context from whisper_vitisai_init()
+//   mel - 2-D input tensor; its data pointer is bound to the model's first input
+//   out - 2-D output tensor; its data pointer is bound to the model's first output
+//
+// Returns 1 on success and 0 on failure (null argument, wrong tensor rank,
+// missing runner, model without input/output tensors, or an exception thrown by
+// the runtime). Diagnostics go to stderr so that stdout stays free for the
+// host application's own output.
+int whisper_vitisai_encode(struct whisper_vitisai_context * ctx, struct ggml_tensor * mel, struct ggml_tensor * out) {
+    if (!ctx || !mel || !out) {
+        std::fprintf(stderr, "%s: ctx/mel/out must not be null\n", __func__);
+        return 0;
+    }
+
+    if (ggml_n_dims(mel) != 2) {
+        std::fprintf(stderr, "%s: mel tensor expected to have 2 dims, got %d\n", __func__, ggml_n_dims(mel));
+        return 0;
+    }
+
+    if (ggml_n_dims(out) != 2) {
+        std::fprintf(stderr, "%s: out tensor expected to have 2 dims, got %d\n", __func__, ggml_n_dims(out));
+        return 0;
+    }
+
+    const auto & model = ctx->runner;
+    if (!model) {
+        std::fprintf(stderr, "%s: Vitis AI runner is not initialized\n", __func__);
+        return 0;
+    }
+
+    try {
+        // setup input and output tensors for Vitis AI model
+        std::vector<flexmlrt::client::ErtTensorType> input_tensors, output_tensors;
+
+        // Get tensors as CPU tensors (hwTensor = false)
+        input_tensors = model->getIOTensors("input", false);
+        output_tensors = model->getIOTensors("output", false);
+
+        // Indexing [0] on an empty vector is undefined behavior; fail cleanly instead.
+        if (input_tensors.empty() || output_tensors.empty()) {
+            std::fprintf(stderr, "%s: model exposes no input or output tensors\n", __func__);
+            return 0;
+        }
+
+        // TODO: add assert checks for tensor shapes
+
+        input_tensors[0].data = mel->data;
+        output_tensors[0].data = out->data;
+
+        model->forward(input_tensors, output_tensors);
+        std::fprintf(stderr, "%s: Vitis AI model inference completed.\n", __func__);
+    } catch (const std::exception & e) {
+        std::fprintf(stderr, "%s: Exception during model inference: %s\n", __func__, e.what());
+        return 0;
+    }
+
+    return 1;
+}
diff --git a/src/vitisai/whisper-vitisai-encoder.h b/src/vitisai/whisper-vitisai-encoder.h
new file mode 100644
index 00000000000..05dc812be88
--- /dev/null
+++ b/src/vitisai/whisper-vitisai-encoder.h
@@ -0,0 +1,32 @@
+// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdbool>
+#include <cstdint>
+
+#if __cplusplus
+extern "C" {
+#endif
+
+// Opaque encoder context; the definition lives in whisper-vitisai-encoder.cpp.
+struct whisper_vitisai_context;
+
+// Create an encoder context for the model at path_model. The path can be
+// overridden with the OVERRIDE_VITISAI_MODEL_PATH environment variable.
+// Returns NULL when path_model is NULL, when a .rai model cannot be mapped, or
+// when the runner cannot be created.
+struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model);
+// Release a context returned by whisper_vitisai_init(); a NULL ctx is ignored.
+void whisper_vitisai_free(struct whisper_vitisai_context * ctx);
+
+// Function to mmap rai file for Linux and MapViewOfFile for Windows
+// On success stores the mapping address in *buffer and its length in *size and
+// returns true; returns false when the file cannot be opened, sized or mapped.
+bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size);
+// Function to unmap rai file for Linux and UnmapViewOfFile for Windows
+// buffer/size are the values produced by map_rai_file().
+void unmap_rai_file(uint8_t * buffer, size_t size);
+
+struct ggml_tensor;
+
+// Run the encoder with mel->data as the model input and out->data as the model
+// output; both tensors must have 2 dimensions.
+// Returns 1 on success, 0 on failure (NULL argument, wrong tensor rank, or an
+// exception during inference).
+int whisper_vitisai_encode(
+    struct whisper_vitisai_context * ctx,
+    struct ggml_tensor * mel,
+    struct ggml_tensor * out);
+
+#if __cplusplus
+}
+#endif
diff --git a/src/whisper.cpp b/src/whisper.cpp
index 2f356da0f06..a038a5959ea 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -14,6 +14,10 @@
 #include "openvino/whisper-openvino-encoder.h"
 #endif
 
+#ifdef WHISPER_USE_VITISAI
+#include "vitisai/whisper-vitisai-encoder.h"
+#endif
+
 #include <atomic>
 #include <algorithm>
 #include <cassert>
@@ -903,6 +907,10 @@ struct whisper_state {
     whisper_openvino_context * ctx_openvino = nullptr;
 #endif
 
+#ifdef WHISPER_USE_VITISAI
+    whisper_vitisai_context * ctx_vitisai = nullptr;
+#endif
+
     // [EXPERIMENTAL] token-level timestamps data
     int64_t t_beg  = 0;
     int64_t t_last = 0;
@@ -1970,7 +1978,13 @@ static bool whisper_encode_external(const whisper_state & wstate) {
     const bool use_openvino = wstate.ctx_openvino != nullptr;
 #endif
 
-    return use_coreml || use_openvino;
+#ifndef WHISPER_USE_VITISAI
+    const bool use_vitisai = false;
+#else
+    const bool use_vitisai = wstate.ctx_vitisai != nullptr;
+#endif
+
+    return use_coreml || use_openvino || use_vitisai;
 }
 
 static struct ggml_cgraph * whisper_build_graph_conv(
@@ -2411,6 +2425,8 @@ static bool whisper_encode_internal(
 
 #if defined(WHISPER_USE_COREML)
             whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data);
+#elif defined(WHISPER_USE_VITISAI)
+            whisper_vitisai_encode(wstate.ctx_vitisai, mel, wstate.embd_enc);
 #elif defined(WHISPER_USE_OPENVINO)
             whisper_openvino_encode(wstate.ctx_openvino, mel, wstate.embd_enc);
 #endif
@@ -3346,6 +3362,20 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
 }
 #endif
 
+#ifdef WHISPER_USE_VITISAI
+// Derive the Vitis AI encoder artifact path from the ggml model path:
+// the file extension (if any) is replaced with "-encoder-vitisai.rai".
+//
+// Only a dot inside the file name counts as an extension separator, so a dot in
+// a directory component ("./models/ggml-base", "/opt/v1.2/ggml") or a leading
+// dot of the file name never truncates the path.
+static std::string whisper_get_vitisai_path_encoder_cache(std::string path_bin) {
+    const auto sep        = path_bin.find_last_of("/\\");
+    const auto name_start = (sep == std::string::npos) ? 0 : sep + 1;
+
+    const auto pos = path_bin.rfind('.');
+    if (pos != std::string::npos && pos > name_start) {
+        path_bin.resize(pos);
+    }
+
+    path_bin += "-encoder-vitisai.rai";
+
+    return path_bin;
+}
+#endif
+
 #ifdef WHISPER_USE_OPENVINO
 // replace .bin with-encoder-openvino.xml
 static std::string whisper_openvino_get_path_encoder(std::string path_bin) {
@@ -3455,6 +3485,19 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
     }
 #endif
 
+#ifdef WHISPER_USE_VITISAI
+    const auto path_vitisai = whisper_get_vitisai_path_encoder_cache(ctx->path_model);
+
+    state->ctx_vitisai = whisper_vitisai_init(path_vitisai.c_str());
+    if (!state->ctx_vitisai) {
+        WHISPER_LOG_ERROR("%s: failed to load Vitis AI model from '%s'\n", __func__, path_vitisai.c_str());
+        whisper_free_state(state);
+        return nullptr;
+    } else {
+        WHISPER_LOG_INFO("%s: Vitis AI model loaded\n", __func__);
+    }
+#endif
+
     state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx);
 
     state->batch = whisper_batch_init(ctx->model.hparams.n_text_ctx, WHISPER_MAX_DECODERS);
@@ -3821,6 +3864,13 @@ void whisper_free_state(struct whisper_state * state) {
         }
 #endif
 
+#ifdef WHISPER_USE_VITISAI
+        if (state->ctx_vitisai != nullptr) {
+            whisper_vitisai_free(state->ctx_vitisai);
+            state->ctx_vitisai = nullptr;
+        }
+#endif
+
         whisper_batch_free(state->batch);
 
         ggml_backend_sched_free(state->sched_conv.sched);
@@ -4312,11 +4362,20 @@ static int whisper_has_openvino(void) {
 #endif
 }
 
+// Report whether this build was compiled with Vitis AI encoder support
+// (1 when WHISPER_USE_VITISAI is defined, 0 otherwise).
+static int whisper_has_vitisai(void) {
+#if defined(WHISPER_USE_VITISAI)
+    const int enabled = 1;
+#else
+    const int enabled = 0;
+#endif
+    return enabled;
+}
+
 const char * whisper_print_system_info(void) {
     static std::string s;
 
     s  = "";
     s += "WHISPER : ";
+    s += "VITISAI = "   + std::to_string(whisper_has_vitisai())    + " | ";
     s += "COREML = "    + std::to_string(whisper_has_coreml())     + " | ";
     s += "OPENVINO = "  + std::to_string(whisper_has_openvino())   + " | ";