From f4ed884107531f293fed8ba5c59a8eebca6a3a23 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Sun, 25 Jan 2026 13:32:34 -0800 Subject: [PATCH 01/55] Enable windows rocm build of whisper.cpp Add ROCm support and CI improvements ci: automate library bundling in ROCm build workflow Replace manual copying of ROCm libraries and shared objects with an automated CMake-based bundling step using GET_RUNTIME_DEPENDENCIES. This ensures all linked libraries (e.g., libamdhip64, librocm_sysdeps) are recursively detected and bundled into build/bin, filtering out system libs like libc.so, while improving portability and reducing maintenance for dependency management. build: enhance library bundling to handle symlinks for portable distribution - Updated CMake script to resolve and copy real targets of symlinks, then recreate symlinks in build dir - Modified chmod to only affect real .so* files, ignoring symlinks - Removed outdated comments and improved script clarity for better portability of whisper-cli binaries Replaced file(CREATE_LINK): We now use execute_process(COMMAND ln -sf ...) which is standard on Linux Changing Bundle Linked Libraries to use linux based ldd instead of cmake, I am trying to make the smallest changes to whisper.cpp as possible without modifying existing things like cmake files or adding more files. --- .github/workflows/build.yml | 575 +++++++++++++++++++++++++++++++++++- 1 file changed, 568 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5c1cf93ba2a..692e5e7697e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -45,6 +45,14 @@ on: options: - full-ci - release-only + gfx_target: + description: 'AMD GPU targets (comma-separated)' + required: false + default: 'gfx1151,gfx1150,gfx120X,gfx110X' + rocm_version: + description: 'ROCm version to use (e.g., 7.11.0a20251205) or "latest" to auto-detect' + required: false + default: 'latest' concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -57,6 +65,8 @@ env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} ubuntu_image: "ubuntu:22.04" VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" + GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X' }} + ROCM_VERSION: ${{ github.event.inputs.rocm_version || 'latest' }} jobs: determine-tag: @@ -64,13 +74,11 @@ jobs: outputs: tag_name: ${{ steps.tag.outputs.name }} should_release: ${{ steps.tag.outputs.should_release }} - steps: - name: Checkout with full history uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Determine tag name id: tag shell: bash @@ -114,6 +122,58 @@ jobs: echo "name=$TAG_NAME" >> $GITHUB_OUTPUT echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT + prepare-matrix: + runs-on: ubuntu-latest + outputs: + windows_matrix: ${{ steps.set-matrix.outputs.windows_matrix }} + ubuntu_matrix: ${{ steps.set-matrix.outputs.ubuntu_matrix }} + should_build_windows: ${{ steps.set-matrix.outputs.should_build_windows }} + should_build_ubuntu: ${{ steps.set-matrix.outputs.should_build_ubuntu }} + steps: + - name: Set matrix + id: set-matrix + run: | + targets="${{ env.GFX_TARGETS }}" + operating_systems="windows,ubuntu" + + echo "Input targets: $targets" + echo "Input operating systems: $operating_systems" + + target_array=$(echo "$targets" \ + | tr ',' '\n' \ + | sed 's/^ *//;s/ *$//' \ + | sed 's/^"//;s/"$//' \ + | jq -R . \ + | jq -s .) + + windows_matrix=$(echo "$target_array" \ + | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}') + + # 3. Create Ubuntu Matrix + ubuntu_matrix=$(echo "$target_array" \ + | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["linux/amd64"]}') + + # Check which operating systems to build + should_build_windows="false" + should_build_ubuntu="false" + + if [[ "$operating_systems" == *"windows"* ]]; then + should_build_windows="true" + echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT + fi + + if [[ "$operating_systems" == *"ubuntu"* ]]; then + should_build_ubuntu="true" + echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT + fi + + echo "should_build_windows=$should_build_windows" >> $GITHUB_OUTPUT + echo "should_build_ubuntu=$should_build_ubuntu" >> $GITHUB_OUTPUT + + echo "Windows build: $should_build_windows" + echo "Ubuntu build: $should_build_ubuntu" + echo "Generated matrix: $matrix_targets" + #linux/amd64 ubuntu-22: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -394,11 +454,7 @@ jobs: strategy: fail-fast: false matrix: - build: [Debug, Release] - #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] - # TODO: arm/v7 disabled due to clang bug - # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990 - arch: [linux/amd64, linux/arm64, linux/ppc64le] + ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} steps: - name: Clone @@ -426,6 +482,263 @@ jobs: cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang make ctest -L gh --output-on-failure' + + ubuntu-rocm: + if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || + github.event.inputs.run_type == 'full-ci' }} + runs-on: ubuntu-22.04 + needs: [determine-tag, prepare-matrix] + # Check if we should run (based on the prepare-matrix output) + strategy: + # Uses the matrix generated in prepare-matrix (gfx_target, arch, build, sdl2) + matrix: ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} + fail-fast: false + outputs: + rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} + + steps: + - name: Free disk space + run: curl -fsSL https://raw.githubusercontent.com/kou/arrow/e49d8ae15583ceff03237571569099a6ad62be32/ci/scripts/util_free_space.sh | bash + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt update + sudo apt install -y cmake ninja-build unzip curl build-essential libsdl2-dev git patchelf + + - name: Download and extract ROCm directly to /opt/rocm + run: | + # Determine ROCm version to use + rocm_version="${{ env.ROCM_VERSION }}" + current_target="${{ matrix.gfx_target }}" + + # Add appropriate suffixes for different GPU targets + s3_target="$current_target" + if [ "$current_target" = "gfx110X" ]; then + s3_target="${current_target}-dgpu" + echo "Using S3 target with -dgpu suffix: $s3_target" + elif [ "$current_target" = "gfx120X" ]; then + s3_target="${current_target}-all" + echo "Using S3 target with -all suffix: $s3_target" + fi + + if [ "$rocm_version" = "latest" ]; then + echo "Auto-detecting latest ROCm version for target: $current_target" + s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-linux-${s3_target}-7") + + # Extract all files + files=$(echo "$s3_response" | grep -oP '(?<=)[^<]*' | grep "therock-dist-linux-${s3_target}-") + + # Extract versions and sort them properly + latest_file="" + latest_major=0 + latest_minor=0 + latest_patch=0 + latest_rc=0 + latest_is_alpha=false + + while IFS= read -r file; do + if [[ "$file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + version="${BASH_REMATCH[1]}" + major=$(echo "$version" | cut -d. -f1) + minor=$(echo "$version" | cut -d. -f2) + patch=$(echo "$version" | cut -d. -f3 | sed 's/\(a\|rc\).*//') + rc=$(echo "$version" | sed 's/.*\(a\|rc\)//') + is_alpha=false + if [[ "$version" =~ a ]]; then is_alpha=true; fi + + is_newer=false + if [ "$major" -gt "$latest_major" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then + if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then is_newer=true; + elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; + fi + fi + + if [ "$is_newer" = true ]; then + latest_file="$file" + latest_major="$major" + latest_minor="$minor" + latest_patch="$patch" + latest_rc="$rc" + latest_is_alpha="$is_alpha" + fi + fi + done <<< "$files" + + echo "Found latest file: $latest_file" + + if [[ "$latest_file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + rocm_version="${BASH_REMATCH[1]}" + echo "Detected latest ROCm version: $rocm_version" + else + echo "Failed to extract ROCm version from latest file: $latest_file" + exit 1 + fi + + rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/$latest_file" + else + rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-${s3_target}-${rocm_version}.tar.gz" + fi + + echo "DETECTED_ROCM_VERSION=$rocm_version" >> $GITHUB_ENV + + # Create directory and stream extraction + sudo mkdir -p /opt/rocm + curl -sL "$rocm_url" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 + + - name: Set ROCm environment variables + run: | + echo "HIP_PATH=/opt/rocm" >> $GITHUB_ENV + echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV + echo "HIP_PLATFORM=amd" >> $GITHUB_ENV + echo "/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH" >> $GITHUB_PATH + + - name: Find ROCm bitcode path + run: | + # Dynamically find the directory containing device libraries (amdgcn/bitcode) + BITCODE_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) + + if [ -z "$BITCODE_PATH" ]; then + echo "::error::Could not find 'bitcode' directory in /opt/rocm" + find /opt/rocm -maxdepth 5 + exit 1 + fi + + echo "Found bitcode at: $BITCODE_PATH" + echo "ROCM_BITCODE_PATH=$BITCODE_PATH" >> $GITHUB_ENV + + - name: Configure CMake + run: | + + # Map GPU targets + current_target="${{ matrix.gfx_target }}" + echo "Input target: $current_target" + + if [ "$current_target" = "gfx110X" ]; then + mapped_target="gfx1100;gfx1101;gfx1102" + elif [ "$current_target" = "gfx1151" ]; then + mapped_target="gfx1151" + elif [ "$current_target" = "gfx1150" ]; then + mapped_target="gfx1150" + elif [ "$current_target" = "gfx120X" ]; then + mapped_target="gfx1200;gfx1201" + else + mapped_target="$current_target" + fi + echo "Mapped target: $mapped_target" + + cmake -S . -B build -G Ninja \ + -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \ + -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ + -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm --rocm-device-lib-path=${{ env.ROCM_BITCODE_PATH }}" \ + -DCMAKE_PREFIX_PATH=/opt/rocm \ + -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + -DGPU_TARGETS="$mapped_target" \ + -DBUILD_SHARED_LIBS=ON \ + -DGGML_HIP=ON \ + -DGGML_ROCM=1 \ + -DWHISPER_SDL2=${{ matrix.sdl2 }} + + - name: Build + run: cmake --build build --config ${{ matrix.build }} -j $(nproc) + + # Copy Backend ROCm Folders --- + - name: Copy ROCm core libs to build directory + run: | + build_bin_path="build/bin" + rocm_bin_path="/opt/rocm/bin" + rocm_lib_path="/opt/rocm/lib" + + # Copy rocblas/library folder + rocblas_lib_path="$rocm_lib_path/rocblas/library" + if [ -d "$rocblas_lib_path" ]; then + dest_rocblas_path="$build_bin_path/rocblas/library" + mkdir -p "$(dirname "$dest_rocblas_path")" + cp -r "$rocblas_lib_path" "$(dirname "$dest_rocblas_path")/" + echo "Copied: rocblas/library" + fi + + # Copy hipblaslt/library folder + hipblaslt_lib_path="$rocm_lib_path/hipblaslt/library" + if [ -d "$hipblaslt_lib_path" ]; then + dest_hipblaslt_path="$build_bin_path/hipblaslt/library" + mkdir -p "$(dirname "$dest_hipblaslt_path")" + cp -r "$hipblaslt_lib_path" "$(dirname "$dest_hipblaslt_path")/" + echo "Copied: hipblaslt/library" + fi + + # Copy required shared libraries + # We use generic wildcards to catch versioned .so files + echo "Copying shared libraries..." + cp -v $rocm_lib_path/libhipblas.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/librocblas.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libamdhip64.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/librocsolver.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libroctx64.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libhipblaslt.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libamd_comgr.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libhsa-runtime64.so* "$build_bin_path/" 2>/dev/null || true + + # Copy LLVM runtime libs often needed + cp -v $rocm_lib_path/llvm/lib/libLLVM.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/llvm/lib/libclang-cpp.so* "$build_bin_path/" 2>/dev/null || true + + - name: Bundle Linked Libraries + run: | + build_bin="build/bin" + echo "Scanning dependencies for whisper-cli..." + + ldd "$build_bin/whisper-cli" | grep "=> /" | while read -r line; do + + soname=$(echo "$line" | awk '{print $1}') + path=$(echo "$line" | awk '{print $3}') + + if [[ "$soname" =~ ^(libc\.so|libm\.so|libdl\.so|librt\.so|libpthread\.so|libstdc\+\+|libgcc_s|ld-linux) ]]; then + continue + fi + echo "Bundling: $soname" + echo " Source: $path" + cp -L "$path" "$build_bin/$soname" + done + chmod +x "$build_bin"/*.so* + + - name: Set RPATH for portable distribution + run: | + cd build/bin + # Set RPATH to $ORIGIN so the binary looks for .so files in its own directory + # wildcards catch whisper-cli, whisper-bench, etc., plus shared libs + for file in *.so* whisper-*; do + if [ -f "$file" ] && [ ! -L "$file" ]; then + # Only patch ELF files (executables and shared objects) + if file "$file" | grep -q "ELF"; then + patchelf --set-rpath '$ORIGIN' "$file" 2>/dev/null || true + echo "Patched RPATH for $file" + fi + fi + done + + - name: Pack bin artifacts + run: | + cd build/bin + SAFE_ARCH=$(echo "${{ matrix.arch }}" | tr '/' '-') + + # 1. Save SAFE_ARCH to GITHUB_ENV so the next step can use it + echo "SAFE_ARCH=$SAFE_ARCH" >> $GITHUB_ENV + + # Include target in filename (e.g., whisper-bin-gfx1100-linux-amd64.zip) + zip -r ../../whisper-bin-${{ matrix.gfx_target }}-${SAFE_ARCH}.zip . + + - name: Upload binaries + if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + uses: actions/upload-artifact@v4 + with: + name: whisper-bin-${{ matrix.gfx_target }}-${{ env.SAFE_ARCH }} + path: whisper-bin-*.zip ubuntu-22-gcc-sanitized: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -1001,6 +1314,252 @@ jobs: with: name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip path: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip + + windows-rocm: + if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || + github.event.inputs.run_type == 'full-ci' }} + runs-on: windows-latest + needs: [determine-tag, prepare-matrix] + strategy: + matrix: ${{fromJson(needs.prepare-matrix.outputs.windows_matrix)}} + fail-fast: false + outputs: + rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} + + steps: + - name: Clone + uses: actions/checkout@v4 + + - name: Install Ninja + run: choco install ninja + + - name: Fetch SDL2 and Patch Header (Robust) + if: matrix.sdl2 == 'ON' + shell: pwsh + run: | + $sdlVer = "${{ matrix.s2ver }}" + $url = "https://github.com/libsdl-org/SDL/releases/download/release-$sdlVer/SDL2-devel-$sdlVer-VC.zip" + + Write-Host "Downloading SDL2 from $url..." + Invoke-WebRequest -Uri $url -OutFile "sdl2.zip" + + Write-Host "Extracting SDL2..." + 7z x sdl2.zip + + # 1. Locate CMake config dynamically + $cmakeFile = Get-ChildItem -Path . -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1 + if ($cmakeFile) { + $cmakeDir = $cmakeFile.DirectoryName + Write-Host "Found SDL2 CMake dir at: $cmakeDir" + echo "SDL2_DIR=$cmakeDir" >> $env:GITHUB_ENV + } else { + Write-Error "FATAL: Could not find sdl2-config.cmake inside extracted files!" + exit 1 + } + + # 2. Find and Patch SDL_endian.h recursively + Write-Host "Searching for SDL_endian.h..." + $headerFile = Get-ChildItem -Path . -Recurse -Filter "SDL_endian.h" | Select-Object -First 1 + + if ($headerFile) { + Write-Host "Found header at: $($headerFile.FullName)" + $content = Get-Content $headerFile.FullName -Raw + + # The Fix: Comment out the extern declaration + if ($content -match 'extern void _m_prefetch') { + $content = $content -replace 'extern void _m_prefetch\(void \*__P\);', '// extern void _m_prefetch(void *__P);' + Set-Content -Path $headerFile.FullName -Value $content + Write-Host "SUCCESS: Patched _m_prefetch in SDL_endian.h" + } else { + Write-Host "WARNING: _m_prefetch string not found. It might be a different SDL version or already patched." + } + } else { + # Debug output if file is missing + Write-Host "Listing root directories:" + Get-ChildItem -Path . -Directory | Format-Table Name + Write-Error "FATAL: Could not locate SDL_endian.h in the workspace." + exit 1 + } + + - name: Download ROCm nightly tarball + run: | + # Determine ROCm version to use + $rocmVersion = "${{ env.ROCM_VERSION }}" + $currentTarget = "${{ matrix.gfx_target }}" + + $s3Target = $currentTarget + if ($currentTarget -eq "gfx110X") { + $s3Target = "$currentTarget-dgpu" + Write-Host "Using S3 target with -dgpu suffix: $s3Target" + } elseif ($currentTarget -eq "gfx120X") { + $s3Target = "$currentTarget-all" + Write-Host "Using S3 target with -all suffix: $s3Target" + } + + if ($rocmVersion -eq "latest") { + Write-Host "Auto-detecting latest ROCm version for target: $currentTarget" + $s3Response = (Invoke-WebRequest "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-windows-$s3Target-7").Content + $files = $s3Response -split '' | Where-Object {$_ -match ''} | ForEach-Object { ($_ -split '')[0] } + + $versionFiles = @() + foreach ($file in $files) { + if ($file -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { + $version = $matches[1] + $versionFiles += [PSCustomObject]@{ + File = $file + Version = $version + Major = [int]($version -split '\.')[0] + Minor = [int]($version -split '\.')[1] + Patch = [int](($version -split '\.')[2] -replace '(?:a|rc).*', '') + RC = [int]($version -replace '.*(?:a|rc)', '') + IsAlpha = $version -match 'a' + } + } + } + + $latestFile = ($versionFiles | Sort-Object Major, Minor, Patch, @{Expression={if($_.IsAlpha){1}else{0}}}, RC | Select-Object -Last 1).File + Write-Host "Found latest file: $latestFile" + + if ($latestFile -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { + $rocmVersion = $matches[1] + Write-Host "Detected latest ROCm version: $rocmVersion" + } else { + Write-Error "Failed to extract ROCm version from latest file: $latestFile" + exit 1 + } + $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$latestFile" + } else { + $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-windows-$s3Target-$rocmVersion.tar.gz" + } + + echo "DETECTED_ROCM_VERSION=$rocmVersion" >> $env:GITHUB_ENV + Invoke-WebRequest -Uri $rocmUrl -OutFile "rocm.tar.gz" + + - name: Extract ROCm to C:\opt\rocm + run: | + New-Item -ItemType Directory -Force -Path "C:\opt\rocm" + tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 + + - name: Configure + shell: pwsh + run: | + $currentTarget = "${{ matrix.gfx_target }}" + Write-Host "Input target: $currentTarget" + + if ($currentTarget -eq "gfx110X") { + $mapped_target = "gfx1100;gfx1101;gfx1102" + } elseif ($currentTarget -eq "gfx1151") { + $mapped_target = "gfx1151" + } elseif ($currentTarget -eq "gfx1150") { + $mapped_target = "gfx1150" + } elseif ($currentTarget -eq "gfx120X") { + $mapped_target = "gfx1200;gfx1201" + } else { + $mapped_target = $currentTarget + } + Write-Host "Mapped target: $mapped_target" + + # Set up environment variables and PATH + $env:HIP_PATH = "C:\opt\rocm" + $env:HIP_PLATFORM = "amd" + # Ensure bin comes before llvm\bin for consistency + $env:PATH = "$env:HIP_PATH\bin;$env:HIP_PATH\lib\llvm\bin;$env:PATH" + + # Define CMake arguments + $cmakeArgs = @( + "-S", ".", + "-B", "build", + "-G", "Ninja Multi-Config", + "-DGPU_TARGETS=$mapped_target", + "-DGGML_HIP=ON", + "-DCMAKE_C_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang.exe", + "-DCMAKE_CXX_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", + "-DCMAKE_HIP_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", + "-DCMAKE_C_FLAGS='-D__PRFCHWINTRIN_H'", + "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'", + "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm", + "-DCMAKE_PREFIX_PATH=$env:HIP_PATH", + "-DGGML_ROCM=1", + "-DCMAKE_BUILD_TYPE=${{ matrix.build }}", + "-DBUILD_SHARED_LIBS=ON", + "-DWHISPER_SDL2=${{ matrix.sdl2 }}" + ) + # Run CMake + cmake @cmakeArgs + + - name: Build + shell: pwsh + run: | + cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS + + - name: Copy ROCm core DLLs to build directory + run: | + $rocmVersion = if ($env:DETECTED_ROCM_VERSION) { $env:DETECTED_ROCM_VERSION } else { $env:ROCM_VERSION } + $buildBinPath = "build/bin/${{ matrix.build }}" + $rocmBinPath = "C:\opt\rocm\bin" + + Write-Host "Copying ROCm core DLL files..." + + if (Test-Path $rocmBinPath) { + # Copy files matching patterns + $filesToCopy = @( + "amdhip64_*.dll", + "amd_comgr*.dll", + "libhipblas.dll", + "rocblas.dll", + "rocsolver.dll", + "hipblaslt.dll", + "libhipblaslt.dll", + "hipblas.dll" + ) + + foreach ($pattern in $filesToCopy) { + $matchingFiles = Get-ChildItem -Path $rocmBinPath -Name $pattern -ErrorAction SilentlyContinue + if ($matchingFiles) { + foreach ($file in $matchingFiles) { + Copy-Item (Join-Path $rocmBinPath $file) (Join-Path $buildBinPath $file) + Write-Host "Copied: $file" + } + } + } + + # Copy rocblas/library + $rocblasLibPath = Join-Path $rocmBinPath "rocblas\library" + if (Test-Path $rocblasLibPath) { + Copy-Item -Path $rocblasLibPath -Destination (Join-Path $buildBinPath "rocblas\library") -Recurse -Force + Write-Host "Copied: rocblas\library" + } + + # Copy hipblaslt/library + $hipblasltLibPath = Join-Path $rocmBinPath "hipblaslt\library" + if (Test-Path $hipblasltLibPath) { + Copy-Item -Path $hipblasltLibPath -Destination (Join-Path $buildBinPath "hipblaslt\library") -Recurse -Force + Write-Host "Copied: hipblaslt\library" + } + } + + - name: Copy SDL2.dll + if: matrix.sdl2 == 'ON' + run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} + + - name: Copy SDL2.dll + if: matrix.sdl2 == 'ON' + run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" "build/bin/${{ matrix.build }}" + + - name: Pack bin artifacts + shell: pwsh + run: | + # Create unique zip name with target suffix + $zipName = "whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip" + Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath $zipName + + - name: Upload binaries + if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + uses: actions/upload-artifact@v4 + with: + # Unique artifact name per matrix job + name: whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip + path: whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip emscripten: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -1265,6 +1824,8 @@ jobs: - windows - windows-blas - windows-cublas + - windows-rocm + - ubuntu-rocm steps: - name: Clone From acc507e3c012e46e654feb78eafdf2a59fa6d2b8 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 29 Jan 2026 12:17:59 -0800 Subject: [PATCH 02/55] ci: copy ROCm sysdep libs and ensure build directory exists Add copying of ROCm system dependency libraries (e.g., elf, drm, numa) to the build bundle in CI to include required shared libraries for proper ROCm functionality. Also ensure the build directory is created if it doesn't exist to avoid copy failures. --- .github/workflows/build.yml | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 692e5e7697e..7fdee1cd50e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -648,12 +648,16 @@ jobs: run: cmake --build build --config ${{ matrix.build }} -j $(nproc) # Copy Backend ROCm Folders --- - - name: Copy ROCm core libs to build directory + - name: Copy ROCm core and sysdep libs to build directory run: | build_bin_path="build/bin" rocm_bin_path="/opt/rocm/bin" rocm_lib_path="/opt/rocm/lib" + rocm_sysdeps_path="/opt/rocm/lib/rocm_sysdeps/lib" + # Ensure build directory exists + mkdir -p "$build_bin_path" + # Copy rocblas/library folder rocblas_lib_path="$rocm_lib_path/rocblas/library" if [ -d "$rocblas_lib_path" ]; then @@ -672,9 +676,8 @@ jobs: echo "Copied: hipblaslt/library" fi - # Copy required shared libraries - # We use generic wildcards to catch versioned .so files - echo "Copying shared libraries..." + # Copy standard ROCm shared libraries + echo "Copying core shared libraries..." cp -v $rocm_lib_path/libhipblas.so* "$build_bin_path/" 2>/dev/null || true cp -v $rocm_lib_path/librocblas.so* "$build_bin_path/" 2>/dev/null || true cp -v $rocm_lib_path/libamdhip64.so* "$build_bin_path/" 2>/dev/null || true @@ -684,10 +687,16 @@ jobs: cp -v $rocm_lib_path/libamd_comgr.so* "$build_bin_path/" 2>/dev/null || true cp -v $rocm_lib_path/libhsa-runtime64.so* "$build_bin_path/" 2>/dev/null || true - # Copy LLVM runtime libs often needed + # Copy LLVM runtime libs cp -v $rocm_lib_path/llvm/lib/libLLVM.so* "$build_bin_path/" 2>/dev/null || true cp -v $rocm_lib_path/llvm/lib/libclang-cpp.so* "$build_bin_path/" 2>/dev/null || true + if [ -d "$rocm_sysdeps_path" ]; then + echo "Copying sysdep libraries from $rocm_sysdeps_path..." + # Using a broad wildcard ensures we grab elf.so.1, drm.so.2, numa.so.1, etc. + cp -v $rocm_sysdeps_path/librocm_sysdeps_*.so* "$build_bin_path/" + fi + - name: Bundle Linked Libraries run: | build_bin="build/bin" From e7166266d26c6cbeb0350de0e6b998e37ffebf11 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 26 Feb 2026 12:04:15 -0800 Subject: [PATCH 03/55] Added workflows and actions for self hosted runners. --- .../cleanup-processes-linux/action.yml | 19 +++ .../cleanup-processes-windows/action.yml | 15 +++ .github/workflows/build.yml | 111 ++++++++++++++++++ .github/workflows/runner_heartbeat.yml | 59 ++++++++++ ci/run.sh | 2 +- 5 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 .github/actions/cleanup-processes-linux/action.yml create mode 100644 .github/actions/cleanup-processes-windows/action.yml create mode 100644 .github/workflows/runner_heartbeat.yml diff --git a/.github/actions/cleanup-processes-linux/action.yml b/.github/actions/cleanup-processes-linux/action.yml new file mode 100644 index 00000000000..58649fcc41b --- /dev/null +++ b/.github/actions/cleanup-processes-linux/action.yml @@ -0,0 +1,19 @@ +name: 'Cleanup GPU Processes (Linux)' +description: 'Kill zombie whisper/GPU processes on self-hosted Linux runners' + +runs: + using: 'composite' + steps: + - name: Kill zombie processes + shell: bash + run: | + echo "=== Cleaning up stale processes ===" + pkill -f "whisper-cli" 2>/dev/null || true + pkill -f "whisper-bench" 2>/dev/null || true + pkill -f "whisper-server" 2>/dev/null || true + pkill -f "ctest.*whisper" 2>/dev/null || true + if command -v rocm-smi &>/dev/null; then + echo "=== GPU process check ===" + rocm-smi --showpids 2>/dev/null || true + fi + echo "=== Cleanup complete ===" diff --git a/.github/actions/cleanup-processes-windows/action.yml b/.github/actions/cleanup-processes-windows/action.yml new file mode 100644 index 00000000000..91a9424dd22 --- /dev/null +++ b/.github/actions/cleanup-processes-windows/action.yml @@ -0,0 +1,15 @@ +name: 'Cleanup GPU Processes (Windows)' +description: 'Kill zombie whisper/GPU processes on self-hosted Windows runners' + +runs: + using: 'composite' + steps: + - name: Kill zombie processes + shell: pwsh + run: | + Write-Host "=== Cleaning up stale processes ===" + $processNames = @("whisper-cli", "whisper-bench", "whisper-server", "ctest") + foreach ($name in $processNames) { + Get-Process -Name $name -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue + } + Write-Host "=== Cleanup complete ===" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7fdee1cd50e..8a61f838c23 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2148,3 +2148,114 @@ jobs: run: | vulkaninfo --summary GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp + + # AMD ROCm GPU Testing (self-hosted runners) + test-rocm-linux: + runs-on: ${{ matrix.runner }} + + strategy: + fail-fast: false + matrix: + include: + - gfx_target: gfx1151 + runner: [stx-halo, Linux] + # Uncomment when runners are available: + # - gfx_target: gfx1100 + # runner: [navi31, Linux] + # - gfx_target: gfx1200 + # runner: [rdna4, Linux] + # - gfx_target: gfx1150 + # runner: [rai300_400, Linux] + + concurrency: + group: rocm-test-linux-${{ matrix.gfx_target }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + + timeout-minutes: 120 + + steps: + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-linux + + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Verify ROCm installation + run: | + echo "=== ROCm Environment ===" + rocm-smi || echo "rocm-smi not found" + rocminfo | head -40 || echo "rocminfo not found" + hipcc --version || echo "hipcc not found" + echo "=== GPU Info ===" + rocm-smi --showproductname 2>/dev/null || true + + - name: Test + id: ggml-ci + run: | + GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS=${{ matrix.gfx_target }} GG_BUILD_LOW_PERF=1 \ + bash ./ci/run.sh ~/results/whisper.cpp-rocm-${{ matrix.gfx_target }} /mnt/whisper.cpp + + - name: Cleanup after run + if: always() + uses: ./.github/actions/cleanup-processes-linux + + test-rocm-windows: + runs-on: ${{ matrix.runner }} + + strategy: + fail-fast: false + matrix: + include: + - gfx_target: gfx1151 + runner: [stx-halo, Windows] + # Uncomment when runners are available: + # - gfx_target: gfx1100 + # runner: [navi31, Windows] + # - gfx_target: gfx1200 + # runner: [rdna4, Windows] + # - gfx_target: gfx1150 + # runner: [rai300_400, Windows] + + concurrency: + group: rocm-test-windows-${{ matrix.gfx_target }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + + timeout-minutes: 120 + + steps: + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-windows + + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Verify ROCm installation + shell: pwsh + run: | + Write-Host "=== ROCm Environment ===" + & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null + & "$env:HIP_PATH\bin\hipcc.exe" --version 2>$null + + - name: Configure ROCm environment + shell: pwsh + run: | + $rocmPath = $env:HIP_PATH + if (-not $rocmPath) { $rocmPath = "C:\opt\rocm" } + echo "HIP_PATH=$rocmPath" >> $env:GITHUB_ENV + echo "ROCM_PATH=$rocmPath" >> $env:GITHUB_ENV + echo "HIP_PLATFORM=amd" >> $env:GITHUB_ENV + echo "$rocmPath\bin" >> $env:GITHUB_PATH + echo "$rocmPath\lib\llvm\bin" >> $env:GITHUB_PATH + + - name: Test + id: ggml-ci + shell: bash + run: | + GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS=${{ matrix.gfx_target }} GG_BUILD_LOW_PERF=1 \ + bash ./ci/run.sh ~/results/whisper.cpp-rocm-${{ matrix.gfx_target }} /mnt/whisper.cpp + + - name: Cleanup after run + if: always() + uses: ./.github/actions/cleanup-processes-windows diff --git a/.github/workflows/runner_heartbeat.yml b/.github/workflows/runner_heartbeat.yml new file mode 100644 index 00000000000..dff7d7f05a3 --- /dev/null +++ b/.github/workflows/runner_heartbeat.yml @@ -0,0 +1,59 @@ +name: Runner Heartbeat + +on: + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +jobs: + check-rocm-linux: + strategy: + fail-fast: false + matrix: + include: + - runner: [rai300_400, Linux] + name: rai300-400-linux + - runner: [stx-halo, Linux] + name: stx-halo-linux + runs-on: ${{ matrix.runner }} + timeout-minutes: 10 + steps: + - name: Heartbeat + run: | + echo "=== Runner Heartbeat: ${{ matrix.name }} ===" + echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "Hostname: $(hostname)" + echo "=== GPU Status ===" + rocm-smi 2>/dev/null || echo "rocm-smi not available" + echo "=== Disk Space ===" + df -h / /mnt 2>/dev/null || df -h / + echo "=== Memory ===" + free -h + echo "=== ROCm Version ===" + cat /opt/rocm/.info/version 2>/dev/null || echo "ROCm version file not found" + + check-rocm-windows: + strategy: + fail-fast: false + matrix: + include: + - runner: [rai300_400, Windows] + name: rai300-400-windows + - runner: [stx-halo, Windows] + name: stx-halo-windows + runs-on: ${{ matrix.runner }} + timeout-minutes: 10 + steps: + - name: Heartbeat + shell: pwsh + run: | + Write-Host "=== Runner Heartbeat: ${{ matrix.name }} ===" + Write-Host "Timestamp: $(Get-Date -Format o)" + Write-Host "Hostname: $env:COMPUTERNAME" + Write-Host "=== GPU Status ===" + & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null + Write-Host "=== Disk Space ===" + Get-PSDrive -PSProvider FileSystem | Format-Table Name, Used, Free -AutoSize + Write-Host "=== Memory ===" + $os = Get-CimInstance Win32_OperatingSystem + Write-Host "Free: $([math]::Round($os.FreePhysicalMemory/1MB, 1)) GB / Total: $([math]::Round($os.TotalVisibleMemorySize/1MB, 1)) GB" diff --git a/ci/run.sh b/ci/run.sh index cbe28442e16..59e2cefad52 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -223,7 +223,7 @@ function gg_run_ctest { gg_check_build_requirements (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + (time cmake --build . --config ${mode} -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log From 1c5061f545f28e15e0b4c46477c6fe32555177df Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 26 Feb 2026 12:49:28 -0800 Subject: [PATCH 04/55] Reorder jobs. --- .github/workflows/build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ecec06d4dfd..e9e1006ba18 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2154,13 +2154,13 @@ jobs: timeout-minutes: 120 steps: - - name: Cleanup before run - uses: ./.github/actions/cleanup-processes-linux - - name: Clone id: checkout uses: actions/checkout@v4 + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-linux + - name: Verify ROCm installation run: | echo "=== ROCm Environment ===" @@ -2204,13 +2204,13 @@ jobs: timeout-minutes: 120 steps: - - name: Cleanup before run - uses: ./.github/actions/cleanup-processes-windows - - name: Clone id: checkout uses: actions/checkout@v4 + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-windows + - name: Verify ROCm installation shell: pwsh run: | From a4a6a712848aaa80b49d0eacf56999cec06614c7 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 26 Feb 2026 13:05:11 -0800 Subject: [PATCH 05/55] adding exports for rocm path and hip path. --- .github/workflows/build.yml | 4 ++-- ci/run.sh | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e9e1006ba18..553a16884ec 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1003,7 +1003,7 @@ jobs: Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-bin-${{ matrix.arch }}.zip" - name: Upload binaries - if: matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }} + if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v6 with: name: whisper-bin-${{ matrix.arch }}.zip @@ -1089,7 +1089,7 @@ jobs: Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-blas-bin-${{ matrix.arch }}.zip" - name: Upload binaries - if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }} + if: ${{ matrix.blas == 'ON' && matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v6 with: name: whisper-blas-bin-${{ matrix.arch }}.zip diff --git a/ci/run.sh b/ci/run.sh index 59e2cefad52..7e69ec91f3b 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -79,6 +79,12 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then fi CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}" + + # Set HIP environment if not already set + export HIP_PLATFORM=${HIP_PLATFORM:-amd} + export ROCM_PATH=${ROCM_PATH:-/opt/rocm} + export HIP_PATH=${HIP_PATH:-/opt/rocm} + CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_HIP_COMPILER=${ROCM_PATH}/lib/llvm/bin/clang++" fi if [ ! -z ${GG_BUILD_SYCL} ]; then From 1e0ec6db17ceeb4ffc77510b44b94072efa2c370 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 26 Feb 2026 13:11:14 -0800 Subject: [PATCH 06/55] adding rocm lib path to ld_library_path --- ci/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/run.sh b/ci/run.sh index 7e69ec91f3b..85ade166658 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -84,6 +84,7 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then export HIP_PLATFORM=${HIP_PLATFORM:-amd} export ROCM_PATH=${ROCM_PATH:-/opt/rocm} export HIP_PATH=${HIP_PATH:-/opt/rocm} + export LD_LIBRARY_PATH=${ROCM_PATH}/lib:${LD_LIBRARY_PATH} CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_HIP_COMPILER=${ROCM_PATH}/lib/llvm/bin/clang++" fi From bc289aa7b93e29d3fcba9213df7c924f347e8de6 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Mon, 2 Mar 2026 22:56:26 -0800 Subject: [PATCH 07/55] Fixed a merge conflict. --- .github/workflows/build.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 553a16884ec..347041a836d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -438,11 +438,12 @@ jobs: strategy: fail-fast: false matrix: - ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} + arch: [linux/amd64] + build: [Release] steps: - name: Clone - uses: actions/checkout@v6 + uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v3 From c8a540dff94588c919fa9ba6967659efe8dd18f8 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Tue, 3 Mar 2026 10:45:03 -0800 Subject: [PATCH 08/55] Add AMD ROCm GPU build and test CI infrastructure - Add self-hosted runner test jobs (test-rocm-linux, test-rocm-windows) for gfx1151/gfx1150 - Add cleanup composite actions for Linux and Windows runners - Add runner heartbeat monitoring workflow - Configure ci/run.sh with ROCm environment (HIP_PLATFORM, LD_LIBRARY_PATH, cmake flags) - Add Windows ROCm build support to build.yml - Fix conditional expression syntax warnings in build.yml --- .../cleanup-processes-linux/action.yml | 19 + .../cleanup-processes-windows/action.yml | 15 + .github/workflows/build.yml | 702 +++++++++++++++++- .github/workflows/runner_heartbeat.yml | 59 ++ ci/run.sh | 9 +- 5 files changed, 793 insertions(+), 11 deletions(-) create mode 100644 .github/actions/cleanup-processes-linux/action.yml create mode 100644 .github/actions/cleanup-processes-windows/action.yml create mode 100644 .github/workflows/runner_heartbeat.yml diff --git a/.github/actions/cleanup-processes-linux/action.yml b/.github/actions/cleanup-processes-linux/action.yml new file mode 100644 index 00000000000..58649fcc41b --- /dev/null +++ b/.github/actions/cleanup-processes-linux/action.yml @@ -0,0 +1,19 @@ +name: 'Cleanup GPU Processes (Linux)' +description: 'Kill zombie whisper/GPU processes on self-hosted Linux runners' + +runs: + using: 'composite' + steps: + - name: Kill zombie processes + shell: bash + run: | + echo "=== Cleaning up stale processes ===" + pkill -f "whisper-cli" 2>/dev/null || true + pkill -f "whisper-bench" 2>/dev/null || true + pkill -f "whisper-server" 2>/dev/null || true + pkill -f "ctest.*whisper" 2>/dev/null || true + if command -v rocm-smi &>/dev/null; then + echo "=== GPU process check ===" + rocm-smi --showpids 2>/dev/null || true + fi + echo "=== Cleanup complete ===" diff --git a/.github/actions/cleanup-processes-windows/action.yml b/.github/actions/cleanup-processes-windows/action.yml new file mode 100644 index 00000000000..91a9424dd22 --- /dev/null +++ b/.github/actions/cleanup-processes-windows/action.yml @@ -0,0 +1,15 @@ +name: 'Cleanup GPU Processes (Windows)' +description: 'Kill zombie whisper/GPU processes on self-hosted Windows runners' + +runs: + using: 'composite' + steps: + - name: Kill zombie processes + shell: pwsh + run: | + Write-Host "=== Cleaning up stale processes ===" + $processNames = @("whisper-cli", "whisper-bench", "whisper-server", "ctest") + foreach ($name in $processNames) { + Get-Process -Name $name -ErrorAction SilentlyContinue | Stop-Process -Force -ErrorAction SilentlyContinue + } + Write-Host "=== Cleanup complete ===" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8ce887fd111..347041a836d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -45,6 +45,14 @@ on: options: - full-ci - release-only + gfx_target: + description: 'AMD GPU targets (comma-separated)' + required: false + default: 'gfx1151,gfx1150,gfx120X,gfx110X' + rocm_version: + description: 'ROCm version to use (e.g., 7.11.0a20251205) or "latest" to auto-detect' + required: false + default: 'latest' concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -57,6 +65,8 @@ env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} ubuntu_image: "ubuntu:22.04" VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" + GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X' }} + ROCM_VERSION: ${{ github.event.inputs.rocm_version || 'latest' }} jobs: determine-tag: @@ -64,13 +74,11 @@ jobs: outputs: tag_name: ${{ steps.tag.outputs.name }} should_release: ${{ steps.tag.outputs.should_release }} - steps: - name: Checkout with full history uses: actions/checkout@v6 with: fetch-depth: 0 - - name: Determine tag name id: tag shell: bash @@ -114,6 +122,58 @@ jobs: echo "name=$TAG_NAME" >> $GITHUB_OUTPUT echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT + prepare-matrix: + runs-on: ubuntu-latest + outputs: + windows_matrix: ${{ steps.set-matrix.outputs.windows_matrix }} + ubuntu_matrix: ${{ steps.set-matrix.outputs.ubuntu_matrix }} + should_build_windows: ${{ steps.set-matrix.outputs.should_build_windows }} + should_build_ubuntu: ${{ steps.set-matrix.outputs.should_build_ubuntu }} + steps: + - name: Set matrix + id: set-matrix + run: | + targets="${{ env.GFX_TARGETS }}" + operating_systems="windows,ubuntu" + + echo "Input targets: $targets" + echo "Input operating systems: $operating_systems" + + target_array=$(echo "$targets" \ + | tr ',' '\n' \ + | sed 's/^ *//;s/ *$//' \ + | sed 's/^"//;s/"$//' \ + | jq -R . \ + | jq -s .) + + windows_matrix=$(echo "$target_array" \ + | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}') + + # 3. Create Ubuntu Matrix + ubuntu_matrix=$(echo "$target_array" \ + | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["linux/amd64"]}') + + # Check which operating systems to build + should_build_windows="false" + should_build_ubuntu="false" + + if [[ "$operating_systems" == *"windows"* ]]; then + should_build_windows="true" + echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT + fi + + if [[ "$operating_systems" == *"ubuntu"* ]]; then + should_build_ubuntu="true" + echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT + fi + + echo "should_build_windows=$should_build_windows" >> $GITHUB_OUTPUT + echo "should_build_ubuntu=$should_build_ubuntu" >> $GITHUB_OUTPUT + + echo "Windows build: $should_build_windows" + echo "Ubuntu build: $should_build_ubuntu" + echo "Generated matrix: $matrix_targets" + #linux/amd64 ubuntu-22: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -378,15 +438,12 @@ jobs: strategy: fail-fast: false matrix: - build: [Debug, Release] - #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] - # TODO: arm/v7 disabled due to clang bug - # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990 - arch: [linux/amd64, linux/arm64, linux/ppc64le] + arch: [linux/amd64] + build: [Release] steps: - name: Clone - uses: actions/checkout@v6 + uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -406,6 +463,272 @@ jobs: cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang make ctest -L gh --output-on-failure' + + ubuntu-rocm: + if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || + github.event.inputs.run_type == 'full-ci' }} + runs-on: ubuntu-22.04 + needs: [determine-tag, prepare-matrix] + # Check if we should run (based on the prepare-matrix output) + strategy: + # Uses the matrix generated in prepare-matrix (gfx_target, arch, build, sdl2) + matrix: ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} + fail-fast: false + outputs: + rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} + + steps: + - name: Free disk space + run: curl -fsSL https://raw.githubusercontent.com/kou/arrow/e49d8ae15583ceff03237571569099a6ad62be32/ci/scripts/util_free_space.sh | bash + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt update + sudo apt install -y cmake ninja-build unzip curl build-essential libsdl2-dev git patchelf + + - name: Download and extract ROCm directly to /opt/rocm + run: | + # Determine ROCm version to use + rocm_version="${{ env.ROCM_VERSION }}" + current_target="${{ matrix.gfx_target }}" + + # Add appropriate suffixes for different GPU targets + s3_target="$current_target" + if [ "$current_target" = "gfx110X" ]; then + s3_target="${current_target}-dgpu" + echo "Using S3 target with -dgpu suffix: $s3_target" + elif [ "$current_target" = "gfx120X" ]; then + s3_target="${current_target}-all" + echo "Using S3 target with -all suffix: $s3_target" + fi + + if [ "$rocm_version" = "latest" ]; then + echo "Auto-detecting latest ROCm version for target: $current_target" + s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-linux-${s3_target}-7") + + # Extract all files + files=$(echo "$s3_response" | grep -oP '(?<=)[^<]*' | grep "therock-dist-linux-${s3_target}-") + + # Extract versions and sort them properly + latest_file="" + latest_major=0 + latest_minor=0 + latest_patch=0 + latest_rc=0 + latest_is_alpha=false + + while IFS= read -r file; do + if [[ "$file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + version="${BASH_REMATCH[1]}" + major=$(echo "$version" | cut -d. -f1) + minor=$(echo "$version" | cut -d. -f2) + patch=$(echo "$version" | cut -d. -f3 | sed 's/\(a\|rc\).*//') + rc=$(echo "$version" | sed 's/.*\(a\|rc\)//') + is_alpha=false + if [[ "$version" =~ a ]]; then is_alpha=true; fi + + is_newer=false + if [ "$major" -gt "$latest_major" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then + if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then is_newer=true; + elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; + fi + fi + + if [ "$is_newer" = true ]; then + latest_file="$file" + latest_major="$major" + latest_minor="$minor" + latest_patch="$patch" + latest_rc="$rc" + latest_is_alpha="$is_alpha" + fi + fi + done <<< "$files" + + echo "Found latest file: $latest_file" + + if [[ "$latest_file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + rocm_version="${BASH_REMATCH[1]}" + echo "Detected latest ROCm version: $rocm_version" + else + echo "Failed to extract ROCm version from latest file: $latest_file" + exit 1 + fi + + rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/$latest_file" + else + rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-${s3_target}-${rocm_version}.tar.gz" + fi + + echo "DETECTED_ROCM_VERSION=$rocm_version" >> $GITHUB_ENV + + # Create directory and stream extraction + sudo mkdir -p /opt/rocm + curl -sL "$rocm_url" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 + + - name: Set ROCm environment variables + run: | + echo "HIP_PATH=/opt/rocm" >> $GITHUB_ENV + echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV + echo "HIP_PLATFORM=amd" >> $GITHUB_ENV + echo "/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH" >> $GITHUB_PATH + + - name: Find ROCm bitcode path + run: | + # Dynamically find the directory containing device libraries (amdgcn/bitcode) + BITCODE_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) + + if [ -z "$BITCODE_PATH" ]; then + echo "::error::Could not find 'bitcode' directory in /opt/rocm" + find /opt/rocm -maxdepth 5 + exit 1 + fi + + echo "Found bitcode at: $BITCODE_PATH" + echo "ROCM_BITCODE_PATH=$BITCODE_PATH" >> $GITHUB_ENV + + - name: Configure CMake + run: | + + # Map GPU targets + current_target="${{ matrix.gfx_target }}" + echo "Input target: $current_target" + + if [ "$current_target" = "gfx110X" ]; then + mapped_target="gfx1100;gfx1101;gfx1102" + elif [ "$current_target" = "gfx1151" ]; then + mapped_target="gfx1151" + elif [ "$current_target" = "gfx1150" ]; then + mapped_target="gfx1150" + elif [ "$current_target" = "gfx120X" ]; then + mapped_target="gfx1200;gfx1201" + else + mapped_target="$current_target" + fi + echo "Mapped target: $mapped_target" + + cmake -S . -B build -G Ninja \ + -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \ + -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ + -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm --rocm-device-lib-path=${{ env.ROCM_BITCODE_PATH }}" \ + -DCMAKE_PREFIX_PATH=/opt/rocm \ + -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ + -DGPU_TARGETS="$mapped_target" \ + -DBUILD_SHARED_LIBS=ON \ + -DGGML_HIP=ON \ + -DGGML_ROCM=1 \ + -DWHISPER_SDL2=${{ matrix.sdl2 }} + + - name: Build + run: cmake --build build --config ${{ matrix.build }} -j $(nproc) + + # Copy Backend ROCm Folders --- + - name: Copy ROCm core and sysdep libs to build directory + run: | + build_bin_path="build/bin" + rocm_bin_path="/opt/rocm/bin" + rocm_lib_path="/opt/rocm/lib" + rocm_sysdeps_path="/opt/rocm/lib/rocm_sysdeps/lib" + + # Ensure build directory exists + mkdir -p "$build_bin_path" + + # Copy rocblas/library folder + rocblas_lib_path="$rocm_lib_path/rocblas/library" + if [ -d "$rocblas_lib_path" ]; then + dest_rocblas_path="$build_bin_path/rocblas/library" + mkdir -p "$(dirname "$dest_rocblas_path")" + cp -r "$rocblas_lib_path" "$(dirname "$dest_rocblas_path")/" + echo "Copied: rocblas/library" + fi + + # Copy hipblaslt/library folder + hipblaslt_lib_path="$rocm_lib_path/hipblaslt/library" + if [ -d "$hipblaslt_lib_path" ]; then + dest_hipblaslt_path="$build_bin_path/hipblaslt/library" + mkdir -p "$(dirname "$dest_hipblaslt_path")" + cp -r "$hipblaslt_lib_path" "$(dirname "$dest_hipblaslt_path")/" + echo "Copied: hipblaslt/library" + fi + + # Copy standard ROCm shared libraries + echo "Copying core shared libraries..." + cp -v $rocm_lib_path/libhipblas.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/librocblas.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libamdhip64.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/librocsolver.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libroctx64.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libhipblaslt.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libamd_comgr.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/libhsa-runtime64.so* "$build_bin_path/" 2>/dev/null || true + + # Copy LLVM runtime libs + cp -v $rocm_lib_path/llvm/lib/libLLVM.so* "$build_bin_path/" 2>/dev/null || true + cp -v $rocm_lib_path/llvm/lib/libclang-cpp.so* "$build_bin_path/" 2>/dev/null || true + + if [ -d "$rocm_sysdeps_path" ]; then + echo "Copying sysdep libraries from $rocm_sysdeps_path..." + # Using a broad wildcard ensures we grab elf.so.1, drm.so.2, numa.so.1, etc. + cp -v $rocm_sysdeps_path/librocm_sysdeps_*.so* "$build_bin_path/" + fi + + - name: Bundle Linked Libraries + run: | + build_bin="build/bin" + echo "Scanning dependencies for whisper-cli..." + + ldd "$build_bin/whisper-cli" | grep "=> /" | while read -r line; do + + soname=$(echo "$line" | awk '{print $1}') + path=$(echo "$line" | awk '{print $3}') + + if [[ "$soname" =~ ^(libc\.so|libm\.so|libdl\.so|librt\.so|libpthread\.so|libstdc\+\+|libgcc_s|ld-linux) ]]; then + continue + fi + echo "Bundling: $soname" + echo " Source: $path" + cp -L "$path" "$build_bin/$soname" + done + chmod +x "$build_bin"/*.so* + + - name: Set RPATH for portable distribution + run: | + cd build/bin + # Set RPATH to $ORIGIN so the binary looks for .so files in its own directory + # wildcards catch whisper-cli, whisper-bench, etc., plus shared libs + for file in *.so* whisper-*; do + if [ -f "$file" ] && [ ! -L "$file" ]; then + # Only patch ELF files (executables and shared objects) + if file "$file" | grep -q "ELF"; then + patchelf --set-rpath '$ORIGIN' "$file" 2>/dev/null || true + echo "Patched RPATH for $file" + fi + fi + done + + - name: Pack bin artifacts + run: | + cd build/bin + SAFE_ARCH=$(echo "${{ matrix.arch }}" | tr '/' '-') + + # 1. Save SAFE_ARCH to GITHUB_ENV so the next step can use it + echo "SAFE_ARCH=$SAFE_ARCH" >> $GITHUB_ENV + + # Include target in filename (e.g., whisper-bin-gfx1100-linux-amd64.zip) + zip -r ../../whisper-bin-${{ matrix.gfx_target }}-${SAFE_ARCH}.zip . + + - name: Upload binaries + if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + uses: actions/upload-artifact@v4 + with: + name: whisper-bin-${{ matrix.gfx_target }}-${{ env.SAFE_ARCH }} + path: whisper-bin-*.zip ubuntu-22-gcc-sanitized: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -681,7 +1004,7 @@ jobs: Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-bin-${{ matrix.arch }}.zip" - name: Upload binaries - if: matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }} + if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v6 with: name: whisper-bin-${{ matrix.arch }}.zip @@ -767,7 +1090,7 @@ jobs: Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-blas-bin-${{ matrix.arch }}.zip" - name: Upload binaries - if: matrix.blas == 'ON' && matrix.sdl2 == 'ON' && ${{ needs.determine-tag.outputs.should_release }} + if: ${{ matrix.blas == 'ON' && matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v6 with: name: whisper-blas-bin-${{ matrix.arch }}.zip @@ -981,6 +1304,252 @@ jobs: with: name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip path: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip + + windows-rocm: + if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || + github.event.inputs.run_type == 'full-ci' }} + runs-on: windows-latest + needs: [determine-tag, prepare-matrix] + strategy: + matrix: ${{fromJson(needs.prepare-matrix.outputs.windows_matrix)}} + fail-fast: false + outputs: + rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} + + steps: + - name: Clone + uses: actions/checkout@v4 + + - name: Install Ninja + run: choco install ninja + + - name: Fetch SDL2 and Patch Header (Robust) + if: matrix.sdl2 == 'ON' + shell: pwsh + run: | + $sdlVer = "${{ matrix.s2ver }}" + $url = "https://github.com/libsdl-org/SDL/releases/download/release-$sdlVer/SDL2-devel-$sdlVer-VC.zip" + + Write-Host "Downloading SDL2 from $url..." + Invoke-WebRequest -Uri $url -OutFile "sdl2.zip" + + Write-Host "Extracting SDL2..." + 7z x sdl2.zip + + # 1. Locate CMake config dynamically + $cmakeFile = Get-ChildItem -Path . -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1 + if ($cmakeFile) { + $cmakeDir = $cmakeFile.DirectoryName + Write-Host "Found SDL2 CMake dir at: $cmakeDir" + echo "SDL2_DIR=$cmakeDir" >> $env:GITHUB_ENV + } else { + Write-Error "FATAL: Could not find sdl2-config.cmake inside extracted files!" + exit 1 + } + + # 2. Find and Patch SDL_endian.h recursively + Write-Host "Searching for SDL_endian.h..." + $headerFile = Get-ChildItem -Path . -Recurse -Filter "SDL_endian.h" | Select-Object -First 1 + + if ($headerFile) { + Write-Host "Found header at: $($headerFile.FullName)" + $content = Get-Content $headerFile.FullName -Raw + + # The Fix: Comment out the extern declaration + if ($content -match 'extern void _m_prefetch') { + $content = $content -replace 'extern void _m_prefetch\(void \*__P\);', '// extern void _m_prefetch(void *__P);' + Set-Content -Path $headerFile.FullName -Value $content + Write-Host "SUCCESS: Patched _m_prefetch in SDL_endian.h" + } else { + Write-Host "WARNING: _m_prefetch string not found. It might be a different SDL version or already patched." + } + } else { + # Debug output if file is missing + Write-Host "Listing root directories:" + Get-ChildItem -Path . -Directory | Format-Table Name + Write-Error "FATAL: Could not locate SDL_endian.h in the workspace." + exit 1 + } + + - name: Download ROCm nightly tarball + run: | + # Determine ROCm version to use + $rocmVersion = "${{ env.ROCM_VERSION }}" + $currentTarget = "${{ matrix.gfx_target }}" + + $s3Target = $currentTarget + if ($currentTarget -eq "gfx110X") { + $s3Target = "$currentTarget-dgpu" + Write-Host "Using S3 target with -dgpu suffix: $s3Target" + } elseif ($currentTarget -eq "gfx120X") { + $s3Target = "$currentTarget-all" + Write-Host "Using S3 target with -all suffix: $s3Target" + } + + if ($rocmVersion -eq "latest") { + Write-Host "Auto-detecting latest ROCm version for target: $currentTarget" + $s3Response = (Invoke-WebRequest "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-windows-$s3Target-7").Content + $files = $s3Response -split '' | Where-Object {$_ -match ''} | ForEach-Object { ($_ -split '')[0] } + + $versionFiles = @() + foreach ($file in $files) { + if ($file -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { + $version = $matches[1] + $versionFiles += [PSCustomObject]@{ + File = $file + Version = $version + Major = [int]($version -split '\.')[0] + Minor = [int]($version -split '\.')[1] + Patch = [int](($version -split '\.')[2] -replace '(?:a|rc).*', '') + RC = [int]($version -replace '.*(?:a|rc)', '') + IsAlpha = $version -match 'a' + } + } + } + + $latestFile = ($versionFiles | Sort-Object Major, Minor, Patch, @{Expression={if($_.IsAlpha){1}else{0}}}, RC | Select-Object -Last 1).File + Write-Host "Found latest file: $latestFile" + + if ($latestFile -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { + $rocmVersion = $matches[1] + Write-Host "Detected latest ROCm version: $rocmVersion" + } else { + Write-Error "Failed to extract ROCm version from latest file: $latestFile" + exit 1 + } + $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$latestFile" + } else { + $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-windows-$s3Target-$rocmVersion.tar.gz" + } + + echo "DETECTED_ROCM_VERSION=$rocmVersion" >> $env:GITHUB_ENV + Invoke-WebRequest -Uri $rocmUrl -OutFile "rocm.tar.gz" + + - name: Extract ROCm to C:\opt\rocm + run: | + New-Item -ItemType Directory -Force -Path "C:\opt\rocm" + tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 + + - name: Configure + shell: pwsh + run: | + $currentTarget = "${{ matrix.gfx_target }}" + Write-Host "Input target: $currentTarget" + + if ($currentTarget -eq "gfx110X") { + $mapped_target = "gfx1100;gfx1101;gfx1102" + } elseif ($currentTarget -eq "gfx1151") { + $mapped_target = "gfx1151" + } elseif ($currentTarget -eq "gfx1150") { + $mapped_target = "gfx1150" + } elseif ($currentTarget -eq "gfx120X") { + $mapped_target = "gfx1200;gfx1201" + } else { + $mapped_target = $currentTarget + } + Write-Host "Mapped target: $mapped_target" + + # Set up environment variables and PATH + $env:HIP_PATH = "C:\opt\rocm" + $env:HIP_PLATFORM = "amd" + # Ensure bin comes before llvm\bin for consistency + $env:PATH = "$env:HIP_PATH\bin;$env:HIP_PATH\lib\llvm\bin;$env:PATH" + + # Define CMake arguments + $cmakeArgs = @( + "-S", ".", + "-B", "build", + "-G", "Ninja Multi-Config", + "-DGPU_TARGETS=$mapped_target", + "-DGGML_HIP=ON", + "-DCMAKE_C_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang.exe", + "-DCMAKE_CXX_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", + "-DCMAKE_HIP_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", + "-DCMAKE_C_FLAGS='-D__PRFCHWINTRIN_H'", + "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'", + "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm", + "-DCMAKE_PREFIX_PATH=$env:HIP_PATH", + "-DGGML_ROCM=1", + "-DCMAKE_BUILD_TYPE=${{ matrix.build }}", + "-DBUILD_SHARED_LIBS=ON", + "-DWHISPER_SDL2=${{ matrix.sdl2 }}" + ) + # Run CMake + cmake @cmakeArgs + + - name: Build + shell: pwsh + run: | + cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS + + - name: Copy ROCm core DLLs to build directory + run: | + $rocmVersion = if ($env:DETECTED_ROCM_VERSION) { $env:DETECTED_ROCM_VERSION } else { $env:ROCM_VERSION } + $buildBinPath = "build/bin/${{ matrix.build }}" + $rocmBinPath = "C:\opt\rocm\bin" + + Write-Host "Copying ROCm core DLL files..." + + if (Test-Path $rocmBinPath) { + # Copy files matching patterns + $filesToCopy = @( + "amdhip64_*.dll", + "amd_comgr*.dll", + "libhipblas.dll", + "rocblas.dll", + "rocsolver.dll", + "hipblaslt.dll", + "libhipblaslt.dll", + "hipblas.dll" + ) + + foreach ($pattern in $filesToCopy) { + $matchingFiles = Get-ChildItem -Path $rocmBinPath -Name $pattern -ErrorAction SilentlyContinue + if ($matchingFiles) { + foreach ($file in $matchingFiles) { + Copy-Item (Join-Path $rocmBinPath $file) (Join-Path $buildBinPath $file) + Write-Host "Copied: $file" + } + } + } + + # Copy rocblas/library + $rocblasLibPath = Join-Path $rocmBinPath "rocblas\library" + if (Test-Path $rocblasLibPath) { + Copy-Item -Path $rocblasLibPath -Destination (Join-Path $buildBinPath "rocblas\library") -Recurse -Force + Write-Host "Copied: rocblas\library" + } + + # Copy hipblaslt/library + $hipblasltLibPath = Join-Path $rocmBinPath "hipblaslt\library" + if (Test-Path $hipblasltLibPath) { + Copy-Item -Path $hipblasltLibPath -Destination (Join-Path $buildBinPath "hipblaslt\library") -Recurse -Force + Write-Host "Copied: hipblaslt\library" + } + } + + - name: Copy SDL2.dll + if: matrix.sdl2 == 'ON' + run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} + + - name: Copy SDL2.dll + if: matrix.sdl2 == 'ON' + run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" "build/bin/${{ matrix.build }}" + + - name: Pack bin artifacts + shell: pwsh + run: | + # Create unique zip name with target suffix + $zipName = "whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip" + Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath $zipName + + - name: Upload binaries + if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + uses: actions/upload-artifact@v4 + with: + # Unique artifact name per matrix job + name: whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip + path: whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip emscripten: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -1245,6 +1814,8 @@ jobs: - windows - windows-blas - windows-cublas + - windows-rocm + - ubuntu-rocm steps: - name: Clone @@ -1558,3 +2129,114 @@ jobs: run: | vulkaninfo --summary GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp + + # AMD ROCm GPU Testing (self-hosted runners) + test-rocm-linux: + runs-on: ${{ matrix.runner }} + + strategy: + fail-fast: false + matrix: + include: + - gfx_target: gfx1151 + runner: [stx-halo, Linux] + # Uncomment when runners are available: + # - gfx_target: gfx1100 + # runner: [navi31, Linux] + # - gfx_target: gfx1200 + # runner: [rdna4, Linux] + # - gfx_target: gfx1150 + # runner: [rai300_400, Linux] + + concurrency: + group: rocm-test-linux-${{ matrix.gfx_target }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + + timeout-minutes: 120 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-linux + + - name: Verify ROCm installation + run: | + echo "=== ROCm Environment ===" + rocm-smi || echo "rocm-smi not found" + rocminfo | head -40 || echo "rocminfo not found" + hipcc --version || echo "hipcc not found" + echo "=== GPU Info ===" + rocm-smi --showproductname 2>/dev/null || true + + - name: Test + id: ggml-ci + run: | + GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS=${{ matrix.gfx_target }} GG_BUILD_LOW_PERF=1 \ + bash ./ci/run.sh ~/results/whisper.cpp-rocm-${{ matrix.gfx_target }} /mnt/whisper.cpp + + - name: Cleanup after run + if: always() + uses: ./.github/actions/cleanup-processes-linux + + test-rocm-windows: + runs-on: ${{ matrix.runner }} + + strategy: + fail-fast: false + matrix: + include: + - gfx_target: gfx1151 + runner: [stx-halo, Windows] + # Uncomment when runners are available: + # - gfx_target: gfx1100 + # runner: [navi31, Windows] + # - gfx_target: gfx1200 + # runner: [rdna4, Windows] + # - gfx_target: gfx1150 + # runner: [rai300_400, Windows] + + concurrency: + group: rocm-test-windows-${{ matrix.gfx_target }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + + timeout-minutes: 120 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Cleanup before run + uses: ./.github/actions/cleanup-processes-windows + + - name: Verify ROCm installation + shell: pwsh + run: | + Write-Host "=== ROCm Environment ===" + & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null + & "$env:HIP_PATH\bin\hipcc.exe" --version 2>$null + + - name: Configure ROCm environment + shell: pwsh + run: | + $rocmPath = $env:HIP_PATH + if (-not $rocmPath) { $rocmPath = "C:\opt\rocm" } + echo "HIP_PATH=$rocmPath" >> $env:GITHUB_ENV + echo "ROCM_PATH=$rocmPath" >> $env:GITHUB_ENV + echo "HIP_PLATFORM=amd" >> $env:GITHUB_ENV + echo "$rocmPath\bin" >> $env:GITHUB_PATH + echo "$rocmPath\lib\llvm\bin" >> $env:GITHUB_PATH + + - name: Test + id: ggml-ci + shell: bash + run: | + GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS=${{ matrix.gfx_target }} GG_BUILD_LOW_PERF=1 \ + bash ./ci/run.sh ~/results/whisper.cpp-rocm-${{ matrix.gfx_target }} /mnt/whisper.cpp + + - name: Cleanup after run + if: always() + uses: ./.github/actions/cleanup-processes-windows diff --git a/.github/workflows/runner_heartbeat.yml b/.github/workflows/runner_heartbeat.yml new file mode 100644 index 00000000000..dff7d7f05a3 --- /dev/null +++ b/.github/workflows/runner_heartbeat.yml @@ -0,0 +1,59 @@ +name: Runner Heartbeat + +on: + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +jobs: + check-rocm-linux: + strategy: + fail-fast: false + matrix: + include: + - runner: [rai300_400, Linux] + name: rai300-400-linux + - runner: [stx-halo, Linux] + name: stx-halo-linux + runs-on: ${{ matrix.runner }} + timeout-minutes: 10 + steps: + - name: Heartbeat + run: | + echo "=== Runner Heartbeat: ${{ matrix.name }} ===" + echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "Hostname: $(hostname)" + echo "=== GPU Status ===" + rocm-smi 2>/dev/null || echo "rocm-smi not available" + echo "=== Disk Space ===" + df -h / /mnt 2>/dev/null || df -h / + echo "=== Memory ===" + free -h + echo "=== ROCm Version ===" + cat /opt/rocm/.info/version 2>/dev/null || echo "ROCm version file not found" + + check-rocm-windows: + strategy: + fail-fast: false + matrix: + include: + - runner: [rai300_400, Windows] + name: rai300-400-windows + - runner: [stx-halo, Windows] + name: stx-halo-windows + runs-on: ${{ matrix.runner }} + timeout-minutes: 10 + steps: + - name: Heartbeat + shell: pwsh + run: | + Write-Host "=== Runner Heartbeat: ${{ matrix.name }} ===" + Write-Host "Timestamp: $(Get-Date -Format o)" + Write-Host "Hostname: $env:COMPUTERNAME" + Write-Host "=== GPU Status ===" + & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null + Write-Host "=== Disk Space ===" + Get-PSDrive -PSProvider FileSystem | Format-Table Name, Used, Free -AutoSize + Write-Host "=== Memory ===" + $os = Get-CimInstance Win32_OperatingSystem + Write-Host "Free: $([math]::Round($os.FreePhysicalMemory/1MB, 1)) GB / Total: $([math]::Round($os.TotalVisibleMemorySize/1MB, 1)) GB" diff --git a/ci/run.sh b/ci/run.sh index cbe28442e16..85ade166658 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -79,6 +79,13 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then fi CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}" + + # Set HIP environment if not already set + export HIP_PLATFORM=${HIP_PLATFORM:-amd} + export ROCM_PATH=${ROCM_PATH:-/opt/rocm} + export HIP_PATH=${HIP_PATH:-/opt/rocm} + export LD_LIBRARY_PATH=${ROCM_PATH}/lib:${LD_LIBRARY_PATH} + CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_PREFIX_PATH=${ROCM_PATH} -DCMAKE_HIP_COMPILER=${ROCM_PATH}/lib/llvm/bin/clang++" fi if [ ! -z ${GG_BUILD_SYCL} ]; then @@ -223,7 +230,7 @@ function gg_run_ctest { gg_check_build_requirements (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + (time cmake --build . --config ${mode} -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log From 91fa40d96e74120f49f31b8d787831653cae0ab6 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Wed, 4 Mar 2026 11:49:12 -0800 Subject: [PATCH 09/55] I have removed duplicated steps, stray echo, all dead code and utputs should_build outputs to be specific. I ahve removed outputs.rocm_version from both ci steps, extracted resolve_rocm to a shared script for both jobs to use them. Fixed the matrix, removed both ubuntu-rocm and windows-rocm FGGML_ROCM=1 flag which doesn't apply because it isn't a real flag. Also commented out heartbeat runners. --- .github/workflows/build.yml | 236 +++---------------------- .github/workflows/runner_heartbeat.yml | 56 +++--- ci/map-gpu-target.sh | 28 +++ ci/resolve-rocm-version.sh | 96 ++++++++++ ci/run.sh | 1 + 5 files changed, 181 insertions(+), 236 deletions(-) create mode 100755 ci/map-gpu-target.sh create mode 100755 ci/resolve-rocm-version.sh diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 347041a836d..1a8f5c9c80e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -127,17 +127,12 @@ jobs: outputs: windows_matrix: ${{ steps.set-matrix.outputs.windows_matrix }} ubuntu_matrix: ${{ steps.set-matrix.outputs.ubuntu_matrix }} - should_build_windows: ${{ steps.set-matrix.outputs.should_build_windows }} - should_build_ubuntu: ${{ steps.set-matrix.outputs.should_build_ubuntu }} steps: - name: Set matrix id: set-matrix run: | targets="${{ env.GFX_TARGETS }}" - operating_systems="windows,ubuntu" - echo "Input targets: $targets" - echo "Input operating systems: $operating_systems" target_array=$(echo "$targets" \ | tr ',' '\n' \ @@ -149,31 +144,11 @@ jobs: windows_matrix=$(echo "$target_array" \ | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}') - # 3. Create Ubuntu Matrix ubuntu_matrix=$(echo "$target_array" \ | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["linux/amd64"]}') - # Check which operating systems to build - should_build_windows="false" - should_build_ubuntu="false" - - if [[ "$operating_systems" == *"windows"* ]]; then - should_build_windows="true" - echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT - fi - - if [[ "$operating_systems" == *"ubuntu"* ]]; then - should_build_ubuntu="true" - echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT - fi - - echo "should_build_windows=$should_build_windows" >> $GITHUB_OUTPUT - echo "should_build_ubuntu=$should_build_ubuntu" >> $GITHUB_OUTPUT - - echo "Windows build: $should_build_windows" - echo "Ubuntu build: $should_build_ubuntu" - echo "Generated matrix: $matrix_targets" - #linux/amd64 + echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT + echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT ubuntu-22: if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || @@ -469,17 +444,20 @@ jobs: github.event.inputs.run_type == 'full-ci' }} runs-on: ubuntu-22.04 needs: [determine-tag, prepare-matrix] - # Check if we should run (based on the prepare-matrix output) strategy: - # Uses the matrix generated in prepare-matrix (gfx_target, arch, build, sdl2) matrix: ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} fail-fast: false - outputs: - rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} steps: - name: Free disk space - run: curl -fsSL https://raw.githubusercontent.com/kou/arrow/e49d8ae15583ceff03237571569099a6ad62be32/ci/scripts/util_free_space.sh | bash + run: | + echo "=== Disk usage before cleanup ===" + df -h / + sudo rm -rf /usr/local/lib/android /opt/ghc /usr/local/share/boost \ + /usr/share/dotnet /usr/local/.ghcup /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force 2>/dev/null || true + echo "=== Disk usage after cleanup ===" + df -h / - name: Checkout repository uses: actions/checkout@v4 @@ -491,86 +469,10 @@ jobs: - name: Download and extract ROCm directly to /opt/rocm run: | - # Determine ROCm version to use - rocm_version="${{ env.ROCM_VERSION }}" - current_target="${{ matrix.gfx_target }}" - - # Add appropriate suffixes for different GPU targets - s3_target="$current_target" - if [ "$current_target" = "gfx110X" ]; then - s3_target="${current_target}-dgpu" - echo "Using S3 target with -dgpu suffix: $s3_target" - elif [ "$current_target" = "gfx120X" ]; then - s3_target="${current_target}-all" - echo "Using S3 target with -all suffix: $s3_target" - fi - - if [ "$rocm_version" = "latest" ]; then - echo "Auto-detecting latest ROCm version for target: $current_target" - s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-linux-${s3_target}-7") - - # Extract all files - files=$(echo "$s3_response" | grep -oP '(?<=)[^<]*' | grep "therock-dist-linux-${s3_target}-") - - # Extract versions and sort them properly - latest_file="" - latest_major=0 - latest_minor=0 - latest_patch=0 - latest_rc=0 - latest_is_alpha=false - - while IFS= read -r file; do - if [[ "$file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then - version="${BASH_REMATCH[1]}" - major=$(echo "$version" | cut -d. -f1) - minor=$(echo "$version" | cut -d. -f2) - patch=$(echo "$version" | cut -d. -f3 | sed 's/\(a\|rc\).*//') - rc=$(echo "$version" | sed 's/.*\(a\|rc\)//') - is_alpha=false - if [[ "$version" =~ a ]]; then is_alpha=true; fi - - is_newer=false - if [ "$major" -gt "$latest_major" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then - if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then is_newer=true; - elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; - fi - fi - - if [ "$is_newer" = true ]; then - latest_file="$file" - latest_major="$major" - latest_minor="$minor" - latest_patch="$patch" - latest_rc="$rc" - latest_is_alpha="$is_alpha" - fi - fi - done <<< "$files" - - echo "Found latest file: $latest_file" - - if [[ "$latest_file" =~ therock-dist-linux-${s3_target}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then - rocm_version="${BASH_REMATCH[1]}" - echo "Detected latest ROCm version: $rocm_version" - else - echo "Failed to extract ROCm version from latest file: $latest_file" - exit 1 - fi - - rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/$latest_file" - else - rocm_url="https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-${s3_target}-${rocm_version}.tar.gz" - fi - - echo "DETECTED_ROCM_VERSION=$rocm_version" >> $GITHUB_ENV - - # Create directory and stream extraction + source ci/resolve-rocm-version.sh linux "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" + echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV sudo mkdir -p /opt/rocm - curl -sL "$rocm_url" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 + curl -sL "$ROCM_TARBALL_URL" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 - name: Set ROCm environment variables run: | @@ -595,23 +497,7 @@ jobs: - name: Configure CMake run: | - - # Map GPU targets - current_target="${{ matrix.gfx_target }}" - echo "Input target: $current_target" - - if [ "$current_target" = "gfx110X" ]; then - mapped_target="gfx1100;gfx1101;gfx1102" - elif [ "$current_target" = "gfx1151" ]; then - mapped_target="gfx1151" - elif [ "$current_target" = "gfx1150" ]; then - mapped_target="gfx1150" - elif [ "$current_target" = "gfx120X" ]; then - mapped_target="gfx1200;gfx1201" - else - mapped_target="$current_target" - fi - echo "Mapped target: $mapped_target" + source ci/map-gpu-target.sh "${{ matrix.gfx_target }}" cmake -S . -B build -G Ninja \ -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \ @@ -619,10 +505,9 @@ jobs: -DCMAKE_HIP_FLAGS="--rocm-path=/opt/rocm --rocm-device-lib-path=${{ env.ROCM_BITCODE_PATH }}" \ -DCMAKE_PREFIX_PATH=/opt/rocm \ -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - -DGPU_TARGETS="$mapped_target" \ + -DGPU_TARGETS="$MAPPED_GPU_TARGET" \ -DBUILD_SHARED_LIBS=ON \ -DGGML_HIP=ON \ - -DGGML_ROCM=1 \ -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build @@ -1313,8 +1198,6 @@ jobs: strategy: matrix: ${{fromJson(needs.prepare-matrix.outputs.windows_matrix)}} fail-fast: false - outputs: - rocm_version: ${{ steps.set-outputs.outputs.rocm_version }} steps: - name: Clone @@ -1372,95 +1255,36 @@ jobs: } - name: Download ROCm nightly tarball + shell: bash run: | - # Determine ROCm version to use - $rocmVersion = "${{ env.ROCM_VERSION }}" - $currentTarget = "${{ matrix.gfx_target }}" - - $s3Target = $currentTarget - if ($currentTarget -eq "gfx110X") { - $s3Target = "$currentTarget-dgpu" - Write-Host "Using S3 target with -dgpu suffix: $s3Target" - } elseif ($currentTarget -eq "gfx120X") { - $s3Target = "$currentTarget-all" - Write-Host "Using S3 target with -all suffix: $s3Target" - } - - if ($rocmVersion -eq "latest") { - Write-Host "Auto-detecting latest ROCm version for target: $currentTarget" - $s3Response = (Invoke-WebRequest "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=therock-dist-windows-$s3Target-7").Content - $files = $s3Response -split '' | Where-Object {$_ -match ''} | ForEach-Object { ($_ -split '')[0] } - - $versionFiles = @() - foreach ($file in $files) { - if ($file -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { - $version = $matches[1] - $versionFiles += [PSCustomObject]@{ - File = $file - Version = $version - Major = [int]($version -split '\.')[0] - Minor = [int]($version -split '\.')[1] - Patch = [int](($version -split '\.')[2] -replace '(?:a|rc).*', '') - RC = [int]($version -replace '.*(?:a|rc)', '') - IsAlpha = $version -match 'a' - } - } - } - - $latestFile = ($versionFiles | Sort-Object Major, Minor, Patch, @{Expression={if($_.IsAlpha){1}else{0}}}, RC | Select-Object -Last 1).File - Write-Host "Found latest file: $latestFile" - - if ($latestFile -match "therock-dist-windows-$s3Target-.*?(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz") { - $rocmVersion = $matches[1] - Write-Host "Detected latest ROCm version: $rocmVersion" - } else { - Write-Error "Failed to extract ROCm version from latest file: $latestFile" - exit 1 - } - $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$latestFile" - } else { - $rocmUrl = "https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-windows-$s3Target-$rocmVersion.tar.gz" - } - - echo "DETECTED_ROCM_VERSION=$rocmVersion" >> $env:GITHUB_ENV - Invoke-WebRequest -Uri $rocmUrl -OutFile "rocm.tar.gz" + source ci/resolve-rocm-version.sh windows "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" + echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV + curl -sL "$ROCM_TARBALL_URL" -o rocm.tar.gz - name: Extract ROCm to C:\opt\rocm run: | New-Item -ItemType Directory -Force -Path "C:\opt\rocm" tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 + - name: Map GPU target + id: gpu-target + shell: bash + run: | + source ci/map-gpu-target.sh "${{ matrix.gfx_target }}" + echo "mapped=$MAPPED_GPU_TARGET" >> $GITHUB_OUTPUT + - name: Configure shell: pwsh run: | - $currentTarget = "${{ matrix.gfx_target }}" - Write-Host "Input target: $currentTarget" - - if ($currentTarget -eq "gfx110X") { - $mapped_target = "gfx1100;gfx1101;gfx1102" - } elseif ($currentTarget -eq "gfx1151") { - $mapped_target = "gfx1151" - } elseif ($currentTarget -eq "gfx1150") { - $mapped_target = "gfx1150" - } elseif ($currentTarget -eq "gfx120X") { - $mapped_target = "gfx1200;gfx1201" - } else { - $mapped_target = $currentTarget - } - Write-Host "Mapped target: $mapped_target" - - # Set up environment variables and PATH $env:HIP_PATH = "C:\opt\rocm" $env:HIP_PLATFORM = "amd" - # Ensure bin comes before llvm\bin for consistency $env:PATH = "$env:HIP_PATH\bin;$env:HIP_PATH\lib\llvm\bin;$env:PATH" - # Define CMake arguments $cmakeArgs = @( "-S", ".", "-B", "build", "-G", "Ninja Multi-Config", - "-DGPU_TARGETS=$mapped_target", + "-DGPU_TARGETS=${{ steps.gpu-target.outputs.mapped }}", "-DGGML_HIP=ON", "-DCMAKE_C_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang.exe", "-DCMAKE_CXX_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", @@ -1469,12 +1293,10 @@ jobs: "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'", "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm", "-DCMAKE_PREFIX_PATH=$env:HIP_PATH", - "-DGGML_ROCM=1", "-DCMAKE_BUILD_TYPE=${{ matrix.build }}", "-DBUILD_SHARED_LIBS=ON", "-DWHISPER_SDL2=${{ matrix.sdl2 }}" ) - # Run CMake cmake @cmakeArgs - name: Build @@ -1528,10 +1350,6 @@ jobs: } } - - name: Copy SDL2.dll - if: matrix.sdl2 == 'ON' - run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} - - name: Copy SDL2.dll if: matrix.sdl2 == 'ON' run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" "build/bin/${{ matrix.build }}" diff --git a/.github/workflows/runner_heartbeat.yml b/.github/workflows/runner_heartbeat.yml index dff7d7f05a3..de133378750 100644 --- a/.github/workflows/runner_heartbeat.yml +++ b/.github/workflows/runner_heartbeat.yml @@ -11,8 +11,9 @@ jobs: fail-fast: false matrix: include: - - runner: [rai300_400, Linux] - name: rai300-400-linux + # Uncomment when self-hosted runners are registered: + # - runner: [rai300_400, Linux] + # name: rai300-400-linux - runner: [stx-halo, Linux] name: stx-halo-linux runs-on: ${{ matrix.runner }} @@ -32,28 +33,29 @@ jobs: echo "=== ROCm Version ===" cat /opt/rocm/.info/version 2>/dev/null || echo "ROCm version file not found" - check-rocm-windows: - strategy: - fail-fast: false - matrix: - include: - - runner: [rai300_400, Windows] - name: rai300-400-windows - - runner: [stx-halo, Windows] - name: stx-halo-windows - runs-on: ${{ matrix.runner }} - timeout-minutes: 10 - steps: - - name: Heartbeat - shell: pwsh - run: | - Write-Host "=== Runner Heartbeat: ${{ matrix.name }} ===" - Write-Host "Timestamp: $(Get-Date -Format o)" - Write-Host "Hostname: $env:COMPUTERNAME" - Write-Host "=== GPU Status ===" - & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null - Write-Host "=== Disk Space ===" - Get-PSDrive -PSProvider FileSystem | Format-Table Name, Used, Free -AutoSize - Write-Host "=== Memory ===" - $os = Get-CimInstance Win32_OperatingSystem - Write-Host "Free: $([math]::Round($os.FreePhysicalMemory/1MB, 1)) GB / Total: $([math]::Round($os.TotalVisibleMemorySize/1MB, 1)) GB" + # Uncomment when Windows self-hosted runners are registered: + # check-rocm-windows: + # strategy: + # fail-fast: false + # matrix: + # include: + # - runner: [rai300_400, Windows] + # name: rai300-400-windows + # - runner: [stx-halo, Windows] + # name: stx-halo-windows + # runs-on: ${{ matrix.runner }} + # timeout-minutes: 10 + # steps: + # - name: Heartbeat + # shell: pwsh + # run: | + # Write-Host "=== Runner Heartbeat: ${{ matrix.name }} ===" + # Write-Host "Timestamp: $(Get-Date -Format o)" + # Write-Host "Hostname: $env:COMPUTERNAME" + # Write-Host "=== GPU Status ===" + # & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null + # Write-Host "=== Disk Space ===" + # Get-PSDrive -PSProvider FileSystem | Format-Table Name, Used, Free -AutoSize + # Write-Host "=== Memory ===" + # $os = Get-CimInstance Win32_OperatingSystem + # Write-Host "Free: $([math]::Round($os.FreePhysicalMemory/1MB, 1)) GB / Total: $([math]::Round($os.TotalVisibleMemorySize/1MB, 1)) GB" diff --git a/ci/map-gpu-target.sh b/ci/map-gpu-target.sh new file mode 100755 index 00000000000..1e7de7c9fcf --- /dev/null +++ b/ci/map-gpu-target.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Map a GFX target shorthand to specific GPU architectures for CMake. +# +# Usage: +# source ci/map-gpu-target.sh +# +# Arguments: +# gfx_target - GPU target (gfx1151, gfx1150, gfx110X, gfx120X, or specific) +# +# Outputs (exported): +# MAPPED_GPU_TARGET - Semicolon-separated list of GPU architectures + +gfx_target="$1" + +if [ -z "$gfx_target" ]; then + echo "Usage: source ci/map-gpu-target.sh " + return 1 2>/dev/null || exit 1 +fi + +case "$gfx_target" in + gfx110X) MAPPED_GPU_TARGET="gfx1100;gfx1101;gfx1102" ;; + gfx120X) MAPPED_GPU_TARGET="gfx1200;gfx1201" ;; + *) MAPPED_GPU_TARGET="$gfx_target" ;; +esac + +export MAPPED_GPU_TARGET +echo "Mapped GPU target: $gfx_target -> $MAPPED_GPU_TARGET" diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh new file mode 100755 index 00000000000..7d9e1c60fc9 --- /dev/null +++ b/ci/resolve-rocm-version.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# +# Resolve the latest ROCm nightly tarball URL for a given GPU target and platform. +# +# Usage: +# source ci/resolve-rocm-version.sh +# +# Arguments: +# platform - "linux" or "windows" +# gfx_target - GPU target (gfx1151, gfx1150, gfx110X, gfx120X) +# rocm_version - Specific version (e.g. 7.11.0a20251205) or "latest" +# +# Outputs (exported): +# ROCM_RESOLVED_VERSION - The resolved version string +# ROCM_TARBALL_URL - The full S3 URL to download + +platform="$1" +gfx_target="$2" +rocm_version="$3" + +if [ -z "$platform" ] || [ -z "$gfx_target" ] || [ -z "$rocm_version" ]; then + echo "Usage: source ci/resolve-rocm-version.sh " + return 1 2>/dev/null || exit 1 +fi + +# Map GPU target to S3 naming convention +s3_target="$gfx_target" +if [ "$gfx_target" = "gfx110X" ]; then + s3_target="${gfx_target}-dgpu" +elif [ "$gfx_target" = "gfx120X" ]; then + s3_target="${gfx_target}-all" +fi + +dist_prefix="therock-dist-${platform}-${s3_target}" + +if [ "$rocm_version" = "latest" ]; then + echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." + s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") + + files=$(echo "$s3_response" | grep -oP '(?<=)[^<]*' | grep "${dist_prefix}-") + + latest_file="" + latest_major=0 + latest_minor=0 + latest_patch=0 + latest_rc=0 + latest_is_alpha=false + + while IFS= read -r file; do + if [[ "$file" =~ ${dist_prefix}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + version="${BASH_REMATCH[1]}" + major=$(echo "$version" | cut -d. -f1) + minor=$(echo "$version" | cut -d. -f2) + patch=$(echo "$version" | cut -d. -f3 | sed 's/\(a\|rc\).*//') + rc=$(echo "$version" | sed 's/.*\(a\|rc\)//') + is_alpha=false + if [[ "$version" =~ a ]]; then is_alpha=true; fi + + is_newer=false + if [ "$major" -gt "$latest_major" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; + elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then + if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then is_newer=true; + elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; + fi + fi + + if [ "$is_newer" = true ]; then + latest_file="$file" + latest_major="$major" + latest_minor="$minor" + latest_patch="$patch" + latest_rc="$rc" + latest_is_alpha="$is_alpha" + fi + fi + done <<< "$files" + + echo "Found latest file: $latest_file" + + if [[ "$latest_file" =~ ${dist_prefix}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + rocm_version="${BASH_REMATCH[1]}" + echo "Detected latest ROCm version: $rocm_version" + else + echo "Failed to extract ROCm version from latest file: $latest_file" + return 1 2>/dev/null || exit 1 + fi + + export ROCM_TARBALL_URL="https://therock-nightly-tarball.s3.amazonaws.com/$latest_file" +else + export ROCM_TARBALL_URL="https://therock-nightly-tarball.s3.amazonaws.com/${dist_prefix}-${rocm_version}.tar.gz" +fi + +export ROCM_RESOLVED_VERSION="$rocm_version" +echo "ROCm URL: $ROCM_TARBALL_URL" diff --git a/ci/run.sh b/ci/run.sh index 85ade166658..a65928a1431 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -230,6 +230,7 @@ function gg_run_ctest { gg_check_build_requirements (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log + # Use cmake --build instead of make for cross-platform compatibility (Windows ROCm CI) (time cmake --build . --config ${mode} -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log From 265c868c6689d3404fbc076fdb5d2ac244d6f915 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Wed, 4 Mar 2026 13:49:46 -0800 Subject: [PATCH 10/55] Replaced grep -oP '(?<=)[^<]*' with portable sed -n 's/.*\([^<]*\)<\/Key>.*/\1/gp'. This works on both Linux and Windows Git Bash. --- ci/resolve-rocm-version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 7d9e1c60fc9..6a49e401671 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -37,7 +37,7 @@ if [ "$rocm_version" = "latest" ]; then echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - files=$(echo "$s3_response" | grep -oP '(?<=)[^<]*' | grep "${dist_prefix}-") + files=$(echo "$s3_response" | sed -n 's/.*\([^<]*\)<\/Key>.*/\1/gp' | grep "${dist_prefix}-") latest_file="" latest_major=0 From 2c853b7aabf0dbfe307b916280f496b552e05bcf Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Wed, 4 Mar 2026 15:27:42 -0800 Subject: [PATCH 11/55] Reverted the change using cmake instead of make adding mode. --- .github/workflows/build.yml | 9 ++++++--- ci/resolve-rocm-version.sh | 2 +- ci/run.sh | 3 +-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1a8f5c9c80e..c5c9290ddce 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -413,12 +413,15 @@ jobs: strategy: fail-fast: false matrix: - arch: [linux/amd64] - build: [Release] + build: [Debug, Release] + #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] + # TODO: arm/v7 disabled due to clang bug + # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990 + arch: [linux/amd64, linux/arm64, linux/ppc64le] steps: - name: Clone - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up QEMU uses: docker/setup-qemu-action@v3 diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 6a49e401671..eb2bcc58727 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -37,7 +37,7 @@ if [ "$rocm_version" = "latest" ]; then echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - files=$(echo "$s3_response" | sed -n 's/.*\([^<]*\)<\/Key>.*/\1/gp' | grep "${dist_prefix}-") + files=$(echo "$s3_response" | sed 's//\n/g' | sed -n 's/\([^<]*\)<\/Key>.*/\1/p' | grep "${dist_prefix}-") latest_file="" latest_major=0 diff --git a/ci/run.sh b/ci/run.sh index a65928a1431..9f6d73d9c04 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -230,8 +230,7 @@ function gg_run_ctest { gg_check_build_requirements (time cmake -DCMAKE_BUILD_TYPE=${mode} ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log - # Use cmake --build instead of make for cross-platform compatibility (Windows ROCm CI) - (time cmake --build . --config ${mode} -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + (time make -j $(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log From 2fd0fb78dee8e36c59ec1944c33cd51d9e99a5fe Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Mon, 9 Mar 2026 13:43:18 -0700 Subject: [PATCH 12/55] make compatible with git bash --- ci/resolve-rocm-version.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index eb2bcc58727..e0fa5eba79e 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -24,10 +24,9 @@ if [ -z "$platform" ] || [ -z "$gfx_target" ] || [ -z "$rocm_version" ]; then fi # Map GPU target to S3 naming convention +# Group targets (gfx110X, gfx120X) use "-all" suffix; individual targets have no suffix s3_target="$gfx_target" -if [ "$gfx_target" = "gfx110X" ]; then - s3_target="${gfx_target}-dgpu" -elif [ "$gfx_target" = "gfx120X" ]; then +if [ "$gfx_target" = "gfx110X" ] || [ "$gfx_target" = "gfx120X" ]; then s3_target="${gfx_target}-all" fi @@ -37,7 +36,7 @@ if [ "$rocm_version" = "latest" ]; then echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - files=$(echo "$s3_response" | sed 's//\n/g' | sed -n 's/\([^<]*\)<\/Key>.*/\1/p' | grep "${dist_prefix}-") + files=$(echo "$s3_response" | tr '<' '\n' | sed -n 's/^Key>\([^<]*\)/\1/p' | grep "${dist_prefix}-") latest_file="" latest_major=0 From 4d25bcfc083025a4cb6045e2f9a6f8ffb27f55cf Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Mon, 9 Mar 2026 14:53:01 -0700 Subject: [PATCH 13/55] changing the approach to use awk instead, since prior attempt with tr, sed and grep didn't work. --- ci/resolve-rocm-version.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index e0fa5eba79e..6ed39691426 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -36,7 +36,11 @@ if [ "$rocm_version" = "latest" ]; then echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - files=$(echo "$s3_response" | tr '<' '\n' | sed -n 's/^Key>\([^<]*\)/\1/p' | grep "${dist_prefix}-") + # Use awk for XML parsing - portable across Linux and Windows Git Bash + files=$(echo "$s3_response" | awk -v prefix="${dist_prefix}-" ' + BEGIN { RS=""; FS="" } + NR>1 && $1 ~ prefix { print $1 } + ') latest_file="" latest_major=0 From f1287e3807b44f719c3d331d53ecf4330a915a7d Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Mon, 9 Mar 2026 15:57:22 -0700 Subject: [PATCH 14/55] attempting grep now. --- ci/resolve-rocm-version.sh | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 6ed39691426..b715c483993 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -36,11 +36,9 @@ if [ "$rocm_version" = "latest" ]; then echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - # Use awk for XML parsing - portable across Linux and Windows Git Bash - files=$(echo "$s3_response" | awk -v prefix="${dist_prefix}-" ' - BEGIN { RS=""; FS="" } - NR>1 && $1 ~ prefix { print $1 } - ') + # Use grep -o (basic regex, no PCRE) + sed for XML parsing + # Works on full Git Bash AND MinGit/BusyBox variants + files=$(echo "$s3_response" | tr -d '\r' | grep -o '[^<]*' | sed 's///;s/<\/Key>//' | grep "^${dist_prefix}-") latest_file="" latest_major=0 From cd3b5fcee95a8536aef628a9bb6334e1f69cec54 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Mon, 9 Mar 2026 16:27:02 -0700 Subject: [PATCH 15/55] adding powershell job for winodws instead of shell because shells grep on windows doesnt work very well. --- .github/workflows/build.yml | 64 ++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c5c9290ddce..d0a9625ebd8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1257,12 +1257,66 @@ jobs: exit 1 } - - name: Download ROCm nightly tarball - shell: bash + - name: Resolve ROCm version and download tarball + shell: pwsh run: | - source ci/resolve-rocm-version.sh windows "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" - echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV - curl -sL "$ROCM_TARBALL_URL" -o rocm.tar.gz + $gfxTarget = "${{ matrix.gfx_target }}" + $rocmVersion = "${{ env.ROCM_VERSION }}" + + $s3Target = $gfxTarget + if ($gfxTarget -eq "gfx110X" -or $gfxTarget -eq "gfx120X") { + $s3Target = "$gfxTarget-all" + } + $distPrefix = "therock-dist-windows-$s3Target" + + if ($rocmVersion -eq "latest") { + Write-Host "Auto-detecting latest ROCm version for windows/$gfxTarget..." + $response = Invoke-RestMethod -Uri "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=$distPrefix-7" + $keys = $response.ListBucketResult.Contents.Key | Where-Object { $_ -match "^$([regex]::Escape($distPrefix))-" } + + $latestFile = $null + $latestMajor = 0; $latestMinor = 0; $latestPatch = 0 + $latestBuild = 0; $latestIsAlpha = $false + + foreach ($key in $keys) { + if ($key -match "$([regex]::Escape($distPrefix))-.*?(\d+)\.(\d+)\.(\d+)(a|rc)(\d+)\.tar\.gz$") { + $major = [int]$Matches[1]; $minor = [int]$Matches[2]; $patch = [int]$Matches[3] + $preType = $Matches[4]; $build = [int]$Matches[5] + $isAlpha = ($preType -eq "a") + + $isNewer = $false + if ($major -gt $latestMajor) { $isNewer = $true } + elseif ($major -eq $latestMajor -and $minor -gt $latestMinor) { $isNewer = $true } + elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -gt $latestPatch) { $isNewer = $true } + elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -eq $latestPatch) { + if ($isAlpha -and -not $latestIsAlpha) { $isNewer = $true } + elseif ($isAlpha -eq $latestIsAlpha -and $build -gt $latestBuild) { $isNewer = $true } + } + + if ($isNewer) { + $latestFile = $key + $latestMajor = $major; $latestMinor = $minor; $latestPatch = $patch + $latestBuild = $build; $latestIsAlpha = $isAlpha + } + } + } + + Write-Host "Found latest file: $latestFile" + if ($latestFile -match "(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz$") { + $rocmVersion = $Matches[1] + Write-Host "Detected latest ROCm version: $rocmVersion" + } else { + Write-Error "Failed to extract ROCm version from: $latestFile" + exit 1 + } + $tarballUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$latestFile" + } else { + $tarballUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$distPrefix-$rocmVersion.tar.gz" + } + + Write-Host "ROCm URL: $tarballUrl" + echo "DETECTED_ROCM_VERSION=$rocmVersion" >> $env:GITHUB_ENV + Invoke-WebRequest -Uri $tarballUrl -OutFile rocm.tar.gz - name: Extract ROCm to C:\opt\rocm run: | From 73de19c4de130e52c8f70675a9807c79289c36f9 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 16 Apr 2026 15:34:32 -0700 Subject: [PATCH 16/55] fix: address PR review comments from ramkrishna2910 - Fix alpha/RC version ordering bug in resolve-rocm-version.sh and build.yml (alpha was incorrectly treated as newer than RC) - Fix NULL check bug on ndim validation in ruby_whisper_context.c (ndim check was incorrectly guarded by format != NULL) - Add ${{ }} wrapper on if: conditionals at lines 615 and 1422 in build.yml --- .github/workflows/build.yml | 6 +++--- bindings/ruby/ext/ruby_whisper_context.c | 2 +- ci/resolve-rocm-version.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d0a9625ebd8..94cbde537c1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -612,7 +612,7 @@ jobs: zip -r ../../whisper-bin-${{ matrix.gfx_target }}-${SAFE_ARCH}.zip . - name: Upload binaries - if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v4 with: name: whisper-bin-${{ matrix.gfx_target }}-${{ env.SAFE_ARCH }} @@ -1289,7 +1289,7 @@ jobs: elseif ($major -eq $latestMajor -and $minor -gt $latestMinor) { $isNewer = $true } elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -gt $latestPatch) { $isNewer = $true } elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -eq $latestPatch) { - if ($isAlpha -and -not $latestIsAlpha) { $isNewer = $true } + if (-not $isAlpha -and $latestIsAlpha) { $isNewer = $true } elseif ($isAlpha -eq $latestIsAlpha -and $build -gt $latestBuild) { $isNewer = $true } } @@ -1419,7 +1419,7 @@ jobs: Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath $zipName - name: Upload binaries - if: matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release + if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} uses: actions/upload-artifact@v4 with: # Unique artifact name per matrix job diff --git a/bindings/ruby/ext/ruby_whisper_context.c b/bindings/ruby/ext/ruby_whisper_context.c index c39d43bd76c..df48b8fc1d6 100644 --- a/bindings/ruby/ext/ruby_whisper_context.c +++ b/bindings/ruby/ext/ruby_whisper_context.c @@ -308,7 +308,7 @@ check_memory_view(rb_memory_view_t *memview) rb_warn("currently only format \"f\" is supported for MemoryView, but given: %s", memview->format); return false; } - if (memview->format != NULL && memview->ndim != 1) { + if (memview->ndim != 1) { rb_warn("currently only 1 dimensional MemoryView is supported, but given: %zd", memview->ndim); return false; } diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index b715c483993..997b693d5a9 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -62,7 +62,7 @@ if [ "$rocm_version" = "latest" ]; then elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then - if [ "$is_alpha" = true ] && [ "$latest_is_alpha" = false ]; then is_newer=true; + if [ "$is_alpha" = false ] && [ "$latest_is_alpha" = true ]; then is_newer=true; elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; fi fi From 93276f0e02c6e06dd12a5091a44365fe20bd4776 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Thu, 16 Apr 2026 16:13:44 -0700 Subject: [PATCH 17/55] refactor: consolidate ROCm version resolution to shared bash script Replace duplicated ~55 lines of PowerShell version resolution logic in windows-rocm job with a call to ci/resolve-rocm-version.sh via Git Bash. This eliminates code duplication and ensures both Linux and Windows use the same version resolution logic. --- .github/workflows/build.yml | 62 +++---------------------------------- 1 file changed, 4 insertions(+), 58 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 94cbde537c1..3dd9fcb1c06 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1258,65 +1258,11 @@ jobs: } - name: Resolve ROCm version and download tarball - shell: pwsh + shell: bash run: | - $gfxTarget = "${{ matrix.gfx_target }}" - $rocmVersion = "${{ env.ROCM_VERSION }}" - - $s3Target = $gfxTarget - if ($gfxTarget -eq "gfx110X" -or $gfxTarget -eq "gfx120X") { - $s3Target = "$gfxTarget-all" - } - $distPrefix = "therock-dist-windows-$s3Target" - - if ($rocmVersion -eq "latest") { - Write-Host "Auto-detecting latest ROCm version for windows/$gfxTarget..." - $response = Invoke-RestMethod -Uri "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=$distPrefix-7" - $keys = $response.ListBucketResult.Contents.Key | Where-Object { $_ -match "^$([regex]::Escape($distPrefix))-" } - - $latestFile = $null - $latestMajor = 0; $latestMinor = 0; $latestPatch = 0 - $latestBuild = 0; $latestIsAlpha = $false - - foreach ($key in $keys) { - if ($key -match "$([regex]::Escape($distPrefix))-.*?(\d+)\.(\d+)\.(\d+)(a|rc)(\d+)\.tar\.gz$") { - $major = [int]$Matches[1]; $minor = [int]$Matches[2]; $patch = [int]$Matches[3] - $preType = $Matches[4]; $build = [int]$Matches[5] - $isAlpha = ($preType -eq "a") - - $isNewer = $false - if ($major -gt $latestMajor) { $isNewer = $true } - elseif ($major -eq $latestMajor -and $minor -gt $latestMinor) { $isNewer = $true } - elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -gt $latestPatch) { $isNewer = $true } - elseif ($major -eq $latestMajor -and $minor -eq $latestMinor -and $patch -eq $latestPatch) { - if (-not $isAlpha -and $latestIsAlpha) { $isNewer = $true } - elseif ($isAlpha -eq $latestIsAlpha -and $build -gt $latestBuild) { $isNewer = $true } - } - - if ($isNewer) { - $latestFile = $key - $latestMajor = $major; $latestMinor = $minor; $latestPatch = $patch - $latestBuild = $build; $latestIsAlpha = $isAlpha - } - } - } - - Write-Host "Found latest file: $latestFile" - if ($latestFile -match "(\d+\.\d+\.\d+(?:a|rc)\d+)\.tar\.gz$") { - $rocmVersion = $Matches[1] - Write-Host "Detected latest ROCm version: $rocmVersion" - } else { - Write-Error "Failed to extract ROCm version from: $latestFile" - exit 1 - } - $tarballUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$latestFile" - } else { - $tarballUrl = "https://therock-nightly-tarball.s3.amazonaws.com/$distPrefix-$rocmVersion.tar.gz" - } - - Write-Host "ROCm URL: $tarballUrl" - echo "DETECTED_ROCM_VERSION=$rocmVersion" >> $env:GITHUB_ENV - Invoke-WebRequest -Uri $tarballUrl -OutFile rocm.tar.gz + source ci/resolve-rocm-version.sh windows "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" + echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV + curl -sL "$ROCM_TARBALL_URL" -o rocm.tar.gz - name: Extract ROCm to C:\opt\rocm run: | From 9a4b11062074b2250a4d6c441bee84eb2dc90a6a Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Fri, 17 Apr 2026 07:12:09 -0700 Subject: [PATCH 18/55] fix(ci): fix Windows ROCm latest auto-detection regex compatibility Replace PCRE non-greedy .*? with ERE-compatible [^0-9]* in Bash regex patterns. Bash [[ =~ ]] uses POSIX ERE which does not support .*? non-greedy quantifier. On Windows Git Bash this fails strictly, leaving latest_file empty and causing 'Failed to extract ROCm version' error. Also adds: - File count validation with S3 response debug output - Empty latest_file check showing candidate files - Empty file line skip to prevent false regex matches --- ci/resolve-rocm-version.sh | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 997b693d5a9..0244c1a77cb 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -40,6 +40,15 @@ if [ "$rocm_version" = "latest" ]; then # Works on full Git Bash AND MinGit/BusyBox variants files=$(echo "$s3_response" | tr -d '\r' | grep -o '[^<]*' | sed 's///;s/<\/Key>//' | grep "^${dist_prefix}-") + # Validate that we found any files at all + file_count=$(echo "$files" | grep -c '.' 2>/dev/null || echo "0") + if [ "$file_count" -eq 0 ]; then + echo "ERROR: No ROCm tarball files found for prefix '${dist_prefix}-'" + echo "S3 response (first 500 chars): $(echo "$s3_response" | head -c 500)" + return 1 2>/dev/null || exit 1 + fi + echo "Found $file_count candidate files from S3" + latest_file="" latest_major=0 latest_minor=0 @@ -47,8 +56,17 @@ if [ "$rocm_version" = "latest" ]; then latest_rc=0 latest_is_alpha=false + # ERE-compatible regex pattern for version extraction. + # IMPORTANT: Bash [[ =~ ]] uses POSIX ERE, NOT PCRE. + # The PCRE non-greedy quantifier .*? is NOT supported in ERE. + # On Windows Git Bash, .*? is interpreted literally and fails to match. + # Fix: Use [^0-9]* (match non-digit chars) instead of .*? - this is ERE-compatible + # and works correctly since each filename contains exactly one version number. + version_regex="^${dist_prefix}-[^0-9]*([0-9]+\\.[0-9]+\\.[0-9]+(a|rc)[0-9]+)\\.tar\\.gz$" + while IFS= read -r file; do - if [[ "$file" =~ ${dist_prefix}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + [ -z "$file" ] && continue + if [[ "$file" =~ $version_regex ]]; then version="${BASH_REMATCH[1]}" major=$(echo "$version" | cut -d. -f1) minor=$(echo "$version" | cut -d. -f2) @@ -80,7 +98,15 @@ if [ "$rocm_version" = "latest" ]; then echo "Found latest file: $latest_file" - if [[ "$latest_file" =~ ${dist_prefix}-.*?([0-9]+\.[0-9]+\.[0-9]+(a|rc)[0-9]+)\.tar\.gz ]]; then + if [ -z "$latest_file" ]; then + echo "ERROR: No valid ROCm tarball files matched the version pattern" + echo "Showing first 5 candidate files:" + echo "$files" | head -5 + return 1 2>/dev/null || exit 1 + fi + + # Extract version from the resolved file using the same ERE-compatible pattern + if [[ "$latest_file" =~ $version_regex ]]; then rocm_version="${BASH_REMATCH[1]}" echo "Detected latest ROCm version: $rocm_version" else From fbe51d99766ecbeda972ed9588a9ca17559cef69 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Fri, 17 Apr 2026 07:46:43 -0700 Subject: [PATCH 19/55] Fix echo command for ROCm URL output --- ci/resolve-rocm-version.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 0244c1a77cb..c9974724c94 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -57,11 +57,6 @@ if [ "$rocm_version" = "latest" ]; then latest_is_alpha=false # ERE-compatible regex pattern for version extraction. - # IMPORTANT: Bash [[ =~ ]] uses POSIX ERE, NOT PCRE. - # The PCRE non-greedy quantifier .*? is NOT supported in ERE. - # On Windows Git Bash, .*? is interpreted literally and fails to match. - # Fix: Use [^0-9]* (match non-digit chars) instead of .*? - this is ERE-compatible - # and works correctly since each filename contains exactly one version number. version_regex="^${dist_prefix}-[^0-9]*([0-9]+\\.[0-9]+\\.[0-9]+(a|rc)[0-9]+)\\.tar\\.gz$" while IFS= read -r file; do From e07190559111a45305d940ca11230d2c93d24ca9 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Fri, 17 Apr 2026 08:10:07 -0700 Subject: [PATCH 20/55] update version_regex to be windows and linux compatbile. --- ci/resolve-rocm-version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index c9974724c94..ee985dd47d7 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -57,7 +57,7 @@ if [ "$rocm_version" = "latest" ]; then latest_is_alpha=false # ERE-compatible regex pattern for version extraction. - version_regex="^${dist_prefix}-[^0-9]*([0-9]+\\.[0-9]+\\.[0-9]+(a|rc)[0-9]+)\\.tar\\.gz$" + version_regex="^${dist_prefix}-([0-9]+[.][0-9]+[.][0-9]+(a|rc)[0-9]+)[.]tar[.]gz$" while IFS= read -r file; do [ -z "$file" ] && continue From 62589cb3ea2e12b76b4e305b8b0b2a8ed011858f Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Fri, 17 Apr 2026 08:23:55 -0700 Subject: [PATCH 21/55] refactor(ci): replace Amazon S3 nightly scanning with AMD official repo - Rewrite resolve-rocm-version.sh to use AMD's official tarball repo (repo.amd.com/rocm/tarball/) instead of scanning Amazon S3 - Remove 'latest' auto-detection logic which failed on Windows Git Bash due to PCRE vs ERE regex incompatibility - Add version format validation and clear error messages - Update build.yml workflow_dispatch to use concrete ROCm versions (7.12.0, 7.2.1) with choice options instead of 'latest' --- .github/workflows/build.yml | 10 ++- ci/resolve-rocm-version.sh | 121 +++++++++--------------------------- 2 files changed, 38 insertions(+), 93 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3dd9fcb1c06..16be67d5f25 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -50,9 +50,13 @@ on: required: false default: 'gfx1151,gfx1150,gfx120X,gfx110X' rocm_version: - description: 'ROCm version to use (e.g., 7.11.0a20251205) or "latest" to auto-detect' + description: 'ROCm version to use (e.g., 7.12.0, 7.2.1)' required: false - default: 'latest' + default: '7.12.0' + type: choice + options: + - 7.12.0 + - 7.2.1 concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -66,7 +70,7 @@ env: ubuntu_image: "ubuntu:22.04" VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X' }} - ROCM_VERSION: ${{ github.event.inputs.rocm_version || 'latest' }} + ROCM_VERSION: ${{ github.event.inputs.rocm_version || '7.12.0' }} jobs: determine-tag: diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index ee985dd47d7..fbfe5e68e1d 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -1,18 +1,21 @@ #!/bin/bash # -# Resolve the latest ROCm nightly tarball URL for a given GPU target and platform. +# Resolve the ROCm tarball URL for a given platform and version. +# +# Uses AMD's official repo tarball distribution: +# https://repo.amd.com/rocm/tarball/therock-dist-{platform}-{gfx_target}-{version}.tar.gz # # Usage: # source ci/resolve-rocm-version.sh # # Arguments: # platform - "linux" or "windows" -# gfx_target - GPU target (gfx1151, gfx1150, gfx110X, gfx120X) -# rocm_version - Specific version (e.g. 7.11.0a20251205) or "latest" +# gfx_target - GPU target (defaults to gfx1151 if not specified or is a group target) +# rocm_version - Specific version (e.g. 7.12.0, 7.2.1) - required, no "latest" auto-detection # # Outputs (exported): # ROCM_RESOLVED_VERSION - The resolved version string -# ROCM_TARBALL_URL - The full S3 URL to download +# ROCM_TARBALL_URL - The full URL to download platform="$1" gfx_target="$2" @@ -23,96 +26,34 @@ if [ -z "$platform" ] || [ -z "$gfx_target" ] || [ -z "$rocm_version" ]; then return 1 2>/dev/null || exit 1 fi -# Map GPU target to S3 naming convention -# Group targets (gfx110X, gfx120X) use "-all" suffix; individual targets have no suffix -s3_target="$gfx_target" -if [ "$gfx_target" = "gfx110X" ] || [ "$gfx_target" = "gfx120X" ]; then - s3_target="${gfx_target}-all" -fi - -dist_prefix="therock-dist-${platform}-${s3_target}" - +# Validate that a specific version was provided (no "latest" auto-detection) if [ "$rocm_version" = "latest" ]; then - echo "Auto-detecting latest ROCm version for ${platform}/${gfx_target}..." - s3_response=$(curl -s "https://therock-nightly-tarball.s3.amazonaws.com/?prefix=${dist_prefix}-7") - - # Use grep -o (basic regex, no PCRE) + sed for XML parsing - # Works on full Git Bash AND MinGit/BusyBox variants - files=$(echo "$s3_response" | tr -d '\r' | grep -o '[^<]*' | sed 's///;s/<\/Key>//' | grep "^${dist_prefix}-") - - # Validate that we found any files at all - file_count=$(echo "$files" | grep -c '.' 2>/dev/null || echo "0") - if [ "$file_count" -eq 0 ]; then - echo "ERROR: No ROCm tarball files found for prefix '${dist_prefix}-'" - echo "S3 response (first 500 chars): $(echo "$s3_response" | head -c 500)" - return 1 2>/dev/null || exit 1 - fi - echo "Found $file_count candidate files from S3" - - latest_file="" - latest_major=0 - latest_minor=0 - latest_patch=0 - latest_rc=0 - latest_is_alpha=false - - # ERE-compatible regex pattern for version extraction. - version_regex="^${dist_prefix}-([0-9]+[.][0-9]+[.][0-9]+(a|rc)[0-9]+)[.]tar[.]gz$" - - while IFS= read -r file; do - [ -z "$file" ] && continue - if [[ "$file" =~ $version_regex ]]; then - version="${BASH_REMATCH[1]}" - major=$(echo "$version" | cut -d. -f1) - minor=$(echo "$version" | cut -d. -f2) - patch=$(echo "$version" | cut -d. -f3 | sed 's/\(a\|rc\).*//') - rc=$(echo "$version" | sed 's/.*\(a\|rc\)//') - is_alpha=false - if [[ "$version" =~ a ]]; then is_alpha=true; fi - - is_newer=false - if [ "$major" -gt "$latest_major" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -gt "$latest_minor" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -gt "$latest_patch" ]; then is_newer=true; - elif [ "$major" -eq "$latest_major" ] && [ "$minor" -eq "$latest_minor" ] && [ "$patch" -eq "$latest_patch" ]; then - if [ "$is_alpha" = false ] && [ "$latest_is_alpha" = true ]; then is_newer=true; - elif [ "$is_alpha" = "$latest_is_alpha" ] && [ "$rc" -gt "$latest_rc" ]; then is_newer=true; - fi - fi - - if [ "$is_newer" = true ]; then - latest_file="$file" - latest_major="$major" - latest_minor="$minor" - latest_patch="$patch" - latest_rc="$rc" - latest_is_alpha="$is_alpha" - fi - fi - done <<< "$files" - - echo "Found latest file: $latest_file" - - if [ -z "$latest_file" ]; then - echo "ERROR: No valid ROCm tarball files matched the version pattern" - echo "Showing first 5 candidate files:" - echo "$files" | head -5 - return 1 2>/dev/null || exit 1 - fi + echo "ERROR: 'latest' auto-detection is not supported." + echo "Please specify a concrete ROCm version (e.g., 7.12.0, 7.2.1)." + echo "Available versions: https://repo.amd.com/rocm/tarball/" + return 1 2>/dev/null || exit 1 +fi - # Extract version from the resolved file using the same ERE-compatible pattern - if [[ "$latest_file" =~ $version_regex ]]; then - rocm_version="${BASH_REMATCH[1]}" - echo "Detected latest ROCm version: $rocm_version" - else - echo "Failed to extract ROCm version from latest file: $latest_file" - return 1 2>/dev/null || exit 1 - fi +# Validate version format (should be X.Y.Z or X.Y.ZaNNNNNNNN pattern) +if ! echo "$rocm_version" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then + echo "ERROR: Invalid ROCm version format: '$rocm_version'" + echo "Expected format: X.Y.Z (e.g., 7.12.0) or X.Y.ZaNNNNNNNN (e.g., 7.11.0a20251205)" + return 1 2>/dev/null || exit 1 +fi - export ROCM_TARBALL_URL="https://therock-nightly-tarball.s3.amazonaws.com/$latest_file" -else - export ROCM_TARBALL_URL="https://therock-nightly-tarball.s3.amazonaws.com/${dist_prefix}-${rocm_version}.tar.gz" +# For the AMD tarball distribution, use gfx1151 as the base target +# The tarball contains ROCm tools/libraries for all supported GPUs +# GPU targets are specified during build via GPU_TARGETS CMake variable +# Group targets (gfx110X, gfx120X) should use gfx1151 as the base +base_target="gfx1151" +if [ "$gfx_target" != "gfx110X" ] && [ "$gfx_target" != "gfx120X" ] && [ "$gfx_target" != "gfx1150" ] && [ "$gfx_target" != "gfx1100" ]; then + # Use the specific target if it's an individual target + base_target="$gfx_target" fi +# Construct the AMD official repo URL +ROCM_TARBALL_URL="https://repo.amd.com/rocm/tarball/therock-dist-${platform}-${base_target}-${rocm_version}.tar.gz" + export ROCM_RESOLVED_VERSION="$rocm_version" +echo "ROCm version: $ROCM_RESOLVED_VERSION" echo "ROCm URL: $ROCM_TARBALL_URL" From 8ce7027abb69ab637bafce0defdde5db6def75c8 Mon Sep 17 00:00:00 2001 From: Geramy Loveless Date: Fri, 17 Apr 2026 08:25:12 -0700 Subject: [PATCH 22/55] fix(ci): allow custom ROCm version input in workflow_dispatch Remove type:choice restriction so users can type any ROCm version while keeping 7.12.0 as default and linking to available versions --- .github/workflows/build.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 16be67d5f25..f70e7218ad3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -50,13 +50,9 @@ on: required: false default: 'gfx1151,gfx1150,gfx120X,gfx110X' rocm_version: - description: 'ROCm version to use (e.g., 7.12.0, 7.2.1)' + description: 'ROCm version to use (e.g., 7.12.0, 7.2.1). Available versions: https://repo.amd.com/rocm/tarball/' required: false default: '7.12.0' - type: choice - options: - - 7.12.0 - - 7.2.1 concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} From bd9985cf0b1ad53889ed90a71eac6830ebd341ae Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 9 Jun 2026 13:21:15 -0700 Subject: [PATCH 23/55] feat: unified AMD build repo with ROCm, Vulkan, NPU, CPU support --- .github/workflows/build.yml | 2268 +++++------------------ .github/workflows/sync.yml | 148 ++ CMakeLists.txt | 1 + README.md | 967 ++-------- scripts/local-build.ps1 | 395 ++++ src/CMakeLists.txt | 32 + src/vitisai/whisper-vitisai-encoder.cpp | 204 ++ src/vitisai/whisper-vitisai-encoder.h | 32 + src/whisper.cpp | 61 +- 9 files changed, 1537 insertions(+), 2571 deletions(-) create mode 100644 .github/workflows/sync.yml create mode 100644 scripts/local-build.ps1 create mode 100644 src/vitisai/whisper-vitisai-encoder.cpp create mode 100644 src/vitisai/whisper-vitisai-encoder.h diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4aca7b56e01..069cb9d9fcc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,85 +1,94 @@ -name: CI +name: AMD Build & Release +# ────────────────────────────────────────────────────────────────────────────── +# Triggers +# ────────────────────────────────────────────────────────────────────────────── on: - push: - branches: - - master - tags: - - 'v*' - paths: ['.github/workflows/build.yml', - '**/CMakeLists.txt', - '**/Makefile', - '**/*.mk', - '**/*.cmake', - '**/*.in', - '**/*.h', - '**/*.hpp', - '**/*.c', - '**/*.cpp', - '**/*.cu', - '**/*.cuh', - '**/*.cl', - '**/*.swift', - '**/*.m', - '**/*.mm', - '**/*.metal', - '**/*.comp', - '**/*.java'] - - pull_request: - types: [opened, synchronize, reopened] + schedule: + - cron: '0 2 * * 1' # Weekly – every Monday at 02:00 UTC workflow_dispatch: inputs: create_release: - description: 'Create new release' + description: 'Create GitHub Release' required: true type: boolean + default: false pre_release_tag: - description: 'Pre-release tag name' + description: 'Pre-release tag name (optional, overrides auto-tag)' required: false type: string run_type: - description: 'Workflow type to run' + description: 'Workflow scope' required: true type: choice options: - - full-ci - - release-only - gfx_target: - description: 'AMD GPU targets (comma-separated)' + - full-ci # all jobs + - release-only # release-producing jobs only + default: full-ci + gfx_targets: + description: 'ROCm GPU targets (comma-separated)' required: false + type: string default: 'gfx1151,gfx1150,gfx120X,gfx110X' rocm_version: - description: 'ROCm version to use (e.g., 7.12.0, 7.2.1). Available versions: https://repo.amd.com/rocm/tarball/' + description: 'ROCm version (e.g. 7.12.0)' required: false + type: string default: '7.12.0' + push: + branches: + - main + - master + paths: + - '.github/workflows/build.yml' + - '**/CMakeLists.txt' + - '**/*.cmake' + - '**/*.h' + - '**/*.hpp' + - '**/*.c' + - '**/*.cpp' + - '**/*.cu' + - '**/*.cuh' + - '**/*.comp' + tags: + - 'v*' + pull_request: + types: [opened, synchronize, reopened] concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} cancel-in-progress: true permissions: - contents: write # for creating release + contents: write +# ────────────────────────────────────────────────────────────────────────────── +# Shared environment +# ────────────────────────────────────────────────────────────────────────────── env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - ubuntu_image: "ubuntu:22.04" - VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" - GFX_TARGETS: ${{ github.event.inputs.gfx_target || 'gfx1151,gfx1150,gfx120X,gfx110X' }} + GFX_TARGETS: ${{ github.event.inputs.gfx_targets || 'gfx1151,gfx1150,gfx120X,gfx110X' }} ROCM_VERSION: ${{ github.event.inputs.rocm_version || '7.12.0' }} + FLEXML_URL: "https://github.com/lemonade-sdk/whisper.cpp/releases/download/deps/flexmlrt1.7.0-win.zip" + VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" jobs: + +# ════════════════════════════════════════════════════════════════════════════════ +# 0. Determine release tag +# ════════════════════════════════════════════════════════════════════════════════ determine-tag: runs-on: ubuntu-latest outputs: - tag_name: ${{ steps.tag.outputs.name }} + tag_name: ${{ steps.tag.outputs.name }} + version: ${{ steps.tag.outputs.version }} should_release: ${{ steps.tag.outputs.should_release }} steps: - - name: Checkout with full history - uses: actions/checkout@v6 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Determine tag name + + - name: Determine tag and version id: tag shell: bash run: | @@ -88,417 +97,102 @@ jobs: CUSTOM_TAG="${{ github.event.inputs.pre_release_tag }}" SHOULD_RELEASE="false" - echo "Raw values:" - echo "BUILD_NUMBER: $BUILD_NUMBER" - echo "SHORT_HASH: $SHORT_HASH" - echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}" - echo "CUSTOM_TAG: $CUSTOM_TAG" - if [[ "${{ github.ref_type }}" == "tag" ]]; then - echo "Using pushed tag name" + # Triggered by sync.yml pushing a vX.Y.Z tag — this is the primary release path TAG_NAME="${{ github.ref_name }}" SHOULD_RELEASE="true" elif [[ -n "$CUSTOM_TAG" ]]; then - echo "Using custom tag" - TAG_NAME="${CUSTOM_TAG}" + TAG_NAME="$CUSTOM_TAG" SHOULD_RELEASE="true" elif [[ "${{ github.event.inputs.create_release }}" == "true" ]]; then - echo "Manual release requested" - SHOULD_RELEASE="true" TAG_NAME="b${BUILD_NUMBER}" - elif [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "Using master branch format" + SHOULD_RELEASE="true" + elif [[ "${{ env.BRANCH_NAME }}" == "main" || "${{ env.BRANCH_NAME }}" == "master" ]]; then TAG_NAME="b${BUILD_NUMBER}" SHOULD_RELEASE="false" else - echo "Using non-master branch format" - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - TAG_NAME="${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" + SAFE=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + TAG_NAME="${SAFE}-b${BUILD_NUMBER}-${SHORT_HASH}" SHOULD_RELEASE="false" fi - echo "Final tag name: $TAG_NAME" - echo "Should release: $SHOULD_RELEASE" - echo "name=$TAG_NAME" >> $GITHUB_OUTPUT + # Version = tag without leading 'v', used in artifact filenames + # e.g. v1.8.4 → 1.8.4, b1234 → b1234 (unchanged for dev builds) + VERSION="${TAG_NAME#v}" + + echo "name=$TAG_NAME" >> $GITHUB_OUTPUT + echo "version=$VERSION" >> $GITHUB_OUTPUT echo "should_release=$SHOULD_RELEASE" >> $GITHUB_OUTPUT - prepare-matrix: +# ════════════════════════════════════════════════════════════════════════════════ +# 1. ROCm matrix (Linux + Windows per GFX target) +# ════════════════════════════════════════════════════════════════════════════════ + prepare-rocm-matrix: runs-on: ubuntu-latest outputs: - windows_matrix: ${{ steps.set-matrix.outputs.windows_matrix }} - ubuntu_matrix: ${{ steps.set-matrix.outputs.ubuntu_matrix }} - steps: - - name: Set matrix - id: set-matrix - run: | - targets="${{ env.GFX_TARGETS }}" - echo "Input targets: $targets" - - target_array=$(echo "$targets" \ - | tr ',' '\n' \ - | sed 's/^ *//;s/ *$//' \ - | sed 's/^"//;s/"$//' \ - | jq -R . \ - | jq -s .) - - windows_matrix=$(echo "$target_array" \ - | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}') - - ubuntu_matrix=$(echo "$target_array" \ - | jq -c '{gfx_target: ., sdl2: ["ON"], build: ["Release"], arch: ["linux/amd64"]}') - - echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT - echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT - - ubuntu-22: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - arch: [linux/amd64, linux/ppc64le] - + ubuntu_matrix: ${{ steps.m.outputs.ubuntu_matrix }} + windows_matrix: ${{ steps.m.outputs.windows_matrix }} steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential libsdl2-dev cmake git - cmake -B build - cmake --build build --config Release -j $(nproc)' - - ubuntu-22-arm64: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - arch: [linux/arm64] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential libsdl2-dev cmake git - cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a - cmake --build build --config Release -j $(nproc)' - - ubuntu-22-arm-v7: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - arch: [linux/arm/v7] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} + - name: Build matrix JSON + id: m run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential libsdl2-dev cmake git - cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp - cmake --build build --config Release -j $(nproc)' - - macOS-latest: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: macOS-latest + targets="${{ env.GFX_TARGETS }}" + arr=$(echo "$targets" | tr ',' '\n' | sed 's/^ *//;s/ *$//' | jq -R . | jq -s .) - strategy: - matrix: - destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] + ubuntu_matrix=$(echo "$arr" | jq -c \ + '{gfx_target: ., build: ["Release"], sdl2: ["ON"], arch: ["linux/amd64"]}') - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + windows_matrix=$(echo "$arr" | jq -c \ + '{gfx_target: ., build: ["Release"], sdl2: ["ON"], arch: ["x64"], s2arc: ["x64"], s2ver: ["2.28.5"]}') - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: macOS-latest-swift - evict-old-files: 1d + echo "ubuntu_matrix=$ubuntu_matrix" >> $GITHUB_OUTPUT + echo "windows_matrix=$windows_matrix" >> $GITHUB_OUTPUT - - name: Dependencies - run: | - brew update - cmake --version - brew install sdl2 - - - name: Build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DWHISPER_BUILD_EXAMPLES=OFF \ - -DWHISPER_BUILD_TESTS=OFF \ - -DWHISPER_BUILD_SERVER=OFF \ - -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - -# freeBSD-latest: -# runs-on: macos-13 -# -# steps: -# - name: Clone -# uses: actions/checkout@v6 -# -# - name: Build -# uses: cross-platform-actions/action@v0.27.0 -# with: -# operating_system: freebsd -# version: '14.2' -# run: | -# sudo pkg update -# sudo pkg install -y gmake sdl2 cmake git -# cmake -B build -# cmake --build build --config Release - - ubuntu-22-gcc: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} +# ════════════════════════════════════════════════════════════════════════════════ +# 2. ROCm — Linux +# ════════════════════════════════════════════════════════════════════════════════ + linux-rocm: runs-on: ubuntu-22.04 - + needs: [determine-tag, prepare-rocm-matrix] strategy: - fail-fast: false - matrix: - build: [Debug, Release] - arch: [linux/amd64, linux/ppc64le] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential cmake libsdl2-dev git - cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} - make - ctest -L gh --output-on-failure' - - ubuntu-22-gcc-arm64: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - build: [Debug, Release] - arch: [linux/arm64] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential cmake libsdl2-dev git - cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a - make - ctest -L gh --output-on-failure' - - ubuntu-22-gcc-arm-v7: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - build: [Debug, Release] - arch: [linux/arm/v7] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential cmake libsdl2-dev git - cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp - make - ctest -L gh --output-on-failure' - - ubuntu-22-clang: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - arch: [linux/amd64] - build: [Release] - - steps: - - name: Clone - uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y clang build-essential cmake libsdl2-dev git - cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang - make - ctest -L gh --output-on-failure' - - ubuntu-rocm: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - needs: [determine-tag, prepare-matrix] - strategy: - matrix: ${{fromJson(needs.prepare-matrix.outputs.ubuntu_matrix)}} + matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.ubuntu_matrix) }} fail-fast: false steps: - name: Free disk space run: | - echo "=== Disk usage before cleanup ===" - df -h / sudo rm -rf /usr/local/lib/android /opt/ghc /usr/local/share/boost \ /usr/share/dotnet /usr/local/.ghcup /opt/hostedtoolcache/CodeQL sudo docker image prune --all --force 2>/dev/null || true - echo "=== Disk usage after cleanup ===" - df -h / - - name: Checkout repository - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - - name: Install build dependencies - run: | - sudo apt update - sudo apt install -y cmake ninja-build unzip curl build-essential libsdl2-dev git patchelf + - name: Install dependencies + run: sudo apt update && sudo apt install -y cmake ninja-build curl build-essential libsdl2-dev git patchelf - - name: Download and extract ROCm directly to /opt/rocm + - name: Download ROCm tarball run: | source ci/resolve-rocm-version.sh linux "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV sudo mkdir -p /opt/rocm curl -sL "$ROCM_TARBALL_URL" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 - - name: Set ROCm environment variables + - name: Set ROCm env run: | - echo "HIP_PATH=/opt/rocm" >> $GITHUB_ENV + echo "HIP_PATH=/opt/rocm" >> $GITHUB_ENV echo "ROCM_PATH=/opt/rocm" >> $GITHUB_ENV - echo "HIP_PLATFORM=amd" >> $GITHUB_ENV + echo "HIP_PLATFORM=amd" >> $GITHUB_ENV echo "/opt/rocm/bin:/opt/rocm/llvm/bin:$PATH" >> $GITHUB_PATH - - - name: Find ROCm bitcode path + + - name: Find bitcode path run: | - # Dynamically find the directory containing device libraries (amdgcn/bitcode) BITCODE_PATH=$(find /opt/rocm -type d -name bitcode -print -quit) - - if [ -z "$BITCODE_PATH" ]; then - echo "::error::Could not find 'bitcode' directory in /opt/rocm" - find /opt/rocm -maxdepth 5 - exit 1 - fi - - echo "Found bitcode at: $BITCODE_PATH" + [ -z "$BITCODE_PATH" ] && { echo "::error::bitcode dir not found"; exit 1; } echo "ROCM_BITCODE_PATH=$BITCODE_PATH" >> $GITHUB_ENV - name: Configure CMake run: | source ci/map-gpu-target.sh "${{ matrix.gfx_target }}" - cmake -S . -B build -G Ninja \ -DCMAKE_C_COMPILER=/opt/rocm/llvm/bin/clang \ -DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ @@ -511,1550 +205,538 @@ jobs: -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build - run: cmake --build build --config ${{ matrix.build }} -j $(nproc) + run: cmake --build build --config ${{ matrix.build }} -j$(nproc) - # Copy Backend ROCm Folders --- - - name: Copy ROCm core and sysdep libs to build directory + - name: Copy ROCm runtime libs run: | - build_bin_path="build/bin" - rocm_bin_path="/opt/rocm/bin" - rocm_lib_path="/opt/rocm/lib" - rocm_sysdeps_path="/opt/rocm/lib/rocm_sysdeps/lib" - - # Ensure build directory exists - mkdir -p "$build_bin_path" - - # Copy rocblas/library folder - rocblas_lib_path="$rocm_lib_path/rocblas/library" - if [ -d "$rocblas_lib_path" ]; then - dest_rocblas_path="$build_bin_path/rocblas/library" - mkdir -p "$(dirname "$dest_rocblas_path")" - cp -r "$rocblas_lib_path" "$(dirname "$dest_rocblas_path")/" - echo "Copied: rocblas/library" - fi - - # Copy hipblaslt/library folder - hipblaslt_lib_path="$rocm_lib_path/hipblaslt/library" - if [ -d "$hipblaslt_lib_path" ]; then - dest_hipblaslt_path="$build_bin_path/hipblaslt/library" - mkdir -p "$(dirname "$dest_hipblaslt_path")" - cp -r "$hipblaslt_lib_path" "$(dirname "$dest_hipblaslt_path")/" - echo "Copied: hipblaslt/library" - fi - - # Copy standard ROCm shared libraries - echo "Copying core shared libraries..." - cp -v $rocm_lib_path/libhipblas.so* "$build_bin_path/" 2>/dev/null || true - cp -v $rocm_lib_path/librocblas.so* "$build_bin_path/" 2>/dev/null || true - cp -v $rocm_lib_path/libamdhip64.so* "$build_bin_path/" 2>/dev/null || true - cp -v $rocm_lib_path/librocsolver.so* "$build_bin_path/" 2>/dev/null || true - cp -v $rocm_lib_path/libroctx64.so* "$build_bin_path/" 2>/dev/null || true - cp -v $rocm_lib_path/libhipblaslt.so* "$build_bin_path/" 2>/dev/null || true - cp -v $rocm_lib_path/libamd_comgr.so* "$build_bin_path/" 2>/dev/null || true - cp -v $rocm_lib_path/libhsa-runtime64.so* "$build_bin_path/" 2>/dev/null || true - - # Copy LLVM runtime libs - cp -v $rocm_lib_path/llvm/lib/libLLVM.so* "$build_bin_path/" 2>/dev/null || true - cp -v $rocm_lib_path/llvm/lib/libclang-cpp.so* "$build_bin_path/" 2>/dev/null || true - - if [ -d "$rocm_sysdeps_path" ]; then - echo "Copying sysdep libraries from $rocm_sysdeps_path..." - # Using a broad wildcard ensures we grab elf.so.1, drm.so.2, numa.so.1, etc. - cp -v $rocm_sysdeps_path/librocm_sysdeps_*.so* "$build_bin_path/" - fi - - - name: Bundle Linked Libraries - run: | - build_bin="build/bin" - echo "Scanning dependencies for whisper-cli..." - - ldd "$build_bin/whisper-cli" | grep "=> /" | while read -r line; do - - soname=$(echo "$line" | awk '{print $1}') - path=$(echo "$line" | awk '{print $3}') - - if [[ "$soname" =~ ^(libc\.so|libm\.so|libdl\.so|librt\.so|libpthread\.so|libstdc\+\+|libgcc_s|ld-linux) ]]; then - continue - fi - echo "Bundling: $soname" - echo " Source: $path" - cp -L "$path" "$build_bin/$soname" + BIN="build/bin" + LIB="/opt/rocm/lib" + mkdir -p "$BIN" + [ -d "$LIB/rocblas/library" ] && { mkdir -p "$BIN/rocblas"; cp -r "$LIB/rocblas/library" "$BIN/rocblas/"; } + [ -d "$LIB/hipblaslt/library" ] && { mkdir -p "$BIN/hipblaslt"; cp -r "$LIB/hipblaslt/library" "$BIN/hipblaslt/"; } + for so in libhipblas librocblas libamdhip64 librocsolver libroctx64 libhipblaslt libamd_comgr libhsa-runtime64; do + cp -v $LIB/${so}.so* "$BIN/" 2>/dev/null || true done - chmod +x "$build_bin"/*.so* + cp -v $LIB/llvm/lib/libLLVM.so* "$BIN/" 2>/dev/null || true + cp -v $LIB/llvm/lib/libclang-cpp.so* "$BIN/" 2>/dev/null || true + [ -d "$LIB/rocm_sysdeps/lib" ] && cp -v $LIB/rocm_sysdeps/lib/librocm_sysdeps_*.so* "$BIN/" || true - - name: Set RPATH for portable distribution + - name: Bundle linked libraries run: | - cd build/bin - # Set RPATH to $ORIGIN so the binary looks for .so files in its own directory - # wildcards catch whisper-cli, whisper-bench, etc., plus shared libs - for file in *.so* whisper-*; do - if [ -f "$file" ] && [ ! -L "$file" ]; then - # Only patch ELF files (executables and shared objects) - if file "$file" | grep -q "ELF"; then - patchelf --set-rpath '$ORIGIN' "$file" 2>/dev/null || true - echo "Patched RPATH for $file" - fi - fi + ldd build/bin/whisper-cli | grep "=> /" | while read -r line; do + soname=$(echo "$line" | awk '{print $1}') + path=$(echo "$line" | awk '{print $3}') + [[ "$soname" =~ ^(libc|libm|libdl|librt|libpthread|libstdc\+\+|libgcc_s|ld-linux) ]] && continue + cp -L "$path" "build/bin/$soname" 2>/dev/null || true done - - name: Pack bin artifacts + - name: Set portable RPATH run: | cd build/bin - SAFE_ARCH=$(echo "${{ matrix.arch }}" | tr '/' '-') - - # 1. Save SAFE_ARCH to GITHUB_ENV so the next step can use it - echo "SAFE_ARCH=$SAFE_ARCH" >> $GITHUB_ENV - - # Include target in filename (e.g., whisper-bin-gfx1100-linux-amd64.zip) - zip -r ../../whisper-bin-${{ matrix.gfx_target }}-${SAFE_ARCH}.zip . - - - name: Upload binaries - if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} - uses: actions/upload-artifact@v4 - with: - name: whisper-bin-${{ matrix.gfx_target }}-${{ env.SAFE_ARCH }} - path: whisper-bin-*.zip - - ubuntu-22-gcc-sanitized: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - sanitizer: [ADDRESS, THREAD, UNDEFINED] - arch: [linux/amd64] - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Build ${{ matrix.arch }} - run: | - docker run --platform ${{ matrix.arch }} --rm \ - -v ${{ github.workspace }}:/workspace \ - -w /workspace ${{ env.ubuntu_image }} /bin/sh -c ' - set -e - export DEBIAN_FRONTEND=noninteractive - sed -i "s|archive.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - sed -i "s|security.ubuntu.com|mirrors.kernel.org|g" /etc/apt/sources.list - - apt update - apt install -y build-essential cmake git - cmake . -DCMAKE_BUILD_TYPE=Debug \ - -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DGGML_OPENMP=OFF - make - ctest -L gh --output-on-failure' - - ubuntu-22-cmake-sycl: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - dwhisper_sycl: [ON] - dcmake_c_compiler: [icx] - dcmake_cxx_compiler: [icpx] - arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] - - continue-on-error: true - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: add oneAPI to apt - shell: bash - run: | - cd /tmp - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - - - name: install oneAPI dpcpp compiler - shell: bash - run: | - sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp git - - - name: install oneAPI MKL library - shell: bash - run: | - sudo apt install intel-oneapi-mkl-devel git - - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Build - id: cmake_build - run: | - source /opt/intel/oneapi/setvars.sh - mkdir build - cd build - cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. - cmake --build . --config Release -j $(nproc) - - ubuntu-22-cmake-sycl-fp16: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - fail-fast: false - matrix: - dwhisper_sycl: [ON] - dcmake_c_compiler: [icx] - dcmake_cxx_compiler: [icpx] - arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le] - - continue-on-error: true - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: add oneAPI to apt - shell: bash - run: | - cd /tmp - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - - - name: install oneAPI dpcpp compiler - shell: bash - run: | - sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp git - - - name: install oneAPI MKL library - shell: bash - run: | - sudo apt install intel-oneapi-mkl-devel - - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Build - id: cmake_build - run: | - source /opt/intel/oneapi/setvars.sh - mkdir build - cd build - cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. - cmake --build . --config Release -j $(nproc) - - windows-msys2: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: windows-latest - - strategy: - fail-fast: false - matrix: - include: - - { sys: UCRT64, env: ucrt-x86_64, build: Release } - - { sys: CLANG64, env: clang-x86_64, build: Release } - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Setup ${{ matrix.sys }} - uses: msys2/setup-msys2@v2 - with: - update: true - msystem: ${{matrix.sys}} - install: >- - base-devel - git - mingw-w64-${{matrix.env}}-toolchain - mingw-w64-${{matrix.env}}-cmake - mingw-w64-${{matrix.env}}-SDL2 - mingw-w64-${{matrix.env}}-openblas - - - name: Build using CMake - shell: msys2 {0} - run: | - cmake -B build -DWHISPER_SDL2=ON - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - - name: Clean after building using CMake - shell: msys2 {0} - run: | - rm -rf build - - - name: Build using CMake w/ OpenBLAS - shell: msys2 {0} - run: | - cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - windows: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: windows-latest - needs: determine-tag - - strategy: - matrix: - build: [Release] - arch: [Win32, x64] - sdl2: [ON] - include: - - arch: Win32 - s2arc: x86 - jnaPath: win32-x86 - - arch: x64 - s2arc: x64 - jnaPath: win32-x86-64 - - sdl2: ON - s2ver: 2.28.5 - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v2 - - - name: Fetch SDL2 and set SDL2_DIR - if: matrix.sdl2 == 'ON' - run: | - C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip - 7z x sdl2.zip - echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV - - - name: Configure - run: > - cmake -S . -B ./build -A ${{ matrix.arch }} - -DCMAKE_BUILD_TYPE=${{ matrix.build }} - -DBUILD_SHARED_LIBS=ON - -DWHISPER_SDL2=${{ matrix.sdl2 }} - - - name: Build - run: | - cd ./build - msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} - - - name: Copy SDL2.dll - if: matrix.sdl2 == 'ON' - run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} - - - name: Upload SDL2.dll - if: matrix.sdl2 == 'ON' - uses: actions/upload-artifact@v6 - with: - name: ${{ matrix.s2arc }}_SDL2.dll - path: build/bin/${{ matrix.build }}/SDL2.dll - - - name: Upload whisper dll - uses: actions/upload-artifact@v6 - with: - name: whisper_${{ matrix.arch }}.dll - path: build/bin/${{ matrix.build }}/whisper.dll - - - name: Upload ggml dll - uses: actions/upload-artifact@v6 - with: - name: ggml_${{ matrix.arch }}.dll - path: build/bin/${{ matrix.build }}/ggml.dll - - - name: Upload ggml base dll - uses: actions/upload-artifact@v6 - with: - name: ggml_base_${{ matrix.arch }}.dll - path: build/bin/${{ matrix.build }}/ggml-base.dll - - - name: Upload ggml cpu dll - uses: actions/upload-artifact@v6 - with: - name: ggml_cpu_${{ matrix.arch }}.dll - path: build/bin/${{ matrix.build }}/ggml-cpu.dll - - - name: Pack bin artifacts - shell: pwsh - run: | - Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-bin-${{ matrix.arch }}.zip" - - - name: Upload binaries - if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} - uses: actions/upload-artifact@v6 - with: - name: whisper-bin-${{ matrix.arch }}.zip - path: whisper-bin-${{ matrix.arch }}.zip - - windows-blas: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: windows-latest - - strategy: - matrix: - build: [Release] - arch: [Win32, x64] - blas: [ON] - sdl2: [ON] - blasver: [0.3.29] - include: - - arch: Win32 - s2arc: x86 - blasfile: x86 - - arch: x64 - s2arc: x64 - blasfile: x64_64 - - sdl2: ON - s2ver: 2.28.5 - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Export GitHub Actions cache environment variables - uses: actions/github-script@v8 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v2 - - - name: Install OpenBLAS and pkgconfiglite - if: matrix.blas == 'ON' - run: | - Invoke-WebRequest "https://github.com/OpenMathLib/OpenBLAS/releases/download/v${{matrix.blasver}}/OpenBLAS-${{matrix.blasver}}_${{matrix.blasfile}}.zip" -OutFile "OpenBLAS-${{matrix.blasver}}.zip" - Expand-Archive "OpenBLAS-${{matrix.blasver}}.zip" -DestinationPath "OpenBLAS-${{matrix.blasver}}" - choco install pkgconfiglite - - - name: Fetch SDL2 and set SDL2_DIR - if: matrix.sdl2 == 'ON' - run: | - C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip - 7z x sdl2.zip - echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV - - - name: Configure - run: > - cmake -S . -B ./build -A ${{ matrix.arch }} - -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake" - -DCMAKE_BUILD_TYPE=${{ matrix.build }} - -DGGML_BLAS=${{ matrix.blas }} - -DGGML_BLAS_VENDOR=OpenBLAS - -DBLAS_LIBRARIES="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/lib/libopenblas.lib" - -DBLAS_INCLUDE_DIRS="$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/include" - -DWHISPER_SDL2=${{ matrix.sdl2 }} - - - name: Build - run: | - cd ./build - msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} - - - name: Copy openblas.dll - if: matrix.blas == 'ON' - run: copy "$env:GITHUB_WORKSPACE/OpenBLAS-${{matrix.blasver}}/bin/libopenblas.dll" build/bin/${{ matrix.build }} - - - name: Copy SDL2.dll - if: matrix.sdl2 == 'ON' - run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }} - - - name: Pack bin artifacts - shell: pwsh - run: | - Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-blas-bin-${{ matrix.arch }}.zip" - - - name: Upload binaries - if: ${{ matrix.blas == 'ON' && matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} - uses: actions/upload-artifact@v6 - with: - name: whisper-blas-bin-${{ matrix.arch }}.zip - path: whisper-blas-bin-${{ matrix.arch }}.zip - - windows-cublas: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: windows-2022 - needs: determine-tag - strategy: - fail-fast: false - matrix: - build: [Release] - arch: [x64] - cublas: [ON] - sdl2: [ON] - cuda-toolkit: [12.4.0, 11.8.0] - include: - - arch: x64 - sdl2: ON - sdl2_ver: 2.28.5 - steps: - - name: Clone repository - uses: actions/checkout@v6 + for f in *.so* whisper-*; do + [ -f "$f" ] && [ ! -L "$f" ] && file "$f" | grep -q ELF && patchelf --set-rpath '$ORIGIN' "$f" 2>/dev/null || true + done - - name: Install Ninja - id: install_ninja + - name: Package run: | - choco install ninja + VER="${{ needs.determine-tag.outputs.version }}" + ARCHIVE="whisper-${VER}-linux-rocm-${{ matrix.gfx_target }}.tar.gz" + STAGE="whisper-${VER}-linux-rocm-${{ matrix.gfx_target }}" + mkdir -p "$STAGE" && cp -r build/bin/* "$STAGE/" + tar -czf "$ARCHIVE" "$STAGE" + echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV - - name: Install ccache - uses: hendrikmuhs/ccache-action@v1.2.16 + - uses: actions/upload-artifact@v4 + if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: - key: ${{ github.job }}-${{ matrix.cuda-toolkit }}-${{ matrix.build }} - variant: sccache - evict-old-files: 5d - - - name: Install Cuda Toolkit 11.8.0 - if: ${{ matrix.cuda-toolkit == '11.8.0' }} - run: | - $CUDA_VERSION = ${{ matrix.cuda-toolkit }} - $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION" - $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist" - - # Components versions - $CUDART_VER = "11.8.89" - $NVCC_VER = "11.8.89" - $NVRTC_VER = "11.8.89" - $CUBLAS_VER = "11.8.1.74" - $NVTX_VER = "11.8.86" - $VS_VER = "11.8.86" - $NVPROF_VER = "11.8.87" - $CCCL_VER = "11.8.89" - - # Create the directory where the CUDA Toolkit will be installed - mkdir -p $CUDA_TOOLKIT_DIR - - # Install unzip to extract the downloaded files - choco install unzip -y - - # Download all the required components - curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip" - - # Extract all the downloaded files to the CUDA Toolkit directory - unzip '*.zip' -d $CUDA_TOOLKIT_DIR - - # Copy all the extracted files to the main CUDA Toolkit directory - xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - - # Visual Studio integration - xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y - - # Set environment variables - echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - echo "CUDA_PATH_V11_8=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - - - name: Install Cuda Toolkit 12.4.0 - if: ${{ matrix.cuda-toolkit == '12.4.0' }} - run: | - $CUDA_VERSION = ${{ matrix.cuda-toolkit }} - $CUDA_TOOLKIT_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_VERSION" - $CUDA_DOWNLOAD = "https://developer.download.nvidia.com/compute/cuda/redist" - - # Components versions - $CUDART_VER = "12.4.127" - $NVCC_VER = "12.4.131" - $NVRTC_VER = "12.4.127" - $CUBLAS_VER = "12.4.5.8" - $NVTX_VER = "12.4.127" - $PROFILER_VER = "12.4.127" - $VS_VER = "12.4.127" - $NVPROF_VER = "12.4.128" - $CCCL_VER = "12.4.127" - - # Create the directory where the CUDA Toolkit will be installed - mkdir -p $CUDA_TOOLKIT_DIR - - # Install unzip to extract the downloaded files - choco install unzip -y - - # Download all the required components - curl -O "$CUDA_DOWNLOAD/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-${CUDART_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-${NVCC_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/libcublas/windows-x86_64/libcublas-windows-x86_64-${CUBLAS_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-${NVTX_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-${VS_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive.zip" - curl -O "$CUDA_DOWNLOAD/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-${CCCL_VER}-archive.zip" - - # Extract all the downloaded files to the CUDA Toolkit directory - unzip -q '*.zip' -d $CUDA_TOOLKIT_DIR - - # Copy all the extracted files to the main CUDA Toolkit directory - xcopy "$CUDA_TOOLKIT_DIR\cuda_cudart-windows-x86_64-${CUDART_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvcc-windows-x86_64-${NVCC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvrtc-windows-x86_64-${NVRTC_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\libcublas-windows-x86_64-${CUBLAS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvtx-windows-x86_64-${NVTX_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_nvprof-windows-x86_64-${NVPROF_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_cccl-windows-x86_64-${CCCL_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\cuda_profiler_api-windows-x86_64-${PROFILER_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\*" "$CUDA_TOOLKIT_DIR" /E /I /H /Y - - # Visual Studio integration - xcopy "$CUDA_TOOLKIT_DIR\visual_studio_integration-windows-x86_64-${VS_VER}-archive\visual_studio_integration\MSBuildExtensions\*" "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\v170\BuildCustomizations" /E /I /H /Y - - # Set environment variables - echo "$CUDA_TOOLKIT_DIR\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "$CUDA_TOOLKIT_DIR\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "CUDA_PATH=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - echo "CUDA_PATH_V12_2=$CUDA_TOOLKIT_DIR" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - - - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v2 - - - name: Install 7-Zip - run: choco install 7zip -y - - - name: Fetch SDL2 and set SDL2_DIR - if: matrix.sdl2 == 'ON' - run: | - Invoke-WebRequest -Uri https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.sdl2_ver }}/SDL2-devel-${{ matrix.sdl2_ver }}-VC.zip -OutFile sdl2.zip - 7z x sdl2.zip - echo "SDL2_DIR=${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" | Out-File -FilePath $env:GITHUB_ENV -Append - echo "${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" > SDL2_PATH.txt - - - name: Install cmake - run: choco install cmake - - - name: Build Project - shell: cmd - run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" - cmake --version - where cmake - if "${{ matrix.cuda-toolkit }}" == "11.8.0" ( - set CUDA_FLAGS=-allow-unsupported-compiler -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR - ) else ( - set CUDA_FLAGS= - ) - cmake -S . -B build -G "Ninja Multi-Config" ^ - -DCMAKE_BUILD_TYPE=${{ matrix.build }} ^ - -DGGML_CUDA=${{ matrix.cublas }} ^ - -DWHISPER_SDL2=${{ matrix.sdl2 }} ^ - -DSDL2_DIR="%SDL2_DIR%" ^ - -DCMAKE_POLICY_VERSION_MINIMUM=3.5 ^ - -DCMAKE_CUDA_FLAGS="%CUDA_FLAGS%" - set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 - cmake --build build --config ${{ matrix.build }} -j %NUMBER_OF_PROCESSORS% - - - name: Check sccache status after build - run: | - sccache --show-stats - - - name: Copy CUDA DLLs - run: | - Get-ChildItem "$env:CUDA_PATH\bin\" -Filter "*.dll" | - Copy-Item -Destination "build/bin/${{ matrix.build }}" + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - - name: Copy SDL2.dll - if: matrix.sdl2 == 'ON' - run: copy "$env:SDL2_DIR/../lib/${{ matrix.arch }}/SDL2.dll" build/bin/${{ matrix.build }} - - - name: Pack bin artifacts - shell: pwsh - run: | - Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath "whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip" - - - name: Upload binaries - if: ${{ needs.determine-tag.outputs.should_release }} - uses: actions/upload-artifact@v6 - with: - name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip - path: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}.zip - +# ════════════════════════════════════════════════════════════════════════════════ +# 3. ROCm — Windows +# ════════════════════════════════════════════════════════════════════════════════ windows-rocm: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} runs-on: windows-latest - needs: [determine-tag, prepare-matrix] + needs: [determine-tag, prepare-rocm-matrix] strategy: - matrix: ${{fromJson(needs.prepare-matrix.outputs.windows_matrix)}} + matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.windows_matrix) }} fail-fast: false steps: - - name: Clone - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - name: Install Ninja run: choco install ninja - - name: Fetch SDL2 and Patch Header (Robust) + - name: Fetch SDL2 and patch header if: matrix.sdl2 == 'ON' shell: pwsh run: | - $sdlVer = "${{ matrix.s2ver }}" - $url = "https://github.com/libsdl-org/SDL/releases/download/release-$sdlVer/SDL2-devel-$sdlVer-VC.zip" - - Write-Host "Downloading SDL2 from $url..." - Invoke-WebRequest -Uri $url -OutFile "sdl2.zip" - - Write-Host "Extracting SDL2..." + $url = "https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip" + Invoke-WebRequest -Uri $url -OutFile sdl2.zip 7z x sdl2.zip - - # 1. Locate CMake config dynamically - $cmakeFile = Get-ChildItem -Path . -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1 - if ($cmakeFile) { - $cmakeDir = $cmakeFile.DirectoryName - Write-Host "Found SDL2 CMake dir at: $cmakeDir" - echo "SDL2_DIR=$cmakeDir" >> $env:GITHUB_ENV - } else { - Write-Error "FATAL: Could not find sdl2-config.cmake inside extracted files!" - exit 1 - } - - # 2. Find and Patch SDL_endian.h recursively - Write-Host "Searching for SDL_endian.h..." - $headerFile = Get-ChildItem -Path . -Recurse -Filter "SDL_endian.h" | Select-Object -First 1 - - if ($headerFile) { - Write-Host "Found header at: $($headerFile.FullName)" - $content = Get-Content $headerFile.FullName -Raw - - # The Fix: Comment out the extern declaration - if ($content -match 'extern void _m_prefetch') { - $content = $content -replace 'extern void _m_prefetch\(void \*__P\);', '// extern void _m_prefetch(void *__P);' - Set-Content -Path $headerFile.FullName -Value $content - Write-Host "SUCCESS: Patched _m_prefetch in SDL_endian.h" - } else { - Write-Host "WARNING: _m_prefetch string not found. It might be a different SDL version or already patched." - } - } else { - # Debug output if file is missing - Write-Host "Listing root directories:" - Get-ChildItem -Path . -Directory | Format-Table Name - Write-Error "FATAL: Could not locate SDL_endian.h in the workspace." - exit 1 - } + $cmake = Get-ChildItem -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1 + if ($cmake) { echo "SDL2_DIR=$($cmake.DirectoryName)" >> $env:GITHUB_ENV } + else { Write-Error "sdl2-config.cmake not found"; exit 1 } + $hdr = Get-ChildItem -Recurse -Filter "SDL_endian.h" | Select-Object -First 1 + if ($hdr) { + $c = Get-Content $hdr.FullName -Raw + if ($c -match 'extern void _m_prefetch') { + $c = $c -replace 'extern void _m_prefetch\(void \*__P\);','// extern void _m_prefetch(void *__P);' + Set-Content $hdr.FullName $c + } + } else { Write-Error "SDL_endian.h not found"; exit 1 } - - name: Resolve ROCm version and download tarball + - name: Download ROCm tarball shell: bash run: | source ci/resolve-rocm-version.sh windows "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV curl -sL "$ROCM_TARBALL_URL" -o rocm.tar.gz - - name: Extract ROCm to C:\opt\rocm + - name: Extract ROCm + shell: pwsh run: | New-Item -ItemType Directory -Force -Path "C:\opt\rocm" tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 - name: Map GPU target - id: gpu-target + id: gpu shell: bash run: | source ci/map-gpu-target.sh "${{ matrix.gfx_target }}" echo "mapped=$MAPPED_GPU_TARGET" >> $GITHUB_OUTPUT - - name: Configure + - name: Configure CMake shell: pwsh run: | - $env:HIP_PATH = "C:\opt\rocm" + $env:HIP_PATH = "C:\opt\rocm" $env:HIP_PLATFORM = "amd" $env:PATH = "$env:HIP_PATH\bin;$env:HIP_PATH\lib\llvm\bin;$env:PATH" - - $cmakeArgs = @( - "-S", ".", - "-B", "build", - "-G", "Ninja Multi-Config", - "-DGPU_TARGETS=${{ steps.gpu-target.outputs.mapped }}", - "-DGGML_HIP=ON", - "-DCMAKE_C_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang.exe", - "-DCMAKE_CXX_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", - "-DCMAKE_HIP_COMPILER=$env:HIP_PATH/lib/llvm/bin/amdclang++.exe", - "-DCMAKE_C_FLAGS='-D__PRFCHWINTRIN_H'", - "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'", - "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm", - "-DCMAKE_PREFIX_PATH=$env:HIP_PATH", - "-DCMAKE_BUILD_TYPE=${{ matrix.build }}", - "-DBUILD_SHARED_LIBS=ON", - "-DWHISPER_SDL2=${{ matrix.sdl2 }}" - ) - cmake @cmakeArgs + cmake -S . -B build ` + -G "Ninja Multi-Config" ` + -DGPU_TARGETS="${{ steps.gpu.outputs.mapped }}" ` + -DGGML_HIP=ON ` + -DCMAKE_C_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang.exe" ` + -DCMAKE_CXX_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang++.exe" ` + -DCMAKE_HIP_COMPILER="$env:HIP_PATH/lib/llvm/bin/amdclang++.exe" ` + "-DCMAKE_C_FLAGS='-D__PRFCHWINTRIN_H'" ` + "-DCMAKE_CXX_FLAGS='-D__PRFCHWINTRIN_H'" ` + "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm" ` + -DCMAKE_PREFIX_PATH="$env:HIP_PATH" ` + -DCMAKE_BUILD_TYPE=${{ matrix.build }} ` + -DBUILD_SHARED_LIBS=ON ` + -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build shell: pwsh - run: | - cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS + run: cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS - - name: Copy ROCm core DLLs to build directory + - name: Copy ROCm DLLs + shell: pwsh run: | - $rocmVersion = if ($env:DETECTED_ROCM_VERSION) { $env:DETECTED_ROCM_VERSION } else { $env:ROCM_VERSION } - $buildBinPath = "build/bin/${{ matrix.build }}" - $rocmBinPath = "C:\opt\rocm\bin" - - Write-Host "Copying ROCm core DLL files..." - - if (Test-Path $rocmBinPath) { - # Copy files matching patterns - $filesToCopy = @( - "amdhip64_*.dll", - "amd_comgr*.dll", - "libhipblas.dll", - "rocblas.dll", - "rocsolver.dll", - "hipblaslt.dll", - "libhipblaslt.dll", - "hipblas.dll" - ) - - foreach ($pattern in $filesToCopy) { - $matchingFiles = Get-ChildItem -Path $rocmBinPath -Name $pattern -ErrorAction SilentlyContinue - if ($matchingFiles) { - foreach ($file in $matchingFiles) { - Copy-Item (Join-Path $rocmBinPath $file) (Join-Path $buildBinPath $file) - Write-Host "Copied: $file" - } - } - } - - # Copy rocblas/library - $rocblasLibPath = Join-Path $rocmBinPath "rocblas\library" - if (Test-Path $rocblasLibPath) { - Copy-Item -Path $rocblasLibPath -Destination (Join-Path $buildBinPath "rocblas\library") -Recurse -Force - Write-Host "Copied: rocblas\library" - } - - # Copy hipblaslt/library - $hipblasltLibPath = Join-Path $rocmBinPath "hipblaslt\library" - if (Test-Path $hipblasltLibPath) { - Copy-Item -Path $hipblasltLibPath -Destination (Join-Path $buildBinPath "hipblaslt\library") -Recurse -Force - Write-Host "Copied: hipblaslt\library" - } + $bin = "build/bin/${{ matrix.build }}" + $rocBin = "C:\opt\rocm\bin" + @("amdhip64_*.dll","amd_comgr*.dll","libhipblas.dll","rocblas.dll", + "rocsolver.dll","hipblaslt.dll","libhipblaslt.dll","hipblas.dll") | ForEach-Object { + Get-ChildItem $rocBin -Name $_ -ErrorAction SilentlyContinue | + ForEach-Object { Copy-Item (Join-Path $rocBin $_) (Join-Path $bin $_) } } + $rocLib = Join-Path $rocBin "rocblas\library" + if (Test-Path $rocLib) { Copy-Item $rocLib -Destination (Join-Path $bin "rocblas\library") -Recurse -Force } + $hipLib = Join-Path $rocBin "hipblaslt\library" + if (Test-Path $hipLib) { Copy-Item $hipLib -Destination (Join-Path $bin "hipblaslt\library") -Recurse -Force } - name: Copy SDL2.dll if: matrix.sdl2 == 'ON' + shell: pwsh run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" "build/bin/${{ matrix.build }}" - - name: Pack bin artifacts + - name: Package shell: pwsh run: | - # Create unique zip name with target suffix - $zipName = "whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip" - Compress-Archive -Path "build/bin/${{ matrix.build }}" -DestinationPath $zipName + $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-${{ matrix.gfx_target }}.zip" + Compress-Archive -Path "build/bin/${{ matrix.build }}/*" -DestinationPath $a -Force + "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - - name: Upload binaries - if: ${{ matrix.sdl2 == 'ON' && needs.determine-tag.outputs.should_release }} - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v4 + if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: - # Unique artifact name per matrix job - name: whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip - path: whisper-bin-${{ matrix.gfx_target }}-windows-${{ matrix.arch }}.zip - - emscripten: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - strategy: - matrix: - build: [Release] + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Setup emsdk - uses: mymindstorm/setup-emsdk@v14 - - - name: Verify - run: emcc -v - - - name: Build - run: | - emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} - make - - ios-xcode-build: - runs-on: macos-latest +# ════════════════════════════════════════════════════════════════════════════════ +# 4. Vulkan — Linux +# ════════════════════════════════════════════════════════════════════════════════ + linux-vulkan: + runs-on: ubuntu-latest needs: determine-tag - strategy: - matrix: - build: [Release] - steps: - - name: Checkout code - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: Configure + - name: Install dependencies run: | - cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin - mkdir models/ggml-base.en-encoder.mlmodelc + sudo apt-get update + sudo apt-get install -y build-essential cmake git libsdl2-dev pkg-config libvulkan-dev vulkan-tools + sudo apt-get install -y glslc || sudo apt-get install -y shaderc - - name: Build - id: cmake_build + - name: Check Vulkan availability run: | - sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DWHISPER_BUILD_EXAMPLES=OFF \ - -DWHISPER_BUILD_TESTS=OFF \ - -DWHISPER_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - - name: xcodebuild for swift package - id: xcodebuild - run: | - ./build-xcframework.sh - - - name: Build objc example - run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO FRAMEWORK_FOLDER_PATH=./build-ios build - - - name: Build swiftui example - run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build + if ! command -v glslc >/dev/null 2>&1 && ! command -v glslangValidator >/dev/null 2>&1; then + echo "::error::No GLSL compiler found (glslc / shaderc)"; exit 1 + fi - - name: Pack artifacts - id: pack_artifacts + - name: Configure CMake run: | - zip --symlinks -r whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip build-apple/whisper.xcframework - - - name: Upload artifacts - if: ${{ needs.determine-tag.outputs.should_release }} - uses: actions/upload-artifact@v6 - with: - path: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip - name: whisper-${{ needs.determine-tag.outputs.tag_name }}-xcframework.zip - - android: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - steps: - - name: Clone - uses: actions/checkout@v6 - with: - path: whisper - - - name: Install Java - uses: actions/setup-java@v5 - with: - distribution: zulu - java-version: 21 - - - name: Setup Android SDK - uses: android-actions/setup-android@v3 + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_VULKAN=ON \ + -DWHISPER_BUILD_EXAMPLES=ON \ + -DWHISPER_BUILD_TESTS=OFF \ + -DWHISPER_BUILD_SERVER=ON - name: Build - run: | - cd whisper/examples/whisper.android - ./gradlew assembleRelease --no-daemon + run: cmake --build build --config Release -j$(nproc) - - name: Build with external ggml + - name: Validate Vulkan artifacts run: | - export PATH_TO_GGML=$PWD/ggml - cd whisper/examples/whisper.android - ./gradlew assembleRelease --no-daemon - - android_java: - runs-on: ubuntu-22.04 - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: set up JDK 11 - uses: actions/setup-java@v5 - with: - java-version: '11' - distribution: 'temurin' - cache: gradle - - - name: Setup Android SDK - uses: android-actions/setup-android@v3 - with: - cmdline-tools-version: 9.0 + VFILES=$(find build -type f \( -iname "*vulkan*.so*" -o -iname "*vulkan*" \) 2>/dev/null | wc -l) + [ "$VFILES" -eq 0 ] && echo "::warning::No Vulkan-related artifacts found" - - name: Build + - name: Package run: | - cd examples/whisper.android.java - chmod +x ./gradlew - ./gradlew assembleRelease - - bindings-java: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - needs: ['windows'] - runs-on: windows-latest - steps: - - uses: actions/checkout@v6 - - - name: Install Java - uses: actions/setup-java@v5 - with: - distribution: zulu - java-version: 20 - - - name: Download Whisper Windows lib - uses: actions/download-artifact@v7 - with: - name: whisper_x64.dll + VER="${{ needs.determine-tag.outputs.version }}" + ARCHIVE="whisper-${VER}-linux-vulkan-x86_64.tar.gz" + STAGE="whisper-${VER}-linux-vulkan-x86_64" + mkdir -p "$STAGE" + cp -r build/bin/* "$STAGE/" 2>/dev/null || true + find build -name "*.so*" -exec cp {} "$STAGE/" \; 2>/dev/null || true + tar -czf "$ARCHIVE" "$STAGE" + echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV - - name: Download GGML Windows lib - uses: actions/download-artifact@v7 + - uses: actions/upload-artifact@v4 + if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: - name: ggml_x64.dll + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - - name: Download GGML Base Windows lib - uses: actions/download-artifact@v7 - with: - name: ggml_base_x64.dll +# ════════════════════════════════════════════════════════════════════════════════ +# 5. Vulkan — Windows +# ════════════════════════════════════════════════════════════════════════════════ + windows-vulkan: + runs-on: windows-latest + needs: determine-tag - - name: Download GGML CPU Windows lib - uses: actions/download-artifact@v7 - with: - name: ggml_cpu_x64.dll + steps: + - uses: actions/checkout@v4 - - name: Download SDL2.dll - uses: actions/download-artifact@v7 - with: - name: x64_SDL2.dll + - uses: microsoft/setup-msbuild@v2 - - name: List downloaded files + - name: Install Vulkan SDK shell: pwsh run: | - Get-ChildItem -Path "." -Recurse -Filter "*.dll" + winget install --id KhronosGroup.VulkanSDK -e --silent --accept-package-agreements --accept-source-agreements + $sdk = Get-ChildItem "C:\VulkanSDK" -ErrorAction SilentlyContinue | Select-Object -First 1 + if (-not $sdk) { throw "Vulkan SDK not found under C:\VulkanSDK" } + "VULKAN_SDK=$($sdk.FullName)" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - - name: Move DLL to correct location + - name: Fetch SDL2 shell: pwsh run: | - New-Item -Path "build\bin\Release" -ItemType Directory -Force - - Copy-Item -Path "whisper.dll" -Destination "build\bin\Release\whisper.dll" -Force - Write-Host "Copied whisper.dll to build\bin\Release\whisper.dll directory" - - Copy-Item -Path "ggml.dll" -Destination "build\bin\Release\ggml.dll" -Force - Write-Host "Copied ggml.dll to build\bin\Release\ggml.dll directory" - - Copy-Item -Path "ggml-base.dll" -Destination "build\bin\Release\ggml-base.dll" -Force - Write-Host "Copied ggml-base.dll to build\bin\Release\ggml-base.dll directory" - - Copy-Item -Path "ggml-cpu.dll" -Destination "build\bin\Release\ggml-cpu.dll" -Force - Write-Host "Copied ggml-cpu.dll to build\bin\Release\ggml-cpu.dll directory" - - Copy-Item -Path "SDL2.dll" -Destination "build\bin\Release\SDL2.dll" -Force - Write-Host "Copied SDL2.dll to build\bin\Release\SDL2.dll directory" + C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-2.28.5/SDL2-devel-2.28.5-VC.zip + 7z x sdl2.zip + "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-2.28.5/cmake" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - - name: List build release files + - name: Configure CMake shell: pwsh run: | - Get-ChildItem -Path "build\Release" -Recurse -Filter "*.dll" + cmake -S . -B ./build -A x64 ` + -DCMAKE_BUILD_TYPE=Release ` + -DBUILD_SHARED_LIBS=ON ` + -DGGML_VULKAN=ON ` + -DWHISPER_SDL2=ON ` + -DVULKAN_SDK="$env:VULKAN_SDK" - name: Build - run: | - models\download-ggml-model.cmd tiny.en models/ - cd bindings/java - chmod +x ./gradlew - ./gradlew build --info + run: cd ./build && msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64 - - name: Pack jar artifacts + - name: Copy SDL2.dll shell: pwsh - run: | - Compress-Archive -Path "bindings/java/build/libs/whispercpp-*.jar" -DestinationPath "whispercpp.jar.zip" + run: copy "$env:SDL2_DIR/../lib/x64/SDL2.dll" build/bin/Release - - name: Upload jar - uses: actions/upload-artifact@v6 - with: - name: whispercpp.jar.zip - path: whispercpp.jar.zip - -# - name: Publish package -# if: ${{ github.ref == 'refs/heads/master' }} -# uses: gradle/gradle-build-action@v2.4.2 -# with: -# arguments: publish -# build-root-directory: bindings/java -# env: -# MAVEN_USERNAME: ${{ secrets.JIRA_USER }} -# MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }} -# PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }} -# PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} - - quantize: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-22.04 - - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Test quantize + - name: Package + shell: pwsh run: | - ./models/download-ggml-model.sh tiny.en - cmake -B build - cmake --build build --config Release - ./build/bin/whisper-quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0 - - release: - if: ${{ github.event.inputs.create_release == 'true' || github.event.inputs.pre_release_tag != '' || startsWith(github.ref, 'refs/tags/v') }} - - runs-on: ubuntu-latest - - needs: - - determine-tag - - ios-xcode-build - - windows - - windows-blas - - windows-cublas - - windows-rocm - - ubuntu-rocm + $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-vulkan-x64.zip" + Compress-Archive -Path "build/bin/Release/*" -DestinationPath $a -Force + "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + - uses: actions/upload-artifact@v4 + if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: - fetch-depth: 0 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: release - evict-old-files: 1d + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - # Downloads all the artifacts from the previous jobs - - name: Download artifacts - id: download-artifact - uses: actions/download-artifact@v7 - with: - path: ./artifact - - - name: Move artifacts - id: move_artifacts - run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release - - - name: Create release - id: create_release - uses: ggml-org/action-create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ needs.determine-tag.outputs.tag_name }} - prerelease: ${{ github.event.inputs.pre_release_tag != '' }} - draft: true - - - name: Upload release - id: upload_release - uses: actions/github-script@v3 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - script: | - const path = require('path'); - const fs = require('fs'); - const release_id = '${{ steps.create_release.outputs.id }}'; - for (let file of await fs.readdirSync('./artifact/release')) { - if (path.extname(file) === '.zip') { - console.log('uploadReleaseAsset', file); - await github.repos.uploadReleaseAsset({ - owner: context.repo.owner, - repo: context.repo.repo, - release_id: release_id, - name: file, - data: await fs.readFileSync(`./artifact/release/${file}`) - }); - } - } - - coreml-base-en: - if: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') || - github.event.inputs.create_release == 'true' || - github.event.inputs.pre_release_tag != '' || - startsWith(github.ref, 'refs/tags/v') }} - runs-on: macos-latest +# ════════════════════════════════════════════════════════════════════════════════ +# 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) +# ════════════════════════════════════════════════════════════════════════════════ + windows-npu: + runs-on: [rai-170-sdk, Windows] needs: determine-tag + continue-on-error: true # runner may be offline; don't block release steps: - - name: Checkout code - uses: actions/checkout@v6 + - uses: actions/checkout@v4 - - name: Set environment variables - id: set_vars - run: | - echo "MODEL_NAME=base.en" >> $GITHUB_ENV - echo "GEN_MODEL_NAME=whisper-${{ needs.determine-tag.outputs.tag_name }}-ggml-base.en-encoder.mlmodelc" >> $GITHUB_ENV + - uses: microsoft/setup-msbuild@v2 - - name: Download model + - name: Install CMake if not available + shell: powershell run: | - ./models/download-ggml-model.sh ${{ env.MODEL_NAME }} + $installed = Get-Command cmake -ErrorAction SilentlyContinue + if (-not $installed) { + $ver = "3.28.1" + $url = "https://github.com/Kitware/CMake/releases/download/v$ver/cmake-$ver-windows-x86_64.msi" + Invoke-WebRequest -Uri $url -OutFile cmake.msi + Start-Process msiexec.exe -ArgumentList "/i cmake.msi /quiet /norestart" -Wait + $p = "C:\Program Files\CMake\bin" + $env:PATH = "$p;$env:PATH" + echo $p >> $env:GITHUB_PATH + cmake --version + if ($LASTEXITCODE -ne 0) { Write-Error "CMake install failed"; exit 1 } + } else { cmake --version } - - name: Generate CoreML model + - name: Download FlexML Runtime + shell: powershell run: | - python3.11 -m venv venv - source venv/bin/activate - pip install ane_transformers openai-whisper coremltools - ./models/generate-coreml-model.sh ${{ env.MODEL_NAME }} - - vad: - if: ${{ github.event_name == 'push' || github.event_name == 'pull_request' || - github.event.inputs.run_type == 'full-ci' }} - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@v6 + Invoke-WebRequest -Uri "${{ env.FLEXML_URL }}" -OutFile flexmlrt.zip + if (-Not (Test-Path "flexmlrt.zip")) { Write-Error "flexmlrt.zip not downloaded"; exit 1 } + if ((Get-Item "flexmlrt.zip").Length -eq 0) { Write-Error "flexmlrt.zip is empty"; exit 1 } + Write-Host "FlexML: $([math]::Round((Get-Item 'flexmlrt.zip').Length/1MB,2)) MB downloaded" - - name: Build - shell: bash + - name: Extract FlexML Runtime + shell: powershell run: | - cmake -B build - cmake --build build --config Release + tar xvf flexmlrt.zip + if ($LASTEXITCODE -ne 0) { Write-Error "Extraction failed"; exit 1 } + $dirs = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } + if (-not $dirs) { Write-Error "No flexmlrt directory found after extraction"; exit 1 } + Write-Host "Extracted: $($dirs.Name)" - - name: Test - shell: bash + - name: Setup FlexML, configure and build + shell: cmd run: | - ctest -R ^test-vad$ --test-dir build --output-on-failure -VV - -# TODO: simplify the following workflows using a matrix - ggml-ci-x64-cpu-low-perf: - runs-on: ubuntu-22.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ggml-ci-x64-cpu-low-perf - evict-old-files: 1d + cd flexmlrt + call setup.bat + if errorlevel 1 ( echo ERROR: FlexML setup.bat failed! & exit /b 1 ) + cd .. + cmake -B build -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_VITISAI=ON + if errorlevel 1 ( echo ERROR: CMake configure failed! & exit /b 1 ) + cmake --build build --config Release -j + if errorlevel 1 ( echo ERROR: Build failed! & exit /b 1 ) + + - name: List build output + shell: powershell + run: | + if (Test-Path "build/bin/Release") { + Get-ChildItem -Path "build/bin/Release" -Recurse | Format-Table Name, Length + } else { Write-Error "build/bin/Release not found"; exit 1 } + + - name: Copy FlexML DLLs to build output + shell: powershell + run: | + $copied = 0 + if (Test-Path "flexmlrt/bin") { + $d = Get-ChildItem -Path "flexmlrt/bin/*.dll" -ErrorAction SilentlyContinue + if ($d) { Copy-Item "flexmlrt/bin/*.dll" "build/bin/Release/" -Force; $copied += $d.Count } + } + if (Test-Path "flexmlrt/lib") { + $d = Get-ChildItem -Path "flexmlrt/lib/*.dll" -ErrorAction SilentlyContinue + if ($d) { Copy-Item "flexmlrt/lib/*.dll" "build/bin/Release/" -Force; $copied += $d.Count } + } + Write-Host "FlexML DLLs copied: $copied" - - name: Dependencies - id: depends + - name: Package + shell: powershell run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev + $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-npu-x64.zip" + Compress-Archive -Path "build/bin/Release/*" -DestinationPath $a -Force + if (-not (Test-Path $a)) { Write-Error "Package creation failed"; exit 1 } + $mb = [math]::Round((Get-Item $a).Length/1MB,2) + Write-Host "Package: $a ($mb MB)" + "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - - name: Test - id: ggml-ci + - name: Build summary + shell: powershell run: | - LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt - - ggml-ci-arm64-cpu-low-perf: - runs-on: ubuntu-22.04-arm - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + Write-Host "NPU build complete. Artifact: $env:ARCHIVE" - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 + - uses: actions/upload-artifact@v4 + if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: - key: ggml-ci-arm64-cpu-low-perf - evict-old-files: 1d - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - - name: Test - id: ggml-ci - run: | - LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt - - ggml-ci-x64-cpu-high-perf: - runs-on: ubuntu-22.04 +# ════════════════════════════════════════════════════════════════════════════════ +# 7. CPU — Linux +# ════════════════════════════════════════════════════════════════════════════════ + linux-cpu: + runs-on: ubuntu-latest + needs: determine-tag steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ggml-ci-x64-cpu-high-perf - evict-old-files: 1d + - uses: actions/checkout@v4 - - name: Dependencies - id: depends + - name: Install dependencies run: | sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev + sudo apt-get install -y build-essential cmake git libsdl2-dev pkg-config + echo "cmake $(cmake --version | head -1)" + echo "gcc $(gcc --version | head -1)" - - name: Test - id: ggml-ci + - name: Configure CMake run: | - LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt - - ggml-ci-arm64-cpu-high-perf: - runs-on: ubuntu-22.04-arm - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DWHISPER_BUILD_EXAMPLES=ON \ + -DWHISPER_BUILD_TESTS=OFF \ + -DWHISPER_BUILD_SERVER=ON - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ggml-ci-arm64-cpu-high-perf - evict-old-files: 1d + - name: Build + run: cmake --build build --config Release -j$(nproc) - - name: Dependencies - id: depends + - name: List build output run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev + find build/bin -type f | sort + find build/bin -type f -executable | while read f; do ls -lh "$f"; done - - name: Test - id: ggml-ci + - name: Package run: | - LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + VER="${{ needs.determine-tag.outputs.version }}" + ARCHIVE="whisper-${VER}-linux-cpu-x86_64.tar.gz" + STAGE="whisper-${VER}-linux-cpu-x86_64" + mkdir -p "$STAGE" + cp -r build/bin/* "$STAGE/" 2>/dev/null || true + find build -name "*.so*" -exec cp {} "$STAGE/" \; 2>/dev/null || true + printf "whisper.cpp CPU build for Linux\nDate: %s\nArch: %s\n" \ + "$(date -u +"%Y-%m-%d %H:%M:%S UTC")" "$(uname -m)" > "$STAGE/README.txt" + tar -czf "$ARCHIVE" "$STAGE" + echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV - ggml-ci-arm64-cpu-high-perf-sve: - runs-on: ubuntu-22.04-arm - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 + - uses: actions/upload-artifact@v4 + if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: - key: ggml-ci-arm64-cpu-high-perf-sve - evict-old-files: 1d - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev - - - name: Test - id: ggml-ci - run: | - LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt - - ggml-ci-x64-nvidia-cuda: - runs-on: [self-hosted, Linux, mnt-root, NVIDIA] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - - name: Test - id: ggml-ci - run: | - nvidia-smi - GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp - - ggml-ci-x64-nvidia-vulkan-cm: - runs-on: [self-hosted, Linux, mnt-root, NVIDIA] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Test - id: ggml-ci - run: | - vulkaninfo --summary - GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp - - ggml-ci-x64-nvidia-vulkan-cm2: - runs-on: [self-hosted, Linux, mnt-root, NVIDIA, COOPMAT2] +# ════════════════════════════════════════════════════════════════════════════════ +# 8. CPU — Windows +# ════════════════════════════════════════════════════════════════════════════════ + windows-cpu: + runs-on: windows-latest + needs: determine-tag steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Test - id: ggml-ci - run: | - vulkaninfo --summary - GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp - - #ggml-ci-x64-cpu-amx: - # runs-on: [self-hosted, Linux, X64, CPU, AMX] - - # steps: - # - name: Clone - # id: checkout - # uses: actions/checkout@v6 - - # - name: Test - # id: ggml-ci - # run: | - # bash ./ci/run.sh ~/results/whisper.cpp /mnt/whisper.cpp - - ggml-ci-mac-metal: - runs-on: [self-hosted, macOS, ARM64] + - uses: actions/checkout@v4 - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + - uses: microsoft/setup-msbuild@v2 - - name: Test - id: ggml-ci + - name: Fetch SDL2 + shell: pwsh run: | - GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp - - ggml-ci-mac-vulkan: - runs-on: [self-hosted, macOS, ARM64] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 + C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-2.28.5/SDL2-devel-2.28.5-VC.zip + 7z x sdl2.zip + "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-2.28.5/cmake" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - - name: Test - id: ggml-ci + - name: Configure CMake + shell: pwsh run: | - vulkaninfo --summary - GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/whisper.cpp ~/mnt/whisper.cpp + cmake -S . -B ./build -A x64 ` + -DCMAKE_BUILD_TYPE=Release ` + -DBUILD_SHARED_LIBS=ON ` + -DWHISPER_SDL2=ON - # AMD ROCm GPU Testing (self-hosted runners) - test-rocm-linux: - runs-on: ${{ matrix.runner }} - - strategy: - fail-fast: false - matrix: - include: - - gfx_target: gfx1151 - runner: [stx-halo, Linux] - # Uncomment when runners are available: - # - gfx_target: gfx1100 - # runner: [navi31, Linux] - # - gfx_target: gfx1200 - # runner: [rdna4, Linux] - # - gfx_target: gfx1150 - # runner: [rai300_400, Linux] - - concurrency: - group: rocm-test-linux-${{ matrix.gfx_target }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - - timeout-minutes: 120 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 + - name: Build + run: cd ./build && msbuild ALL_BUILD.vcxproj -t:build -p:configuration=Release -p:platform=x64 - - name: Cleanup before run - uses: ./.github/actions/cleanup-processes-linux + - name: Copy SDL2.dll + shell: pwsh + run: copy "$env:SDL2_DIR/../lib/x64/SDL2.dll" build/bin/Release - - name: Verify ROCm installation - run: | - echo "=== ROCm Environment ===" - rocm-smi || echo "rocm-smi not found" - rocminfo | head -40 || echo "rocminfo not found" - hipcc --version || echo "hipcc not found" - echo "=== GPU Info ===" - rocm-smi --showproductname 2>/dev/null || true - - - name: Test - id: ggml-ci + - name: Package + shell: pwsh run: | - GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS=${{ matrix.gfx_target }} GG_BUILD_LOW_PERF=1 \ - bash ./ci/run.sh ~/results/whisper.cpp-rocm-${{ matrix.gfx_target }} /mnt/whisper.cpp - - - name: Cleanup after run - if: always() - uses: ./.github/actions/cleanup-processes-linux + $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-cpu-x64.zip" + Compress-Archive -Path "build/bin/Release/*" -DestinationPath $a -Force + "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - test-rocm-windows: - runs-on: ${{ matrix.runner }} + - uses: actions/upload-artifact@v4 + if: ${{ needs.determine-tag.outputs.should_release == 'true' }} + with: + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} - strategy: - fail-fast: false - matrix: - include: - - gfx_target: gfx1151 - runner: [stx-halo, Windows] - # Uncomment when runners are available: - # - gfx_target: gfx1100 - # runner: [navi31, Windows] - # - gfx_target: gfx1200 - # runner: [rdna4, Windows] - # - gfx_target: gfx1150 - # runner: [rai300_400, Windows] - - concurrency: - group: rocm-test-windows-${{ matrix.gfx_target }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - - timeout-minutes: 120 +# ════════════════════════════════════════════════════════════════════════════════ +# 9. Publish GitHub Release +# ════════════════════════════════════════════════════════════════════════════════ + release: + if: always() && needs.determine-tag.outputs.should_release == 'true' + runs-on: ubuntu-latest + needs: + - determine-tag + - linux-rocm + - windows-rocm + - linux-vulkan + - windows-vulkan + - windows-npu + - linux-cpu + - windows-cpu steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 + - uses: actions/checkout@v4 + with: + fetch-depth: 0 - - name: Cleanup before run - uses: ./.github/actions/cleanup-processes-windows + - name: Download all artifacts + uses: actions/download-artifact@v4 + with: + path: ./artifacts - - name: Verify ROCm installation - shell: pwsh + - name: Flatten artifacts into release/ run: | - Write-Host "=== ROCm Environment ===" - & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null - & "$env:HIP_PATH\bin\hipcc.exe" --version 2>$null + mkdir -p release + find ./artifacts -mindepth 2 \( -name '*.zip' -o -name '*.tar.gz' \) -exec mv {} release/ \; + echo "Release assets:" + ls -lh release/ - - name: Configure ROCm environment - shell: pwsh - run: | - $rocmPath = $env:HIP_PATH - if (-not $rocmPath) { $rocmPath = "C:\opt\rocm" } - echo "HIP_PATH=$rocmPath" >> $env:GITHUB_ENV - echo "ROCM_PATH=$rocmPath" >> $env:GITHUB_ENV - echo "HIP_PLATFORM=amd" >> $env:GITHUB_ENV - echo "$rocmPath\bin" >> $env:GITHUB_PATH - echo "$rocmPath\lib\llvm\bin" >> $env:GITHUB_PATH - - - name: Test - id: ggml-ci - shell: bash - run: | - GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS=${{ matrix.gfx_target }} GG_BUILD_LOW_PERF=1 \ - bash ./ci/run.sh ~/results/whisper.cpp-rocm-${{ matrix.gfx_target }} /mnt/whisper.cpp + - name: Create release + id: create_release + uses: ggml-org/action-create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ needs.determine-tag.outputs.tag_name }} + release_name: "whisper.cpp ${{ needs.determine-tag.outputs.tag_name }} — AMD Builds" + prerelease: ${{ github.event.inputs.pre_release_tag != '' }} + draft: false - - name: Cleanup after run - if: always() - uses: ./.github/actions/cleanup-processes-windows + - name: Upload release assets + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const path = require('path'); + const id = '${{ steps.create_release.outputs.id }}'; + for (const file of fs.readdirSync('./release')) { + if (!file.endsWith('.zip') && !file.endsWith('.tar.gz')) continue; + console.log('Uploading:', file); + await github.rest.repos.uploadReleaseAsset({ + owner: context.repo.owner, + repo: context.repo.repo, + release_id: id, + name: file, + data: fs.readFileSync(`./release/${file}`), + }); + } diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml new file mode 100644 index 00000000000..9333aa774e1 --- /dev/null +++ b/.github/workflows/sync.yml @@ -0,0 +1,148 @@ +name: Sync Upstream & Auto-Release + +# Runs daily to detect new upstream whisper.cpp releases. +# When a new release is found: +# - clean merge → pushes main + creates tag vX.Y.Z → triggers build.yml +# - conflict → opens a PR for manual resolution, does NOT tag + +on: + schedule: + - cron: '0 6 * * *' # daily at 06:00 UTC + workflow_dispatch: + inputs: + upstream_tag: + description: 'Force a specific upstream tag (e.g. v1.8.5). Leave blank to auto-detect latest.' + required: false + type: string + dry_run: + description: 'Dry run — merge locally but do not push or tag' + required: false + type: boolean + default: false + +permissions: + contents: write + pull-requests: write + +jobs: + sync-and-tag: + runs-on: ubuntu-latest + + steps: + - name: Checkout (full history + tags) + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Configure git identity + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Add upstream remote + run: git remote add upstream https://github.com/ggerganov/whisper.cpp || true + + # ── Detect which upstream release to target ────────────────────────── + - name: Detect upstream release + id: upstream + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + if [[ -n "${{ github.event.inputs.upstream_tag }}" ]]; then + UPSTREAM_TAG="${{ github.event.inputs.upstream_tag }}" + echo "Using manually specified tag: $UPSTREAM_TAG" + else + UPSTREAM_TAG=$(gh api repos/ggerganov/whisper.cpp/releases/latest --jq '.tag_name') + echo "Latest upstream release: $UPSTREAM_TAG" + fi + + # Strip leading 'v' for use in artifact filenames + VERSION="${UPSTREAM_TAG#v}" + + echo "tag=$UPSTREAM_TAG" >> $GITHUB_OUTPUT + echo "version=$VERSION" >> $GITHUB_OUTPUT + + # ── Check if we already have a release for this upstream version ────── + - name: Check if already released + id: check + run: | + git fetch --tags + # Our tags match the upstream tag exactly (e.g. v1.8.4) + EXISTING=$(git tag -l "${{ steps.upstream.outputs.tag }}" | head -1) + if [[ -n "$EXISTING" ]]; then + echo "already_released=true" >> $GITHUB_OUTPUT + echo "::notice::Already have release $EXISTING — nothing to do." + else + echo "already_released=false" >> $GITHUB_OUTPUT + echo "New upstream release detected: ${{ steps.upstream.outputs.tag }}" + fi + + # ── Merge upstream tag into main ───────────────────────────────────── + - name: Fetch upstream tags + if: steps.check.outputs.already_released == 'false' + run: git fetch upstream --tags + + - name: Attempt merge + if: steps.check.outputs.already_released == 'false' + run: | + git merge "${{ steps.upstream.outputs.tag }}" --no-edit || echo "CONFLICT=true" >> $GITHUB_ENV + + # ── Conflict path: open PR, do NOT tag ─────────────────────────────── + - name: Open conflict PR + if: steps.check.outputs.already_released == 'false' && env.CONFLICT == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + CONFLICT_FILES=$(git diff --name-only --diff-filter=U | tr '\n' ', ') + git merge --abort + + BRANCH="sync/${{ steps.upstream.outputs.tag }}" + git checkout -b "$BRANCH" + # Resolve conflicts by preferring upstream (theirs) so the branch is pushable + git merge "${{ steps.upstream.outputs.tag }}" --no-edit --strategy-option=theirs || true + git add -A + git commit -m "chore: merge upstream ${{ steps.upstream.outputs.tag }} (auto-resolved via theirs)" --allow-empty + git push origin "$BRANCH" + + gh pr create \ + --title "Sync upstream ${{ steps.upstream.outputs.tag }} — conflict resolution needed" \ + --body "## Upstream sync: ${{ steps.upstream.outputs.tag }} + +Conflicts were detected during automatic merge. Files affected: + +\`\`\` +$CONFLICT_FILES +\`\`\` + +**This PR was auto-resolved using upstream (theirs) as a baseline — please review the diff carefully before merging.** + +Once merged, manually create tag \`${{ steps.upstream.outputs.tag }}\` on main to trigger the release build: +\`\`\`bash +git tag ${{ steps.upstream.outputs.tag }} +git push origin ${{ steps.upstream.outputs.tag }} +\`\`\` +" \ + --base main \ + --head "$BRANCH" + + echo "::warning::Merge conflict detected — PR opened for manual resolution. Release build NOT triggered." + + # ── Clean merge path: push main + tag → triggers build.yml ─────────── + - name: Push merged main + if: steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true' && github.event.inputs.dry_run != 'true' + run: git push origin HEAD:main + + - name: Create and push release tag + if: steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true' && github.event.inputs.dry_run != 'true' + run: | + TAG="${{ steps.upstream.outputs.tag }}" + git tag "$TAG" -m "AMD builds for upstream $TAG" + git push origin "$TAG" + echo "::notice::Pushed tag $TAG — build.yml will now run and publish the release." + + - name: Dry-run summary + if: github.event.inputs.dry_run == 'true' && steps.check.outputs.already_released == 'false' && env.CONFLICT != 'true' + run: | + echo "DRY RUN — merge was clean. Would have pushed main and tagged ${{ steps.upstream.outputs.tag }}." + echo "Re-run with dry_run=false to publish." diff --git a/CMakeLists.txt b/CMakeLists.txt index a0f74041321..d4dc318056a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,7 @@ endif() option(WHISPER_COREML "whisper: enable Core ML framework" OFF) option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF) option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF) +option(WHISPER_VITISAI "whisper: support for AMD Vitis AI" OFF) # Required for relocatable CMake package include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) diff --git a/README.md b/README.md index 474a1301da7..92160e40276 100644 --- a/README.md +++ b/README.md @@ -1,862 +1,275 @@ -# whisper.cpp +# whisper-cpp-amd + + + GitHub release (latest by date) + + + Latest release date + + + License + + + ROCm 7.x + + + Powered by whisper.cpp + + + Platform: Windows | Linux + + + GPU Targets + + + NPU: Ryzen AI 300 + + +Pre-built releases of **[whisper.cpp](https://github.com/ggerganov/whisper.cpp)** with full AMD hardware acceleration — **ROCm™ GPU**, **Vulkan GPU**, **RyzenAI NPU**, and optimised **CPU** builds — for Windows and Linux. + +Releases track upstream whisper.cpp exactly: every time upstream publishes a new version, our automated pipeline syncs, builds all backends, and publishes a matching release within 24 hours. No manual steps. No lag. + +> [!IMPORTANT] +> **No ROCm installation required.** All ROCm and Vulkan runtime libraries are bundled inside every release archive. Download, extract, and run. + +> [!NOTE] +> This project is maintained by the [Lemonade SDK](https://github.com/lemonade-sdk/lemonade) team. Our primary focus is seamless integration with Lemonade and similar AMD-optimised AI applications. We welcome collaborations and contributions that advance AMD whisper.cpp support. -![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg) - -[![Actions Status](https://github.com/ggml-org/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggml-org/whisper.cpp/actions) -[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) -[![Conan Center](https://shields.io/conan/v/whisper-cpp)](https://conan.io/center/whisper-cpp) -[![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/) - -Stable: [v1.8.1](https://github.com/ggml-org/whisper.cpp/releases/tag/v1.8.1) / [Roadmap](https://github.com/orgs/ggml-org/projects/4/) - -High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model: - -- Plain C/C++ implementation without dependencies -- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support) -- AVX intrinsics support for x86 architectures -- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics) -- Mixed F16 / F32 precision -- [Integer quantization support](#quantization) -- Zero memory allocations at runtime -- [Vulkan support](#vulkan-gpu-support) -- Support for CPU-only inference -- [Efficient GPU support for NVIDIA](#nvidia-gpu-support) -- [OpenVINO Support](#openvino-support) -- [Ascend NPU Support](#ascend-npu-support) -- [Moore Threads GPU Support](#moore-threads-gpu-support) -- [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h) -- [Voice Activity Detection (VAD)](#voice-activity-detection-vad) - -Supported platforms: - -- [x] Mac OS (Intel and Arm) -- [x] [iOS](examples/whisper.objc) -- [x] [Android](examples/whisper.android) -- [x] [Java](bindings/java/README.md) -- [x] Linux / [FreeBSD](https://github.com/ggml-org/whisper.cpp/issues/56#issuecomment-1350920264) -- [x] [WebAssembly](examples/whisper.wasm) -- [x] Windows ([MSVC](https://github.com/ggml-org/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggml-org/whisper.cpp/issues/168)) -- [x] [Raspberry Pi](https://github.com/ggml-org/whisper.cpp/discussions/166) -- [x] [Docker](https://github.com/ggml-org/whisper.cpp/pkgs/container/whisper.cpp) - -The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp). -The rest of the code is part of the [`ggml`](https://github.com/ggml-org/ggml) machine learning library. - -Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications. -As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc) - -https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4 - -You can also easily make your own offline voice assistant application: [command](examples/command) - -https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4 - -On Apple Silicon, the inference runs fully on the GPU via Metal: - -https://github.com/ggml-org/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225 - -## Quick start +--- -First clone the repository: +## 🎯 Supported Devices -```bash -git clone https://github.com/ggml-org/whisper.cpp.git -``` +### ROCm GPU -Navigate into the directory: +| Architecture | Devices | +|---|---| +| **gfx1151** — RDNA3.5 APU | Ryzen AI MAX+ Pro 395 (Strix Halo) | +| **gfx1150** — RDNA3.5 APU | Ryzen AI 300 series (Strix Point) | +| **gfx120X** — RDNA4 dGPU | Radeon RX 9070 XT / 9070 / 9060 XT / 9060 | +| **gfx110X** — RDNA3 dGPU & iGPU | RX 7900 XTX/XT/GRE, RX 7800 XT, RX 7700 XT, RX 7600 XT/7600; iGPU Radeon 780M / 760M / 740M | -``` -cd whisper.cpp -``` +### Vulkan GPU -Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example: +Any GPU with a Vulkan 1.3-capable driver — AMD, NVIDIA, Intel. Covers iGPUs on all platforms where a Vulkan driver is present. -```bash -sh ./models/download-ggml-model.sh base.en -``` +### NPU — RyzenAI -Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this: +| Device | OS | Requirement | +|---|---|---| +| Ryzen AI 300 series (Strix Point / Strix Halo) | Windows only | NPU driver ≥ `.280` | -```bash -# build the project -cmake -B build -cmake --build build -j --config Release +### CPU -# transcribe an audio file -./build/bin/whisper-cli -f samples/jfk.wav -``` +Optimised CPU-only builds for x86-64. Windows and Linux. No GPU required. --- -For a quick demo, simply run `make base.en`. - -The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`. +## 📦 Downloads -For detailed usage instructions, run: `./build/bin/whisper-cli -h` +All builds are self-contained — no separate driver or runtime installation needed (except the NPU driver for the NPU build). -Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool. -For example, you can use `ffmpeg` like this: +### ROCm — GPU Accelerated -```bash -ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav -``` - -## More audio samples - -If you want some extra audio samples to play with, simply run: - -``` -make -j samples -``` - -This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`. +| GPU Target | Linux | Windows | +|---|---|---| +| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | -You can download and run the other models as follows: +### Vulkan — Cross-Vendor GPU -``` -make -j tiny.en -make -j tiny -make -j base.en -make -j base -make -j small.en -make -j small -make -j medium.en -make -j medium -make -j large-v1 -make -j large-v2 -make -j large-v3 -make -j large-v3-turbo -``` +| Linux | Windows | +|---|---| +| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | -## Memory usage +### NPU — RyzenAI (Windows only) -| Model | Disk | Mem | -| ------ | ------- | ------- | -| tiny | 75 MiB | ~273 MB | -| base | 142 MiB | ~388 MB | -| small | 466 MiB | ~852 MB | -| medium | 1.5 GiB | ~2.1 GB | -| large | 2.9 GiB | ~3.9 GB | +| Windows | +|---| +| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | -## POWER VSX Intrinsics +> Requires NPU driver ≥ `.280` and a pre-compiled `.rai` encoder model from [AMD's Hugging Face collection](https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models). Place the `.rai` file alongside your `ggml-*.bin` model — whisper-cli picks it up automatically. -`whisper.cpp` supports POWER architectures and includes code which -significantly speeds operation on Linux running on POWER9/10, making it -capable of faster-than-realtime transcription on underclocked Raptor -Talos II. Ensure you have a BLAS package installed, and replace the -standard cmake setup with: +### CPU — No GPU Required -```bash -# build with GGML_BLAS defined -cmake -B build -DGGML_BLAS=1 -cmake --build build -j --config Release -./build/bin/whisper-cli [ .. etc .. ] -``` +| Linux | Windows | +|---|---| +| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | -## Quantization +--- -`whisper.cpp` supports integer quantization of the Whisper `ggml` models. -Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently. +## 🧪 Quick Smoketest -Here are the steps for creating and using a quantized model: +### 1. Get a model ```bash -# quantize a model with Q5_0 method -cmake -B build -cmake --build build -j --config Release -./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0 - -# run the examples as usual, specifying the quantized model file -./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav -``` - -## Core ML support - -On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant -speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`: - -- Install Python dependencies needed for the creation of the Core ML model: - - ```bash - pip install ane_transformers - pip install openai-whisper - pip install coremltools - ``` - - - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools. - - Python 3.11 is recommended. - - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination. - - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step: - - To create an environment, use: `conda create -n py311-whisper python=3.11 -y` - - To activate the environment, use: `conda activate py311-whisper` - -- Generate a Core ML model. For example, to generate a `base.en` model, use: - - ```bash - ./models/generate-coreml-model.sh base.en - ``` - - This will generate the folder `models/ggml-base.en-encoder.mlmodelc` - -- Build `whisper.cpp` with Core ML support: - - ```bash - # using CMake - cmake -B build -DWHISPER_COREML=1 - cmake --build build -j --config Release - ``` - -- Run the examples as usual. For example: - - ```text - $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav - - ... - - whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc' - whisper_init_state: first run on a device may take a while ... - whisper_init_state: Core ML model loaded - - system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 | - - ... - ``` - - The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format. - Next runs are faster. - -For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggml-org/whisper.cpp/pull/566). - -## OpenVINO support - -On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed -on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete). - -This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`: - -- First, setup python virtual env. and install python dependencies. Python 3.10 is recommended. - - Windows: - - ```powershell - cd models - python -m venv openvino_conv_env - openvino_conv_env\Scripts\activate - python -m pip install --upgrade pip - pip install -r requirements-openvino.txt - ``` - - Linux and macOS: - - ```bash - cd models - python3 -m venv openvino_conv_env - source openvino_conv_env/bin/activate - python -m pip install --upgrade pip - pip install -r requirements-openvino.txt - ``` - -- Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use: - - ``` - python convert-whisper-to-openvino.py --model base.en - ``` - - This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that - is the default location that the OpenVINO extension will search at runtime. - -- Build `whisper.cpp` with OpenVINO support: - - Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2024.6.0](https://github.com/openvinotoolkit/openvino/releases/tag/2024.6.0). Ready to use Binaries of the required libraries can be found in the [OpenVino Archives](https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.6/) - - After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example: - - Linux: - - ```bash - source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh - ``` - - Windows (cmd): - - ```powershell - C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat - ``` - - And then build the project using cmake: - - ```bash - cmake -B build -DWHISPER_OPENVINO=1 - cmake --build build -j --config Release - ``` - -- Run the examples as usual. For example: - - ```text - $ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav - - ... - - whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml' - whisper_ctx_init_openvino_encoder: first run on a device may take a while ... - whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache - whisper_ctx_init_openvino_encoder: OpenVINO model loaded - - system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 | - - ... - ``` - - The first time run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get - cached for the next run. - -For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggml-org/whisper.cpp/pull/1037). - -## NVIDIA GPU support - -With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels. -First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads - -Now build `whisper.cpp` with CUDA support: - -``` -cmake -B build -DGGML_CUDA=1 -cmake --build build -j --config Release -``` - -or for newer NVIDIA GPU's (RTX 5000 series): -``` -cmake -B build -DGGML_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="86" -cmake --build build -j --config Release -``` - -## Vulkan GPU support -Cross-vendor solution which allows you to accelerate workload on your GPU. -First, make sure your graphics card driver provides support for Vulkan API. +# Download the tiny.en model (~75 MB) for a fast smoke test +./models/download-ggml-model.sh tiny.en -Now build `whisper.cpp` with Vulkan support: +# Or grab any ggml-*.bin from https://huggingface.co/ggerganov/whisper.cpp ``` -cmake -B build -DGGML_VULKAN=1 -cmake --build build -j --config Release -``` - -## BLAS CPU support via OpenBLAS - -Encoder processing can be accelerated on the CPU via OpenBLAS. -First, make sure you have installed `openblas`: https://www.openblas.net/ - -Now build `whisper.cpp` with OpenBLAS support: - -``` -cmake -B build -DGGML_BLAS=1 -cmake --build build -j --config Release -``` - -## Ascend NPU support - -Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores. - -First, check if your Ascend NPU device is supported: - -**Verified devices** -| Ascend NPU | Status | -|:-----------------------------:|:-------:| -| Atlas 300T A2 | Support | -| Atlas 300I Duo | Support | - -Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community) . The lasted version of CANN is recommanded. - -Now build `whisper.cpp` with CANN support: - -``` -cmake -B build -DGGML_CANN=1 -cmake --build build -j --config Release -``` - -Run the inference examples as usual, for example: - -``` -./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8 -``` - -*Notes:* - -- If you have trouble with Ascend NPU device, please create a issue with **[CANN]** prefix/tag. -- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`. - -## Moore Threads GPU support - -With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels. -First, make sure you have installed `MUSA SDK rc4.2.0`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=4.2.0 -Now build `whisper.cpp` with MUSA support: - -``` -cmake -B build -DGGML_MUSA=1 -cmake --build build -j --config Release -``` - -or specify the architecture for your Moore Threads GPU. For example, if you have a MTT S80 GPU, you can specify the architecture as follows: - -``` -cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21" -cmake --build build -j --config Release -``` - -## FFmpeg support (Linux only) - -If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration. - -First, you need to install required libraries: +### 2. Transcribe the bundled sample ```bash -# Debian/Ubuntu -sudo apt install libavcodec-dev libavformat-dev libavutil-dev +# Linux +./whisper-cli -m models/ggml-tiny.en.bin -f samples/jfk.wav -# RHEL/Fedora -sudo dnf install libavcodec-free-devel libavformat-free-devel libavutil-free-devel +# Windows +whisper-cli.exe -m models\ggml-tiny.en.bin -f samples\jfk.wav ``` -Then you can build the project as follows: - -```bash -cmake -B build -D WHISPER_FFMPEG=yes -cmake --build build -``` +Expected: a transcription of the JFK "Ask not what your country can do for you" excerpt. -Run the following example to confirm it's working: +### 3. Verify GPU is active (ROCm) ```bash -# Convert an audio file to Opus format -ffmpeg -i samples/jfk.wav jfk.opus - -# Transcribe the audio file -./build/bin/whisper-cli --model models/ggml-base.en.bin --file jfk.opus +# At startup whisper-cli prints the backend in use — look for: +# ggml_hip: using device ... +./whisper-cli -m models/ggml-tiny.en.bin -f samples/jfk.wav 2>&1 | grep -i "hip\|rocm\|device" ``` -## Docker - -### Prerequisites - -- Docker must be installed and running on your system. -- Create a folder to store big models & intermediate files (ex. /whisper/models) - -### Images - -We have multiple Docker images available for this project: +### 4. Verify NPU is active (VitisAI) -1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`) -2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`) -3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`) -4. `ghcr.io/ggml-org/whisper.cpp:main-vulkan`: Same as `main` but compiled with Vulkan support. (platforms: `linux/amd64`) - -### Usage - -```shell -# download model and persist it in a local folder -docker run -it --rm \ - -v path/to/models:/models \ - whisper.cpp:main "./models/download-ggml-model.sh base /models" - -# transcribe an audio file -docker run -it --rm \ - -v path/to/models:/models \ - -v path/to/audios:/audios \ - whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav" - -# transcribe an audio file in samples folder -docker run -it --rm \ - -v path/to/models:/models \ - whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav" - -# run the web server -docker run -it --rm -p "8080:8080" \ - -v path/to/models:/models \ - whisper.cpp:main "whisper-server --host 127.0.0.1 -m /models/ggml-base.bin" - -# run the bench too on the small.en model using 4 threads -docker run -it --rm \ - -v path/to/models:/models \ - whisper.cpp:main "whisper-bench -m /models/ggml-small.en.bin -t 4" ``` - -## Installing with Conan - -You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command: - -``` -conan install --requires="whisper-cpp/[*]" --build=missing +# Place the .rai encoder alongside the .bin model, then run normally. +# Look for this line in stdout: +# whisper_vitisai_encode: Vitis AI model inference completed. +whisper-cli.exe -m models\ggml-tiny.en.bin -f samples\jfk.wav ``` -For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/). - -## Limitations - -- Inference only - -## Real-time audio input example - -This is a naive example of performing real-time inference on audio from your microphone. -The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously. -More info is available in [issue #10](https://github.com/ggml-org/whisper.cpp/issues/10). -You will need to have [sdl2](https://wiki.libsdl.org/SDL2/Installation) installed for it to work properly. +### 5. Verify portability (Linux ROCm) ```bash -cmake -B build -DWHISPER_SDL2=ON -cmake --build build -j --config Release -./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000 +# ROCm runtime libs are bundled — RPATH should point to $ORIGIN (same dir as binary) +readelf -d whisper-cli | grep RPATH # -> $ORIGIN +ldd whisper-cli | grep "not found" # -> (empty — all deps resolved locally) ``` -https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4 +--- -## Confidence color-coding +## 🔄 Release Cadence -Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy -to highlight words with high or low confidence: +Releases are fully automated and mirror upstream whisper.cpp releases with no manual steps: -```bash -./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors ``` - -image - -## Controlling the length of the generated text segments (experimental) - -For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`: - -```text -$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16 - -whisper_model_load: loading model from './models/ggml-base.en.bin' -... -system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | - -main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ... - -[00:00:00.000 --> 00:00:00.850] And so my -[00:00:00.850 --> 00:00:01.590] fellow -[00:00:01.590 --> 00:00:04.140] Americans, ask -[00:00:04.140 --> 00:00:05.660] not what your -[00:00:05.660 --> 00:00:06.840] country can do -[00:00:06.840 --> 00:00:08.430] for you, ask -[00:00:08.430 --> 00:00:09.440] what you can do -[00:00:09.440 --> 00:00:10.020] for your -[00:00:10.020 --> 00:00:11.000] country. +upstream whisper.cpp releases vX.Y.Z + | + v (detected within 24 h by daily sync job) + sync.yml merges upstream into main, pushes tag vX.Y.Z + | + v (tag push triggers build pipeline) + build.yml builds all backend/OS combinations in parallel + | + v + GitHub Release: "whisper.cpp vX.Y.Z — AMD Builds" + with 13 artifacts across all backends and OS targets ``` -## Word-level timestamp (experimental) - -The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`: - -```text -$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1 - -whisper_model_load: loading model from './models/ggml-base.en.bin' -... -system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | - -main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ... - -[00:00:00.000 --> 00:00:00.320] -[00:00:00.320 --> 00:00:00.370] And -[00:00:00.370 --> 00:00:00.690] so -[00:00:00.690 --> 00:00:00.850] my -[00:00:00.850 --> 00:00:01.590] fellow -[00:00:01.590 --> 00:00:02.850] Americans -[00:00:02.850 --> 00:00:03.300] , -[00:00:03.300 --> 00:00:04.140] ask -[00:00:04.140 --> 00:00:04.990] not -[00:00:04.990 --> 00:00:05.410] what -[00:00:05.410 --> 00:00:05.660] your -[00:00:05.660 --> 00:00:06.260] country -[00:00:06.260 --> 00:00:06.600] can -[00:00:06.600 --> 00:00:06.840] do -[00:00:06.840 --> 00:00:07.010] for -[00:00:07.010 --> 00:00:08.170] you -[00:00:08.170 --> 00:00:08.190] , -[00:00:08.190 --> 00:00:08.430] ask -[00:00:08.430 --> 00:00:08.910] what -[00:00:08.910 --> 00:00:09.040] you -[00:00:09.040 --> 00:00:09.320] can -[00:00:09.320 --> 00:00:09.440] do -[00:00:09.440 --> 00:00:09.760] for -[00:00:09.760 --> 00:00:10.020] your -[00:00:10.020 --> 00:00:10.510] country -[00:00:10.510 --> 00:00:11.000] . -``` +**Every release ships 13 artifacts:** -## Speaker segmentation via tinydiarize (experimental) - -More information about this approach is available here: https://github.com/ggml-org/whisper.cpp/pull/1058 - -Sample usage: - -```py -# download a tinydiarize compatible model -./models/download-ggml-model.sh small.en-tdrz - -# run as usual, adding the "-tdrz" command-line argument -./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz -... -main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ... -... -[00:00:00.000 --> 00:00:03.800] Okay Houston, we've had a problem here. [SPEAKER_TURN] -[00:00:03.800 --> 00:00:06.200] This is Houston. Say again please. [SPEAKER_TURN] -[00:00:06.200 --> 00:00:08.260] Uh Houston we've had a problem. -[00:00:08.260 --> 00:00:11.320] We've had a main beam up on a volt. [SPEAKER_TURN] -[00:00:11.320 --> 00:00:13.820] Roger main beam interval. [SPEAKER_TURN] -[00:00:13.820 --> 00:00:15.100] Uh uh [SPEAKER_TURN] -[00:00:15.100 --> 00:00:18.020] So okay stand, by thirteen we're looking at it. [SPEAKER_TURN] -[00:00:18.020 --> 00:00:25.740] Okay uh right now uh Houston the uh voltage is uh is looking good um. -[00:00:27.620 --> 00:00:29.940] And we had a a pretty large bank or so. ``` - -## Karaoke-style movie generation (experimental) - -The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the -currently pronounced word is highlighted. Use the `-owts` argument and run the generated bash script. -This requires to have `ffmpeg` installed. - -Here are a few _"typical"_ examples: - -```bash -./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts -source ./samples/jfk.wav.wts -ffplay ./samples/jfk.wav.mp4 +whisper-{version}-linux-rocm-gfx1151.tar.gz +whisper-{version}-linux-rocm-gfx1150.tar.gz +whisper-{version}-linux-rocm-gfx120X.tar.gz +whisper-{version}-linux-rocm-gfx110X.tar.gz +whisper-{version}-windows-rocm-gfx1151.zip +whisper-{version}-windows-rocm-gfx1150.zip +whisper-{version}-windows-rocm-gfx120X.zip +whisper-{version}-windows-rocm-gfx110X.zip +whisper-{version}-linux-vulkan-x86_64.tar.gz +whisper-{version}-windows-vulkan-x64.zip +whisper-{version}-windows-npu-x64.zip (may be absent if NPU runner offline) +whisper-{version}-linux-cpu-x86_64.tar.gz +whisper-{version}-windows-cpu-x64.zip ``` -https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4 +> [!TIP] +> **Linux APU out of VRAM despite free memory (gfx1150 / gfx1151)?** +> Add `ttm.pages_limit=12582912` to your kernel command line (e.g. in GRUB), run `update-grub`, and reboot. +> See the [TheRock FAQ](https://github.com/ROCm/TheRock/blob/main/docs/faq.md#gfx1151-strix-halo-specific-questions) for details. --- -```bash -./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts -source ./samples/mm0.wav.wts -ffplay ./samples/mm0.wav.mp4 -``` +## 🖥️ Local Builds (Windows) -https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4 +Reproduce any CI build locally using the bundled PowerShell script. Produces identical artifacts to what CI publishes. ---- - -```bash -./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts -source ./samples/gb0.wav.wts -ffplay ./samples/gb0.wav.mp4 -``` +```powershell +# Prerequisites: CMake, VS Build Tools 2022, 7-Zip, internet access -https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4 +# CPU only (~2 min, no GPU needed) +.\scripts\local-build.ps1 -Backend cpu ---- +# Vulkan — requires Vulkan SDK from https://vulkan.lunarg.com +.\scripts\local-build.ps1 -Backend vulkan -## Video comparison of different models +# ROCm for RDNA3 iGPU — downloads ROCm tarball (~2-4 GB, cached after first run) +.\scripts\local-build.ps1 -Backend rocm -GfxTarget gfx1151 -Use the [scripts/bench-wts.sh](https://github.com/ggml-org/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format: +# NPU — requires RyzenAI hardware + NPU driver >= .280 +.\scripts\local-build.ps1 -Backend npu -```bash -./scripts/bench-wts.sh samples/jfk.wav -ffplay ./samples/jfk.wav.all.mp4 +# All backends, version-stamped artifacts placed in .\dist\ +.\scripts\local-build.ps1 -Backend all -Version 1.8.4 ``` -https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4 - --- -## Benchmarks - -In order to have an objective comparison of the performance of the inference across different system configurations, -use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it -took to execute it. The results are summarized in the following Github issue: - -[Benchmark results](https://github.com/ggml-org/whisper.cpp/issues/89) +## 📦 Dependencies -Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py). +### Bundled in every release (no installation needed) -You can run it with the following command, by default it will run against any standard model in the models folder. +| Backend | What is included | +|---|---| +| ROCm | `amdhip64`, `rocblas`, `hipblaslt` + library data, LLVM runtime, all system deps; RPATH=`$ORIGIN` on Linux | +| Vulkan | SPIR-V shaders embedded at build time; links against system Vulkan loader | +| NPU | FlexML Runtime DLLs (`flexmlrt/bin` + `flexmlrt/lib`) | +| CPU | SDL2.dll included on Windows | -```bash -python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2 -``` +### Build-time only -It is written in python with the intention of being easy to modify and extend for your benchmarking use case. - -It outputs a csv file with the results of the benchmarking. - -## `ggml` format - -The original models are converted to a custom binary format. This allows to pack everything needed into a single file: - -- model parameters -- mel filters -- vocabulary -- weights - -You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script -or manually from here: - -- https://huggingface.co/ggerganov/whisper.cpp - -For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md). - -## [Bindings](https://github.com/ggml-org/whisper.cpp/discussions/categories/bindings) - -- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggml-org/whisper.cpp/discussions/310) -- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggml-org/whisper.cpp/discussions/309) - - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn) -- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggml-org/whisper.cpp/discussions/312) -- [x] Java: - - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni) -- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggml-org/whisper.cpp/discussions/507) -- [x] Objective-C / Swift: [ggml-org/whisper.spm](https://github.com/ggml-org/whisper.spm) | [#313](https://github.com/ggml-org/whisper.cpp/discussions/313) - - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper) -- [x] .NET: | [#422](https://github.com/ggml-org/whisper.cpp/discussions/422) - - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net) - - [NickDarvey/whisper](https://github.com/NickDarvey/whisper) -- [x] Python: | [#9](https://github.com/ggml-org/whisper.cpp/issues/9) - - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython) - - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp) - - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11) - - [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11) -- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper) -- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity) - -## XCFramework -The XCFramework is a precompiled version of the library for iOS, visionOS, tvOS, -and macOS. It can be used in Swift projects without the need to compile the -library from source. For example, the v1.7.5 version of the XCFramework can be -used as follows: - -```swift -// swift-tools-version: 5.10 -// The swift-tools-version declares the minimum version of Swift required to build this package. - -import PackageDescription - -let package = Package( - name: "Whisper", - targets: [ - .executableTarget( - name: "Whisper", - dependencies: [ - "WhisperFramework" - ]), - .binaryTarget( - name: "WhisperFramework", - url: "https://github.com/ggml-org/whisper.cpp/releases/download/v1.7.5/whisper-v1.7.5-xcframework.zip", - checksum: "c7faeb328620d6012e130f3d705c51a6ea6c995605f2df50f6e1ad68c59c6c4a" - ) - ] -) -``` +| Tool | Purpose | +|---|---| +| [whisper.cpp](https://github.com/ggerganov/whisper.cpp) | Upstream source | +| [ROCm / TheRock](https://github.com/ROCm/TheRock) | HIP compiler + GPU runtime (tarball, not installed globally) | +| [FlexML Runtime](https://github.com/lemonade-sdk/whisper.cpp/releases/tag/deps) | VitisAI NPU inference | +| [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) | GLSL to SPIR-V shader compilation | +| [CMake >= 3.21](https://cmake.org/) | Build system | +| [Ninja](https://ninja-build.org/) | Fast build backend (ROCm builds) | +| [VS Build Tools 2022](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2022) | Windows MSVC toolchain | -## Voice Activity Detection (VAD) -Support for Voice Activity Detection (VAD) can be enabled using the `--vad` -argument to `whisper-cli`. In addition to this option a VAD model is also -required. - -The way this works is that first the audio samples are passed through -the VAD model which will detect speech segments. Using this information, -only the speech segments that are detected are extracted from the original audio -input and passed to whisper for processing. This reduces the amount of audio -data that needs to be processed by whisper and can significantly speed up the -transcription process. - -The following VAD models are currently supported: - -### Silero-VAD -[Silero-vad](https://github.com/snakers4/silero-vad) is a lightweight VAD model -written in Python that is fast and accurate. - -Models can be downloaded by running the following command on Linux or MacOS: -```console -$ ./models/download-vad-model.sh silero-v6.2.0 -Downloading ggml model silero-v6.2.0 from 'https://huggingface.co/ggml-org/whisper-vad' ... -ggml-silero-v6.2.0.bin 100%[==============================================>] 864.35K --.-KB/s in 0.04s -Done! Model 'silero-v6.2.0' saved in '/path/models/ggml-silero-v6.2.0.bin' -You can now use it like this: +--- - $ ./build/bin/whisper-cli -vm /path/models/ggml-silero-v6.2.0.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin +## 🏗️ Repository Structure ``` -And the following command on Windows: -```console -> .\models\download-vad-model.cmd silero-v6.2.0 -Downloading vad model silero-v6.2.0... -Done! Model silero-v6.2.0 saved in C:\Users\danie\work\ai\whisper.cpp\ggml-silero-v6.2.0.bin -You can now use it like this: - -C:\path\build\bin\Release\whisper-cli.exe -vm C:\path\ggml-silero-v6.2.0.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav - +whisper-cpp-amd/ +├── .github/ +│ └── workflows/ +│ ├── build.yml # All AMD backends — builds + publishes releases +│ └── sync.yml # Daily upstream sync + auto-tagging +├── ci/ +│ ├── resolve-rocm-version.sh # Resolves AMD tarball URL for a given ROCm version +│ └── map-gpu-target.sh # Maps gfx110X/gfx120X shorthands to specific arch lists +├── src/ +│ └── vitisai/ +│ ├── whisper-vitisai-encoder.h # VitisAI NPU encoder C interface +│ └── whisper-vitisai-encoder.cpp # FlexML runtime integration +├── scripts/ +│ └── local-build.ps1 # Local Windows build script (mirrors CI jobs exactly) +├── ggml/ # GGML library (all GPU backends live here) +├── src/ # whisper.cpp source (VitisAI hooks added) +└── CMakeLists.txt # Adds -DWHISPER_VITISAI option ``` -To see a list of all available models, run the above commands without any -arguments. +--- -This model can be also be converted manually to ggml using the following command: -```console -$ python3 -m venv venv && source venv/bin/activate -$ (venv) pip install silero-vad -$ (venv) $ python models/convert-silero-vad-to-ggml.py --output models/silero.bin -Saving GGML Silero-VAD model to models/silero-v6.2.0-ggml.bin -``` -And it can then be used with whisper as follows: -```console -$ ./build/bin/whisper-cli \ - --file ./samples/jfk.wav \ - --model ./models/ggml-base.en.bin \ - --vad \ - --vad-model ./models/silero-v6.2.0-ggml.bin -``` +## 📄 License + +This project is licensed under the MIT License — see [LICENSE](LICENSE) for details. -### VAD Options - -* --vad-threshold: Threshold probability for speech detection. A probability -for a speech segment/frame above this threshold will be considered as speech. - -* --vad-min-speech-duration-ms: Minimum speech duration in milliseconds. Speech -segments shorter than this value will be discarded to filter out brief noise or -false positives. - -* --vad-min-silence-duration-ms: Minimum silence duration in milliseconds. Silence -periods must be at least this long to end a speech segment. Shorter silence -periods will be ignored and included as part of the speech. - -* --vad-max-speech-duration-s: Maximum speech duration in seconds. Speech segments -longer than this will be automatically split into multiple segments at silence -points exceeding 98ms to prevent excessively long segments. - -* --vad-speech-pad-ms: Speech padding in milliseconds. Adds this amount of padding -before and after each detected speech segment to avoid cutting off speech edges. - -* --vad-samples-overlap: Amount of audio to extend from each speech segment into -the next one, in seconds (e.g., 0.10 = 100ms overlap). This ensures speech isn't -cut off abruptly between segments when they're concatenated together. - -## Examples - -There are various examples of using the library for different projects in the [examples](examples) folder. -Some of the examples are even ported to run in the browser using WebAssembly. Check them out! - -| Example | Web | Description | -| --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | -| [whisper-cli](examples/cli) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper | -| [whisper-bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine | -| [whisper-stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture | -| [whisper-command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic | -| [whisper-server](examples/server) | | HTTP transcription server with OAI-like API | -| [whisper-talk-llama](examples/talk-llama) | | Talk with a LLaMA bot | -| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp | -| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp | -| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp | -| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim | -| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture | -| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggml-org/whisper.cpp/issues/185) | -| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) | -| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess | - -## [Discussions](https://github.com/ggml-org/whisper.cpp/discussions) - -If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic. -You can use the [Show and tell](https://github.com/ggml-org/whisper.cpp/discussions/categories/show-and-tell) category -to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the -[Frequently asked questions (#126)](https://github.com/ggml-org/whisper.cpp/discussions/126) discussion. +whisper.cpp is copyright Georgi Gerganov and contributors — [ggerganov/whisper.cpp](https://github.com/ggerganov/whisper.cpp). +ROCm is copyright Advanced Micro Devices, Inc. +VitisAI encoder copyright 2025 Advanced Micro Devices, Inc. diff --git a/scripts/local-build.ps1 b/scripts/local-build.ps1 new file mode 100644 index 00000000000..db90dd5a747 --- /dev/null +++ b/scripts/local-build.ps1 @@ -0,0 +1,395 @@ +<# +.SYNOPSIS + Local build script for whisper-cpp-amd. Mirrors the GitHub Actions build.yml jobs for Windows. + +.DESCRIPTION + Builds one or more AMD backends locally, producing the same zip artifacts that CI publishes. + +.PARAMETER Backend + Which backend to build: cpu, vulkan, rocm, npu, all. Default: cpu + +.PARAMETER GfxTarget + ROCm GPU target. Default: gfx1151 + Common: gfx1151, gfx1150, gfx1100, gfx1200 + +.PARAMETER RocmVersion + ROCm version to download. Default: 7.12.0 + +.PARAMETER OutputDir + Directory for final zip artifacts. Default: .\dist + +.PARAMETER BuildDir + CMake build directory prefix. Default: .\build-local + +.PARAMETER Version + Version string used in artifact filenames. Default: local + +.EXAMPLE + .\scripts\local-build.ps1 -Backend cpu + .\scripts\local-build.ps1 -Backend vulkan + .\scripts\local-build.ps1 -Backend rocm -GfxTarget gfx1151 + .\scripts\local-build.ps1 -Backend npu + .\scripts\local-build.ps1 -Backend all -Version 1.8.4 +#> + +param( + [ValidateSet("cpu","vulkan","rocm","npu","all")] + [string]$Backend = "cpu", + [string]$GfxTarget = "gfx1151", + [string]$RocmVersion = "7.12.0", + [string]$OutputDir = ".\dist", + [string]$BuildDir = ".\build-local", + [string]$Version = "local" +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = "Stop" + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +function Write-Step([string]$msg) { + Write-Host "" + Write-Host "================================================" -ForegroundColor Cyan + Write-Host " $msg" -ForegroundColor Cyan + Write-Host "================================================" -ForegroundColor Cyan +} + +function Write-Ok([string]$msg) { Write-Host " [OK] $msg" -ForegroundColor Green } +function Write-Info([string]$msg) { Write-Host " --> $msg" -ForegroundColor Yellow } +function Write-Fail([string]$msg) { Write-Host " [X] $msg" -ForegroundColor Red } + +function Require-Command([string]$cmd) { + if (-not (Get-Command $cmd -ErrorAction SilentlyContinue)) { + Write-Fail "$cmd not found in PATH" + throw "Missing requirement: $cmd" + } + Write-Ok "$cmd found" +} + +function Download-SDL2 { + param([string]$Ver = "2.28.5") + $sdlDir = "SDL2-$Ver" + if (Test-Path $sdlDir) { + Write-Info "SDL2 already extracted at $sdlDir" + } else { + Write-Info "Downloading SDL2 $Ver ..." + $url = "https://github.com/libsdl-org/SDL/releases/download/release-$Ver/SDL2-devel-$Ver-VC.zip" + Invoke-WebRequest -Uri $url -OutFile "sdl2.zip" + 7z x sdl2.zip -y | Out-Null + Remove-Item sdl2.zip + + # Patch SDL_endian.h (needed for AMD clang compatibility) + $hdr = Get-ChildItem -Recurse -Filter "SDL_endian.h" | Select-Object -First 1 + if ($hdr) { + $content = Get-Content $hdr.FullName -Raw + if ($content -match 'extern void _m_prefetch') { + $patched = $content -replace 'extern void _m_prefetch\(void \*__P\);', '// extern void _m_prefetch(void *__P);' + Set-Content -Path $hdr.FullName -Value $patched -NoNewline + Write-Ok "Patched SDL_endian.h" + } + } + } + $cmake = Get-ChildItem -Recurse -Filter "sdl2-config.cmake" | Select-Object -First 1 + if (-not $cmake) { throw "sdl2-config.cmake not found after SDL2 extraction" } + return $cmake.DirectoryName +} + +function Package-Build { + param([string]$Name, [string]$BinPath) + New-Item -ItemType Directory -Force -Path $OutputDir | Out-Null + $zip = Join-Path $OutputDir "$Name.zip" + Write-Info "Creating $zip ..." + Compress-Archive -Path "$BinPath\*" -DestinationPath $zip -Force + $mb = [math]::Round((Get-Item $zip).Length / 1MB, 2) + Write-Ok "Created $zip ($mb MB)" + return $zip +} + +function Run-MSBuild { + param([string]$Dir, [string[]]$ConfigArgs, [string]$Config = "Release", [string]$Arch = "x64") + Write-Info "CMake configure ..." + & cmake -S . -B $Dir @ConfigArgs + if ($LASTEXITCODE -ne 0) { throw "CMake configure failed (exit $LASTEXITCODE)" } + Write-Info "MSBuild $Config ..." + & cmake --build $Dir --config $Config -j $env:NUMBER_OF_PROCESSORS + if ($LASTEXITCODE -ne 0) { throw "Build failed (exit $LASTEXITCODE)" } +} + +# ── Preflight ───────────────────────────────────────────────────────────────── + +if (-not (Test-Path "CMakeLists.txt") -or -not (Test-Path "src\whisper.cpp")) { + Write-Fail "Run this script from the whisper-cpp-amd repo root." + exit 1 +} + +Require-Command cmake +New-Item -ItemType Directory -Force -Path $OutputDir | Out-Null + +# ── Build functions ─────────────────────────────────────────────────────────── + +function Build-CPU { + Write-Step "CPU - Windows x64" + Require-Command msbuild + + $SDL2_DIR = Download-SDL2 + $dir = "$BuildDir-cpu" + + Run-MSBuild $dir @( + "-A", "x64", + "-DCMAKE_BUILD_TYPE=Release", + "-DBUILD_SHARED_LIBS=ON", + "-DWHISPER_SDL2=ON", + "-DSDL2_DIR=$SDL2_DIR" + ) + + $sdl2dll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1 + if ($sdl2dll) { Copy-Item $sdl2dll.FullName "$dir\bin\Release\" -Force } + + $zip = Package-Build "whisper-$Version-windows-cpu-x64" "$dir\bin\Release" + Write-Ok "CPU build done. Artifact: $zip" +} + +function Build-Vulkan { + Write-Step "Vulkan - Windows x64" + Require-Command msbuild + + # Locate Vulkan SDK + $VULKAN_SDK = $env:VULKAN_SDK + if (-not $VULKAN_SDK) { + $sdkDir = Get-ChildItem "C:\VulkanSDK" -ErrorAction SilentlyContinue | + Sort-Object Name -Descending | Select-Object -First 1 + if (-not $sdkDir) { + Write-Fail "Vulkan SDK not found. Install from https://vulkan.lunarg.com/sdk/home" + throw "Missing Vulkan SDK" + } + $VULKAN_SDK = $sdkDir.FullName + } + Write-Ok "Vulkan SDK: $VULKAN_SDK" + + $SDL2_DIR = Download-SDL2 + $dir = "$BuildDir-vulkan" + + Run-MSBuild $dir @( + "-A", "x64", + "-DCMAKE_BUILD_TYPE=Release", + "-DBUILD_SHARED_LIBS=ON", + "-DGGML_VULKAN=ON", + "-DWHISPER_SDL2=ON", + "-DSDL2_DIR=$SDL2_DIR", + "-DVULKAN_SDK=$VULKAN_SDK" + ) + + $sdl2dll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1 + if ($sdl2dll) { Copy-Item $sdl2dll.FullName "$dir\bin\Release\" -Force } + + $zip = Package-Build "whisper-$Version-windows-vulkan-x64" "$dir\bin\Release" + Write-Ok "Vulkan build done. Artifact: $zip" +} + +function Build-ROCm { + Write-Step "ROCm - Windows x64 (target: $GfxTarget)" + Require-Command ninja + + # ── Download ROCm tarball ────────────────────────────────────────────── + $rocmRoot = "C:\opt\rocm" + if (-not (Test-Path "$rocmRoot\bin\amdclang.exe")) { + Write-Info "Downloading ROCm $RocmVersion for $GfxTarget (2-4 GB, takes a few minutes) ..." + + # Replicate resolve-rocm-version.sh: group targets use gfx1151 as the base tarball + $baseTarget = $GfxTarget + if ($GfxTarget -in @("gfx110X","gfx120X","gfx1150","gfx1100")) { + $baseTarget = "gfx1151" + } + $tarballUrl = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${baseTarget}-${RocmVersion}.tar.gz" + Write-Info "URL: $tarballUrl" + + Invoke-WebRequest -Uri $tarballUrl -OutFile rocm.tar.gz + New-Item -ItemType Directory -Force -Path $rocmRoot | Out-Null + & tar -xzf rocm.tar.gz -C $rocmRoot --strip-components=1 + if ($LASTEXITCODE -ne 0) { throw "ROCm extraction failed" } + Remove-Item rocm.tar.gz + Write-Ok "ROCm extracted to $rocmRoot" + } else { + Write-Ok "ROCm already present at $rocmRoot" + } + + # ── Map GFX target (mirrors map-gpu-target.sh) ───────────────────────── + $mappedTarget = switch ($GfxTarget) { + "gfx110X" { "gfx1100;gfx1101;gfx1102" } + "gfx120X" { "gfx1200;gfx1201" } + default { $GfxTarget } + } + Write-Info "GPU target: $GfxTarget -> $mappedTarget" + + $SDL2_DIR = Download-SDL2 + + # ── Set ROCm env ────────────────────────────────────────────────────── + $env:HIP_PATH = $rocmRoot + $env:HIP_PLATFORM = "amd" + $env:PATH = "$rocmRoot\bin;$rocmRoot\lib\llvm\bin;$env:PATH" + + # ── Configure ───────────────────────────────────────────────────────── + $dir = "$BuildDir-rocm-$GfxTarget" + Write-Info "CMake configure (Ninja Multi-Config) ..." + & cmake -S . -B $dir ` + -G "Ninja Multi-Config" ` + "-DGPU_TARGETS=$mappedTarget" ` + -DGGML_HIP=ON ` + "-DCMAKE_C_COMPILER=$rocmRoot/lib/llvm/bin/amdclang.exe" ` + "-DCMAKE_CXX_COMPILER=$rocmRoot/lib/llvm/bin/amdclang++.exe" ` + "-DCMAKE_HIP_COMPILER=$rocmRoot/lib/llvm/bin/amdclang++.exe" ` + "-DCMAKE_C_FLAGS=-D__PRFCHWINTRIN_H" ` + "-DCMAKE_CXX_FLAGS=-D__PRFCHWINTRIN_H" ` + "-DCMAKE_HIP_FLAGS=--rocm-path=$rocmRoot" ` + "-DCMAKE_PREFIX_PATH=$rocmRoot" ` + -DCMAKE_BUILD_TYPE=Release ` + -DBUILD_SHARED_LIBS=ON ` + -DWHISPER_SDL2=ON ` + "-DSDL2_DIR=$SDL2_DIR" + if ($LASTEXITCODE -ne 0) { throw "CMake configure failed" } + + Write-Info "Building ..." + & cmake --build $dir --config Release -j $env:NUMBER_OF_PROCESSORS + if ($LASTEXITCODE -ne 0) { throw "Build failed" } + + # ── Copy ROCm DLLs ──────────────────────────────────────────────────── + $binOut = "$dir\bin\Release" + $rocBin = "$rocmRoot\bin" + Write-Info "Copying ROCm DLLs ..." + @("amdhip64_*.dll","amd_comgr*.dll","libhipblas.dll","rocblas.dll", + "rocsolver.dll","hipblaslt.dll","libhipblaslt.dll","hipblas.dll") | ForEach-Object { + Get-ChildItem $rocBin -Name $_ -ErrorAction SilentlyContinue | + ForEach-Object { Copy-Item (Join-Path $rocBin $_) (Join-Path $binOut $_) -Force } + } + $rocblasLib = Join-Path $rocBin "rocblas\library" + if (Test-Path $rocblasLib) { + Copy-Item $rocblasLib -Destination (Join-Path $binOut "rocblas\library") -Recurse -Force + } + $hipblasltLib = Join-Path $rocBin "hipblaslt\library" + if (Test-Path $hipblasltLib) { + Copy-Item $hipblasltLib -Destination (Join-Path $binOut "hipblaslt\library") -Recurse -Force + } + + $sdl2dll = Get-ChildItem -Path "SDL2-*\lib\x64\SDL2.dll" -ErrorAction SilentlyContinue | Select-Object -First 1 + if ($sdl2dll) { Copy-Item $sdl2dll.FullName $binOut -Force } + + $zip = Package-Build "whisper-$Version-windows-rocm-$GfxTarget" $binOut + Write-Ok "ROCm build done. Artifact: $zip" +} + +function Build-NPU { + Write-Step "NPU (VitisAI / RyzenAI) - Windows x64" + Require-Command msbuild + + # ── FlexML Runtime ──────────────────────────────────────────────────── + $flexmlDir = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1 + if (-not $flexmlDir) { + Write-Info "Downloading FlexML Runtime ..." + $url = "https://github.com/lemonade-sdk/whisper.cpp/releases/download/deps/flexmlrt1.7.0-win.zip" + Invoke-WebRequest -Uri $url -OutFile flexmlrt.zip + if (-not (Test-Path "flexmlrt.zip") -or (Get-Item "flexmlrt.zip").Length -eq 0) { + throw "flexmlrt.zip download failed or is empty" + } + $mb = [math]::Round((Get-Item "flexmlrt.zip").Length / 1MB, 2) + Write-Ok "Downloaded FlexML: $mb MB" + + & tar xvf flexmlrt.zip + if ($LASTEXITCODE -ne 0) { throw "FlexML extraction failed" } + Remove-Item flexmlrt.zip + + $flexmlDir = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1 + if (-not $flexmlDir) { throw "No flexmlrt directory found after extraction" } + } + Write-Ok "FlexML Runtime: $($flexmlDir.FullName)" + + # ── Run setup.bat via a temporary cmd script ─────────────────────────── + # cmd /c with && is not reliable from PowerShell; use a temp .bat file instead + $tempBat = [System.IO.Path]::GetTempFileName() + ".bat" + $setupPath = Join-Path $flexmlDir.FullName "setup.bat" + Set-Content -Path $tempBat -Value "@echo off`r`ncall `"$setupPath`"`r`nif errorlevel 1 exit /b 1`r`necho FLEXML_OK" + Write-Info "Running FlexML setup.bat ..." + $setupOut = & cmd /c $tempBat 2>&1 + Remove-Item $tempBat -ErrorAction SilentlyContinue + + if ($LASTEXITCODE -ne 0 -or ($setupOut -notmatch "FLEXML_OK")) { + Write-Fail "FlexML setup.bat failed. Output:" + $setupOut | ForEach-Object { Write-Host " $_" } + throw "FlexML setup failed. Ensure NPU drivers (>= .280) are installed." + } + Write-Ok "FlexML environment configured" + + # ── CMake configure + build ─────────────────────────────────────────── + $dir = "$BuildDir-npu" + Write-Info "CMake configure with -DWHISPER_VITISAI=ON ..." + & cmake -B $dir -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_VITISAI=ON + if ($LASTEXITCODE -ne 0) { throw "CMake configure failed" } + + Write-Info "Building ..." + & cmake --build $dir --config Release -j $env:NUMBER_OF_PROCESSORS + if ($LASTEXITCODE -ne 0) { throw "Build failed" } + + # ── List output ─────────────────────────────────────────────────────── + $binOut = "$dir\bin\Release" + if (Test-Path $binOut) { + Write-Info "Build output:" + Get-ChildItem $binOut | Format-Table Name, Length -AutoSize + } else { + throw "Expected output directory $binOut not found" + } + + # ── Copy FlexML DLLs ───────────────────────────────────────────────── + Write-Info "Copying FlexML DLLs ..." + $copied = 0 + foreach ($sub in @("bin", "lib")) { + $subPath = Join-Path $flexmlDir.FullName $sub + if (Test-Path $subPath) { + $dlls = Get-ChildItem "$subPath\*.dll" -ErrorAction SilentlyContinue + if ($dlls) { + Copy-Item $dlls.FullName $binOut -Force + $copied += $dlls.Count + } + } + } + Write-Ok "Copied $copied FlexML DLLs" + + $zip = Package-Build "whisper-$Version-windows-npu-x64" $binOut + Write-Ok "NPU build done. Artifact: $zip" + Write-Info "To run: place the .rai encoder model next to your ggml-*.bin and run whisper-cli.exe normally." +} + +# ── Main dispatch ───────────────────────────────────────────────────────────── + +$targets = if ($Backend -eq "all") { @("cpu","vulkan","rocm","npu") } else { @($Backend) } +$results = [ordered]@{} + +foreach ($t in $targets) { + try { + switch ($t) { + "cpu" { Build-CPU } + "vulkan" { Build-Vulkan } + "rocm" { Build-ROCm } + "npu" { Build-NPU } + } + $results[$t] = "[OK] PASSED" + } catch { + Write-Fail "[$t] failed: $_" + $results[$t] = "[FAIL] $_" + } +} + +# ── Summary ─────────────────────────────────────────────────────────────────── + +Write-Step "Build Summary" +foreach ($t in $targets) { + $color = if ($results[$t].StartsWith("[OK]")) { "Green" } else { "Red" } + Write-Host " $t : $($results[$t])" -ForegroundColor $color +} + +Write-Host "" +Write-Host "Artifacts in: $(Resolve-Path $OutputDir)" -ForegroundColor Cyan +if (Test-Path $OutputDir) { + Get-ChildItem $OutputDir -Filter "*.zip" | ForEach-Object { + $mb = [math]::Round($_.Length / 1MB, 2) + Write-Host " $($_.Name) ($mb MB)" + } +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 095a2791de5..fe10876eaf7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -48,6 +48,10 @@ if (WHISPER_OPENVINO) find_package(OpenVINO REQUIRED COMPONENTS Runtime) endif() +if (WHISPER_VITISAI) + find_package(FlexmlRT REQUIRED) +endif() + # # libraries # @@ -101,6 +105,30 @@ if (WHISPER_OPENVINO) set_target_properties(${TARGET} PROPERTIES FOLDER "libs") endif() +if (WHISPER_VITISAI) + set(TARGET whisper.vitisai) + + add_library(${TARGET} OBJECT + vitisai/whisper-vitisai-encoder.h + vitisai/whisper-vitisai-encoder.cpp + ) + + target_include_directories(${TARGET} PUBLIC + . + ) + + set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON) + set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_VITISAI) + + # C++17 required for MSVC (FlexML headers use structured bindings etc.) + if (MSVC) + target_compile_options(${TARGET} PRIVATE /std:c++17) + endif() + + target_link_libraries(${TARGET} PRIVATE ggml flexmlrt::flexmlrt) + set_target_properties(${TARGET} PROPERTIES FOLDER "libs") +endif() + # whisper add_library(whisper @@ -137,6 +165,10 @@ if (WHISPER_OPENVINO) target_link_libraries(whisper PRIVATE whisper.openvino) endif() +if (WHISPER_VITISAI) + target_link_libraries(whisper PRIVATE whisper.vitisai) +endif() + if (WHISPER_MKL) target_link_libraries(whisper PRIVATE MKL::MKL) endif() diff --git a/src/vitisai/whisper-vitisai-encoder.cpp b/src/vitisai/whisper-vitisai-encoder.cpp new file mode 100644 index 00000000000..a6d20a88c9a --- /dev/null +++ b/src/vitisai/whisper-vitisai-encoder.cpp @@ -0,0 +1,204 @@ +// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved. +#include "vitisai/whisper-vitisai-encoder.h" +#include "FlexMLClient.h" +#include "ggml.h" +#include "ggml-backend.h" + +#include +#include +#ifdef _WIN32 + #include +#else + #include + #include + #include +#endif +#include +#include + +struct whisper_vitisai_context { + std::string model_path; + std::shared_ptr runner; + uint8_t * fbs_buffer; + size_t fbs_buffer_size; +}; + +// Function to mmap rai file for Linux and MapViewOfFile for Windows +bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size) { +#ifdef _WIN32 + // Open the file + HANDLE hFile = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + std::fprintf(stderr, "%s: %d: Failed to open rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Get the file size + LARGE_INTEGER fileSize; + if (!GetFileSizeEx(hFile, &fileSize)) { + CloseHandle(hFile); + std::fprintf(stderr, "%s: %d: Failed to get file size for rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Create a file mapping object + HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, fileSize.QuadPart, NULL); + if (hMapping == NULL) { + CloseHandle(hFile); + std::fprintf(stderr, "%s: %d: Failed to create file mapping for rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Map the file + *buffer = (uint8_t *)MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, fileSize.QuadPart); + if (*buffer == NULL) { + CloseHandle(hMapping); + CloseHandle(hFile); + std::fprintf(stderr, "%s: %d: Failed to map rai file '%s'\n", __func__, __LINE__, path); + return false; + } + *size = fileSize.QuadPart; + return true; +#else + // Open the file + FILE * fd = fopen(path, "rb"); + if (!fd) { + std::fprintf(stderr, "%s: %d: Failed to open rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Get the file size + struct stat st; + if (fstat(fileno(fd), &st) == -1) { + fclose(fd); + std::fprintf(stderr, "%s: %d: Failed to get file size for rai file '%s'\n", __func__, __LINE__, path); + return false; + } + + // Mmap the file + *buffer = (uint8_t *)mmap(nullptr, st.st_size, PROT_READ, MAP_SHARED, fileno(fd), 0); + if (*buffer == MAP_FAILED) { + fclose(fd); + std::fprintf(stderr, "%s: %d: Failed to mmap rai file '%s'\n", __func__, __LINE__, path); + return false; + } + *size = st.st_size; + return true; +#endif // _WIN32 +} + +void unmap_rai_file(uint8_t * buffer, size_t size) { +#ifdef _WIN32 + UnmapViewOfFile(buffer); +#else + munmap(buffer, size); +#endif // _WIN32 +} + +struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model) { + if (!path_model) { + std::fprintf(stderr, "%s: path_model is null\n", __func__); + return nullptr; + } + + auto * ctx = new whisper_vitisai_context; + ctx->model_path = path_model; + + // Override the model path with the environment variable if it is set + if (const char * env_model_path = std::getenv("OVERRIDE_VITISAI_MODEL_PATH")) { + if (env_model_path[0] != '\0') { + ctx->model_path = env_model_path; + } + } + + // Step 1: Set up the model + flexmlrt::client::Options options; + options.modelPath = ctx->model_path; + options.deviceName = "stx"; + options.debug = false; + options.executeMode = 2; + options.extOptions["ai_analyzer_profiling"] = true; // Enable AIA profiling + options.extOptions["enable_preemption"] = true; + + // Check if model_path is rai file and if so, add fbs_buffer and fbs_buffer_size to the options + if (ctx->model_path.find(".rai") != std::string::npos) { + // mmap rai file for both Linux and Windows and pass the buffer to the options + ctx->fbs_buffer = nullptr; + ctx->fbs_buffer_size = 0; + if (map_rai_file(ctx->model_path.c_str(), &ctx->fbs_buffer, &ctx->fbs_buffer_size)) { + options.extOptions["fbs_buffer"] = ctx->fbs_buffer; + options.extOptions["fbs_buffer_size"] = ctx->fbs_buffer_size; + options.subgraphName = "vaiml_par_0"; + options.extOptions["cache_dir"] = std::string("."); + } else { + std::fprintf(stderr, "%s: Failed to mmap rai file '%s'\n", __func__, ctx->model_path.c_str()); + delete ctx; + return nullptr; + } + } + + try { + ctx->runner = std::make_shared(options); + + if (!ctx->runner->good()) { + throw std::runtime_error("Runner creation ran into an error"); + } + } catch (const std::exception & e) { + std::fprintf(stderr, "%s: Exception during Vitis AI runner creation: %s\n", __func__, e.what()); + delete ctx; + return nullptr; + } + return ctx; +} + +void whisper_vitisai_free(struct whisper_vitisai_context * ctx) { + if (!ctx) { + return; + } + + std::fprintf(stderr, "%s: releasing Vitis AI encoder context for model '%s'\n", __func__, ctx->model_path.c_str()); + if (ctx->fbs_buffer) { + unmap_rai_file(ctx->fbs_buffer, ctx->fbs_buffer_size); + } + delete ctx; +} + +int whisper_vitisai_encode(struct whisper_vitisai_context * ctx, struct ggml_tensor * mel, struct ggml_tensor * out) { + if (!ctx || !mel || !out) { + std::fprintf(stderr, "%s: ctx/mel/out must not be null\n", __func__); + return 0; + } + + if (ggml_n_dims(mel) != 2) { + std::fprintf(stderr, "%s: mel tensor expected to have 2 dims, got %d\n", __func__, ggml_n_dims(mel)); + return 0; + } + + if (ggml_n_dims(out) != 2) { + std::fprintf(stderr, "%s: out tensor expected to have 2 dims, got %d\n", __func__, ggml_n_dims(out)); + return 0; + } + + // setup input and output tensors for Vitis AI model + std::vector input_tensors, output_tensors; + auto model = ctx->runner; + + // Get tensors as CPU tensors (hwTensor = false) + input_tensors = model->getIOTensors("input", false); + output_tensors = model->getIOTensors("output", false); + + // TODO: add assert checks for tensor numbers and shapes + + input_tensors[0].data = mel->data; + output_tensors[0].data = out->data; + + try { + model->forward(input_tensors, output_tensors); + std::fprintf(stdout, "%s: Vitis AI model inference completed.\n", __func__); + } catch (const std::exception & e) { + std::fprintf(stderr, "%s: Exception during model inference: %s\n", __func__, e.what()); + return 0; + } + + return 1; +} diff --git a/src/vitisai/whisper-vitisai-encoder.h b/src/vitisai/whisper-vitisai-encoder.h new file mode 100644 index 00000000000..05dc812be88 --- /dev/null +++ b/src/vitisai/whisper-vitisai-encoder.h @@ -0,0 +1,32 @@ +// Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved. + +#pragma once + +#include +#include +#include + +#if __cplusplus +extern "C" { +#endif + +struct whisper_vitisai_context; + +struct whisper_vitisai_context * whisper_vitisai_init(const char * path_model); +void whisper_vitisai_free(struct whisper_vitisai_context * ctx); + +// Function to mmap rai file for Linux and MapViewOfFile for Windows +bool map_rai_file(const char * path, uint8_t ** buffer, size_t * size); +// Function to unmap rai file for Linux and UnmapViewOfFile for Windows +void unmap_rai_file(uint8_t * buffer, size_t size); + +struct ggml_tensor; + +int whisper_vitisai_encode( + struct whisper_vitisai_context * ctx, + struct ggml_tensor * mel, + struct ggml_tensor * out); + +#if __cplusplus +} +#endif diff --git a/src/whisper.cpp b/src/whisper.cpp index 2f356da0f06..a038a5959ea 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -14,6 +14,10 @@ #include "openvino/whisper-openvino-encoder.h" #endif +#ifdef WHISPER_USE_VITISAI +#include "vitisai/whisper-vitisai-encoder.h" +#endif + #include #include #include @@ -903,6 +907,10 @@ struct whisper_state { whisper_openvino_context * ctx_openvino = nullptr; #endif +#ifdef WHISPER_USE_VITISAI + whisper_vitisai_context * ctx_vitisai = nullptr; +#endif + // [EXPERIMENTAL] token-level timestamps data int64_t t_beg = 0; int64_t t_last = 0; @@ -1970,7 +1978,13 @@ static bool whisper_encode_external(const whisper_state & wstate) { const bool use_openvino = wstate.ctx_openvino != nullptr; #endif - return use_coreml || use_openvino; +#ifndef WHISPER_USE_VITISAI + const bool use_vitisai = false; +#else + const bool use_vitisai = wstate.ctx_vitisai != nullptr; +#endif + + return use_coreml || use_openvino || use_vitisai; } static struct ggml_cgraph * whisper_build_graph_conv( @@ -2411,6 +2425,8 @@ static bool whisper_encode_internal( #if defined(WHISPER_USE_COREML) whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data); +#elif defined(WHISPER_USE_VITISAI) + whisper_vitisai_encode(wstate.ctx_vitisai, mel, wstate.embd_enc); #elif defined(WHISPER_USE_OPENVINO) whisper_openvino_encode(wstate.ctx_openvino, mel, wstate.embd_enc); #endif @@ -3346,6 +3362,20 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) { } #endif +#ifdef WHISPER_USE_VITISAI +// replace extension with Vitis AI encoder artifact (.rai) +static std::string whisper_get_vitisai_path_encoder_cache(std::string path_bin) { + auto pos = path_bin.rfind('.'); + if (pos != std::string::npos) { + path_bin = path_bin.substr(0, pos); + } + + path_bin += "-encoder-vitisai.rai"; + + return path_bin; +} +#endif + #ifdef WHISPER_USE_OPENVINO // replace .bin with-encoder-openvino.xml static std::string whisper_openvino_get_path_encoder(std::string path_bin) { @@ -3455,6 +3485,19 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { } #endif +#ifdef WHISPER_USE_VITISAI + const auto path_vitisai = whisper_get_vitisai_path_encoder_cache(ctx->path_model); + + state->ctx_vitisai = whisper_vitisai_init(path_vitisai.c_str()); + if (!state->ctx_vitisai) { + WHISPER_LOG_ERROR("%s: failed to load Vitis AI model from '%s'\n", __func__, path_vitisai.c_str()); + whisper_free_state(state); + return nullptr; + } else { + WHISPER_LOG_INFO("%s: Vitis AI model loaded\n", __func__); + } +#endif + state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx); state->batch = whisper_batch_init(ctx->model.hparams.n_text_ctx, WHISPER_MAX_DECODERS); @@ -3821,6 +3864,13 @@ void whisper_free_state(struct whisper_state * state) { } #endif +#ifdef WHISPER_USE_VITISAI + if (state->ctx_vitisai != nullptr) { + whisper_vitisai_free(state->ctx_vitisai); + state->ctx_vitisai = nullptr; + } +#endif + whisper_batch_free(state->batch); ggml_backend_sched_free(state->sched_conv.sched); @@ -4312,11 +4362,20 @@ static int whisper_has_openvino(void) { #endif } +static int whisper_has_vitisai(void) { +#ifdef WHISPER_USE_VITISAI + return 1; +#else + return 0; +#endif +} + const char * whisper_print_system_info(void) { static std::string s; s = ""; s += "WHISPER : "; + s += "VITISAI = " + std::to_string(whisper_has_vitisai()) + " | "; s += "COREML = " + std::to_string(whisper_has_coreml()) + " | "; s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | "; From 46fdb26677ed548e99d02afc977bb7c774836ded Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 9 Jun 2026 14:47:46 -0700 Subject: [PATCH 24/55] workflow: updates, test --- .github/workflows/bindings-go.yml | 22 --- .github/workflows/bindings-ruby.yml | 21 --- .github/workflows/build.yml | 8 +- .github/workflows/docker.yml | 77 ---------- .github/workflows/examples-wasm.yml | 97 ------------ .github/workflows/examples.yml | 48 ------ .github/workflows/runner_heartbeat.yml | 61 -------- .github/workflows/sync.yml | 28 ++-- .github/workflows/test-whisper.yml | 196 +++++++++++++++++++++++++ 9 files changed, 217 insertions(+), 341 deletions(-) delete mode 100644 .github/workflows/bindings-go.yml delete mode 100644 .github/workflows/bindings-ruby.yml delete mode 100644 .github/workflows/docker.yml delete mode 100644 .github/workflows/examples-wasm.yml delete mode 100644 .github/workflows/examples.yml delete mode 100644 .github/workflows/runner_heartbeat.yml create mode 100644 .github/workflows/test-whisper.yml diff --git a/.github/workflows/bindings-go.yml b/.github/workflows/bindings-go.yml deleted file mode 100644 index 83473e4636a..00000000000 --- a/.github/workflows/bindings-go.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Bindings Tests (Go) -on: - push: - paths: - - bindings/go/** - - whisper.h - pull_request: - paths: - - bindings/go/** - - whisper.h - -jobs: - ubuntu-22: - runs-on: ubuntu-22.04 - steps: - - uses: actions/setup-go@v6 - with: - go-version: '^1.23' - - uses: actions/checkout@v6 - - run: | - cd bindings/go - make test diff --git a/.github/workflows/bindings-ruby.yml b/.github/workflows/bindings-ruby.yml deleted file mode 100644 index c3f158e26e4..00000000000 --- a/.github/workflows/bindings-ruby.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Bindings Tests (Ruby) - -on: - push: - branches: - - master - pull_request: - types: [opened, synchronize, reopened] - -jobs: - ubuntu-22: - runs-on: ubuntu-22.04 - defaults: - run: - working-directory: bindings/ruby - steps: - - uses: ruby/setup-ruby@v1 - with: - ruby-version: '3.2' - - uses: actions/checkout@v6 - - run: rake test diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 069cb9d9fcc..9850ed6e1d7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -402,7 +402,11 @@ jobs: - name: Validate Vulkan artifacts run: | VFILES=$(find build -type f \( -iname "*vulkan*.so*" -o -iname "*vulkan*" \) 2>/dev/null | wc -l) - [ "$VFILES" -eq 0 ] && echo "::warning::No Vulkan-related artifacts found" + if [ "$VFILES" -eq 0 ]; then + echo "::warning::No Vulkan-related artifacts found" + else + echo "Vulkan artifacts found: $VFILES file(s)" + fi - name: Package run: | @@ -482,7 +486,7 @@ jobs: # 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) # ════════════════════════════════════════════════════════════════════════════════ windows-npu: - runs-on: [rai-170-sdk, Windows] + runs-on: [stx-halo, Windows] needs: determine-tag continue-on-error: true # runner may be offline; don't block release diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml deleted file mode 100644 index 6c0de0ece70..00000000000 --- a/.github/workflows/docker.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: Publish Docker image - -on: - pull_request: - push: - branches: - - master - -jobs: - push_to_registry: - name: Push Docker image to Docker Hub - if: github.event.pull_request.draft == false - - runs-on: ubuntu-22.04 - env: - COMMIT_SHA: ${{ github.sha }} - strategy: - fail-fast: false - matrix: - config: - - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" } - - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" } - - { tag: "main-intel", dockerfile: ".devops/main-intel.Dockerfile", platform: "linux/amd64" } - - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" } - - { tag: "main-vulkan", dockerfile: ".devops/main-vulkan.Dockerfile", platform: "linux/amd64" } - - steps: - - name: Check out the repo - uses: actions/checkout@v6 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - with: - image: tonistiigi/binfmt:qemu-v7.0.0-28 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to Docker Hub - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Free up disk space - run: | - sudo apt-get remove -y '^dotnet-.*' '^llvm-.*' '^mysql-.*' '^postgresql-.*' - sudo apt-get autoremove -y - sudo apt-get autoclean - - sudo rm -rf /usr/share/dotnet - sudo rm -rf /usr/local/lib/android - sudo rm -rf /opt/ghc - sudo rm -rf /opt/hostedtoolcache/CodeQL - - docker system prune -af - - df -h - - - name: Generate tags - id: tags - run: | - TAGS="ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}" - if [ "${{ github.event_name }}" == "push" ]; then - TAGS="$TAGS,ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}" - fi - echo "tags=$TAGS" >> $GITHUB_OUTPUT - - - name: Build and push Docker image (tagged) - uses: docker/build-push-action@v6 - with: - context: . - push: ${{ github.event_name == 'push' }} - platforms: ${{ matrix.config.platform }} - tags: ${{ steps.tags.outputs.tags }} - file: ${{ matrix.config.dockerfile }} diff --git a/.github/workflows/examples-wasm.yml b/.github/workflows/examples-wasm.yml deleted file mode 100644 index 927438cdad8..00000000000 --- a/.github/workflows/examples-wasm.yml +++ /dev/null @@ -1,97 +0,0 @@ -name: Examples WASM -on: - push: - branches: ["master"] - - workflow_dispatch: - -permissions: - contents: read - pages: write - id-token: write - -concurrency: - group: "pages" - cancel-in-progress: false - -jobs: - deploy-wasm-github-pages: - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v6 - - - name: Setup Pages - uses: actions/configure-pages@v5 - - - name: Setup emsdk - uses: mymindstorm/setup-emsdk@v14 - - - name: Build WASM Examples - # Enable for real build later in whisper.cpp - run: | - mkdir -p build-em && cd build-em - emcmake cmake .. -DCMAKE_BUILD_TYPE=Release - make -j - - - name: Create staging directory - run: mkdir -p staging - - - name: Create .nojekyll file in staging directory - run: touch staging/.nojekyll - - - name: Copy application files - run: | - build_dir=build-em/bin - - ls ${build_dir} - - # command.wasm - target_dir=staging/command.wasm - mkdir -p ${target_dir} - cp ${build_dir}/command.wasm/{index.html,command.js,helpers.js} ${target_dir} - cp ${build_dir}/libcommand.js ${target_dir} - - # bench.wasm - target_dir=staging/bench.wasm - mkdir -p ${target_dir} - cp ${build_dir}/bench.wasm/{index.html,bench.js,helpers.js} ${target_dir} - cp ${build_dir}/libbench.js ${target_dir} - - # stream.wasm - target_dir=staging/stream.wasm - mkdir -p ${target_dir} - cp ${build_dir}/stream.wasm/{index.html,stream.js,helpers.js} ${target_dir} - cp ${build_dir}/libstream.js ${target_dir} - - # wchess.wasm - target_dir=staging/wchess.wasm - mkdir -p ${target_dir} - cp -r ${build_dir}/wchess.wasm/{index.html,css,img,js} ${target_dir} - cp ${build_dir}/wchess.wasm.js ${target_dir} - - # whisper.wasm (this will be the main example page) - target_dir=staging - mkdir -p ${target_dir} - cp ${build_dir}/whisper.wasm/{index.html,main.js,helpers.js} ${target_dir} - cp ${build_dir}/libmain.js ${target_dir} - - # Copy Cross-Origin Isolation service worker - cp -v examples/coi-serviceworker.js staging/ - - - name: List files in staging directory (for debugging) - run: | - echo "Files in staging directory:" - find staging -type f | sort - - - name: Upload artifact - uses: actions/upload-pages-artifact@v4 - with: - path: ./staging - - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml deleted file mode 100644 index 1c9ade5a300..00000000000 --- a/.github/workflows/examples.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: Examples Tests -on: - push: - paths: - - examples/addon.node/** - - whisper.h - pull_request: - paths: - - examples/addon.node/** - - whisper.h - -jobs: - addon_node-ubuntu-22: - runs-on: ubuntu-22.04 - strategy: - matrix: - node-version: [ 16.x, 18.x ] - steps: - - name: Clone - uses: actions/checkout@v6 - - - name: Dependencies - run: | - sudo apt-get update - sudo apt-get install build-essential git - sudo apt-get install cmake - sudo apt-get install libsdl2-dev - - - name: Use Node.js ${{ matrix.node-version }} - uses: actions/setup-node@v6 - with: - node-version: ${{ matrix.node-version }} - cache: 'npm' - - - name: Install package.json dependencies - working-directory: ./examples/addon.node - run: npm install - - - name: Compile addon.node - run: npx cmake-js compile -T addon.node -B Release - - - name: Download test model - run: | - bash ./models/download-ggml-model.sh base.en - - name: Test - run: | - cd examples/addon.node - npm run test diff --git a/.github/workflows/runner_heartbeat.yml b/.github/workflows/runner_heartbeat.yml deleted file mode 100644 index de133378750..00000000000 --- a/.github/workflows/runner_heartbeat.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: Runner Heartbeat - -on: - schedule: - - cron: '0 */6 * * *' - workflow_dispatch: - -jobs: - check-rocm-linux: - strategy: - fail-fast: false - matrix: - include: - # Uncomment when self-hosted runners are registered: - # - runner: [rai300_400, Linux] - # name: rai300-400-linux - - runner: [stx-halo, Linux] - name: stx-halo-linux - runs-on: ${{ matrix.runner }} - timeout-minutes: 10 - steps: - - name: Heartbeat - run: | - echo "=== Runner Heartbeat: ${{ matrix.name }} ===" - echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)" - echo "Hostname: $(hostname)" - echo "=== GPU Status ===" - rocm-smi 2>/dev/null || echo "rocm-smi not available" - echo "=== Disk Space ===" - df -h / /mnt 2>/dev/null || df -h / - echo "=== Memory ===" - free -h - echo "=== ROCm Version ===" - cat /opt/rocm/.info/version 2>/dev/null || echo "ROCm version file not found" - - # Uncomment when Windows self-hosted runners are registered: - # check-rocm-windows: - # strategy: - # fail-fast: false - # matrix: - # include: - # - runner: [rai300_400, Windows] - # name: rai300-400-windows - # - runner: [stx-halo, Windows] - # name: stx-halo-windows - # runs-on: ${{ matrix.runner }} - # timeout-minutes: 10 - # steps: - # - name: Heartbeat - # shell: pwsh - # run: | - # Write-Host "=== Runner Heartbeat: ${{ matrix.name }} ===" - # Write-Host "Timestamp: $(Get-Date -Format o)" - # Write-Host "Hostname: $env:COMPUTERNAME" - # Write-Host "=== GPU Status ===" - # & "$env:HIP_PATH\bin\rocm-smi.exe" 2>$null - # Write-Host "=== Disk Space ===" - # Get-PSDrive -PSProvider FileSystem | Format-Table Name, Used, Free -AutoSize - # Write-Host "=== Memory ===" - # $os = Get-CimInstance Win32_OperatingSystem - # Write-Host "Free: $([math]::Round($os.FreePhysicalMemory/1MB, 1)) GB / Total: $([math]::Round($os.TotalVisibleMemorySize/1MB, 1)) GB" diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml index 9333aa774e1..b4d1411518a 100644 --- a/.github/workflows/sync.yml +++ b/.github/workflows/sync.yml @@ -105,24 +105,26 @@ jobs: git commit -m "chore: merge upstream ${{ steps.upstream.outputs.tag }} (auto-resolved via theirs)" --allow-empty git push origin "$BRANCH" - gh pr create \ - --title "Sync upstream ${{ steps.upstream.outputs.tag }} — conflict resolution needed" \ - --body "## Upstream sync: ${{ steps.upstream.outputs.tag }} + TAG="${{ steps.upstream.outputs.tag }}" + cat > /tmp/pr-body.md << EOF +## Upstream sync: ${TAG} Conflicts were detected during automatic merge. Files affected: -\`\`\` -$CONFLICT_FILES -\`\`\` + ${CONFLICT_FILES} + +This PR was auto-resolved using upstream (theirs) as a baseline. +Please review the diff carefully before merging. -**This PR was auto-resolved using upstream (theirs) as a baseline — please review the diff carefully before merging.** +Once merged, manually create the release tag on main to trigger the build: -Once merged, manually create tag \`${{ steps.upstream.outputs.tag }}\` on main to trigger the release build: -\`\`\`bash -git tag ${{ steps.upstream.outputs.tag }} -git push origin ${{ steps.upstream.outputs.tag }} -\`\`\` -" \ + git tag ${TAG} + git push origin ${TAG} +EOF + + gh pr create \ + --title "Sync upstream ${TAG} - conflict resolution needed" \ + --body-file /tmp/pr-body.md \ --base main \ --head "$BRANCH" diff --git a/.github/workflows/test-whisper.yml b/.github/workflows/test-whisper.yml new file mode 100644 index 00000000000..dfd650ebb3e --- /dev/null +++ b/.github/workflows/test-whisper.yml @@ -0,0 +1,196 @@ +name: Test whisper-cli + +# Tests the whisper-cli binary from completed release builds. +# Three trigger modes: +# workflow_run - auto-runs after "AMD Build & Release" succeeds, downloads artifacts +# workflow_dispatch - manual, supply the build run-id to download artifacts from +# pull_request - builds from source (no artifact available yet) and tests + +on: + workflow_run: + workflows: ["AMD Build & Release"] + types: [completed] + workflow_dispatch: + inputs: + run_id: + description: 'build.yml run ID to download artifacts from (found in the Actions tab URL)' + required: true + type: string + pull_request: + paths: + - 'src/**' + - 'ggml/**' + - 'CMakeLists.txt' + - '**.cmake' + +permissions: + actions: read + contents: read + +jobs: + + # ------------------------------------------------------------------------- + # Test real release artifact on Linux (CPU build) + # Only runs when triggered by a successful build.yml run or manual dispatch + # ------------------------------------------------------------------------- + test-linux-artifact: + if: > + (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || + github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + + steps: + - name: Checkout (for samples/ and models/ scripts) + uses: actions/checkout@v4 + + - name: Download linux-cpu artifact + uses: actions/download-artifact@v4 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + run-id: ${{ github.event.inputs.run_id || github.event.workflow_run.id }} + pattern: 'whisper-*-linux-cpu-x86_64.tar.gz' + merge-multiple: true + path: ./artifact + + - name: Extract artifact + run: | + mkdir -p ./bin + TARBALL=$(ls artifact/whisper-*-linux-cpu-x86_64.tar.gz | head -1) + echo "Extracting: $TARBALL" + tar -xzf "$TARBALL" --strip-components=1 -C ./bin + chmod +x ./bin/whisper-cli + echo "Binaries in ./bin:" + ls -lh ./bin/whisper-cli + + - name: Download tiny model + run: | + mkdir -p models + bash ./models/download-ggml-model.sh tiny + + - name: Run transcription + run: | + ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + echo "--- Transcription output ---" + cat jfk-result.txt + + - name: Verify transcription correctness + run: | + if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then + echo "PASS: transcription contains expected words" + else + echo "FAIL: expected words not found in transcription" + echo "--- Full output ---" + cat jfk-result.txt + exit 1 + fi + + # ------------------------------------------------------------------------- + # Test real release artifact on Windows (CPU build) + # ------------------------------------------------------------------------- + test-windows-artifact: + if: > + (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || + github.event_name == 'workflow_dispatch' + runs-on: windows-latest + + steps: + - name: Checkout (for samples/) + uses: actions/checkout@v4 + + - name: Download windows-cpu artifact + uses: actions/download-artifact@v4 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + run-id: ${{ github.event.inputs.run_id || github.event.workflow_run.id }} + pattern: 'whisper-*-windows-cpu-x64.zip' + merge-multiple: true + path: ./artifact + + - name: Extract artifact + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path ./bin | Out-Null + $zip = Get-ChildItem artifact -Filter "whisper-*-windows-cpu-x64.zip" | Select-Object -First 1 + Write-Host "Extracting: $($zip.FullName)" + Expand-Archive -Path $zip.FullName -DestinationPath ./bin -Force + Write-Host "Binaries:" + Get-ChildItem ./bin -Filter "*.exe" | Format-Table Name, Length + + - name: Download tiny model + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path models | Out-Null + $url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" + Write-Host "Downloading ggml-tiny.bin..." + Invoke-WebRequest -Uri $url -OutFile "models\ggml-tiny.bin" + $mb = [math]::Round((Get-Item "models\ggml-tiny.bin").Length / 1MB, 2) + Write-Host "Downloaded: $mb MB" + + - name: Run transcription + shell: pwsh + run: | + .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + Write-Host "--- Transcription output ---" + Get-Content jfk-result.txt + + - name: Verify transcription correctness + shell: pwsh + run: | + $text = Get-Content "jfk-result.txt" -Raw -ErrorAction SilentlyContinue + if (-not $text) { Write-Error "jfk-result.txt is empty or missing"; exit 1 } + if ($text -match "country|ask not|nation|kennedy") { + Write-Host "PASS: transcription contains expected words" -ForegroundColor Green + } else { + Write-Error "FAIL: expected words not found in transcription" + Write-Host "--- Full output ---" + Write-Host $text + exit 1 + } + + # ------------------------------------------------------------------------- + # Build from source and test (pull_request only - no artifact available yet) + # ------------------------------------------------------------------------- + test-from-source: + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake git libsdl2-dev pkg-config + + - name: Build from source + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DWHISPER_BUILD_EXAMPLES=ON \ + -DWHISPER_BUILD_TESTS=OFF \ + -DWHISPER_BUILD_SERVER=OFF + cmake --build build --config Release -j$(nproc) + echo "Built whisper-cli:" + ls -lh build/bin/whisper-cli + + - name: Download tiny model + run: | + mkdir -p models + bash ./models/download-ggml-model.sh tiny + + - name: Run transcription + run: | + ./build/bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + echo "--- Transcription output ---" + cat jfk-result.txt + + - name: Verify transcription correctness + run: | + if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then + echo "PASS: transcription contains expected words" + else + echo "FAIL: expected words not found in transcription" + cat jfk-result.txt + exit 1 + fi From 56844c2b2824be497c050af22b48bd6a009050a9 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 9 Jun 2026 16:26:22 -0700 Subject: [PATCH 25/55] added NPU runner --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9850ed6e1d7..247bad410b5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -486,7 +486,7 @@ jobs: # 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) # ════════════════════════════════════════════════════════════════════════════════ windows-npu: - runs-on: [stx-halo, Windows] + runs-on: [self-hosted, Windows, rai300_400] needs: determine-tag continue-on-error: true # runner may be offline; don't block release From c7aba15b37105167dafbe63a72951d66a1fd2c5a Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 9 Jun 2026 16:31:12 -0700 Subject: [PATCH 26/55] added NPU runner --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 247bad410b5..07274e0498d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -486,7 +486,7 @@ jobs: # 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) # ════════════════════════════════════════════════════════════════════════════════ windows-npu: - runs-on: [self-hosted, Windows, rai300_400] + runs-on: [self-hosted, Windows, stx-halo] needs: determine-tag continue-on-error: true # runner may be offline; don't block release From 697a6519f2d7b30015e17adb71e9e28b2a1185f3 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 9 Jun 2026 17:28:11 -0700 Subject: [PATCH 27/55] runner: Update NPU runner --- .github/workflows/build.yml | 19 +- .github/workflows/test-whisper.yml | 324 ++++++++++++++++++----------- 2 files changed, 209 insertions(+), 134 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 07274e0498d..b0c111fb6a9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,23 +36,12 @@ on: type: string default: '7.12.0' push: - branches: - - main - - master - paths: - - '.github/workflows/build.yml' - - '**/CMakeLists.txt' - - '**/*.cmake' - - '**/*.h' - - '**/*.hpp' - - '**/*.c' - - '**/*.cpp' - - '**/*.cu' - - '**/*.cuh' - - '**/*.comp' tags: - 'v*' pull_request: + branches: + - master + - main types: [opened, synchronize, reopened] concurrency: @@ -486,7 +475,7 @@ jobs: # 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) # ════════════════════════════════════════════════════════════════════════════════ windows-npu: - runs-on: [self-hosted, Windows, stx-halo] + runs-on: [self-hosted, Windows, rai300_400] needs: determine-tag continue-on-error: true # runner may be offline; don't block release diff --git a/.github/workflows/test-whisper.yml b/.github/workflows/test-whisper.yml index dfd650ebb3e..3a9d2047ff3 100644 --- a/.github/workflows/test-whisper.yml +++ b/.github/workflows/test-whisper.yml @@ -1,120 +1,104 @@ name: Test whisper-cli -# Tests the whisper-cli binary from completed release builds. -# Three trigger modes: -# workflow_run - auto-runs after "AMD Build & Release" succeeds, downloads artifacts -# workflow_dispatch - manual, supply the build run-id to download artifacts from -# pull_request - builds from source (no artifact available yet) and tests +# Downloads a published release artifact and runs whisper-cli against jfk.wav. +# Tests on real self-hosted GPU hardware (stx-halo runners). +# Trigger manually after a release, or let it run automatically via workflow_dispatch +# from build.yml once artifacts are published. on: - workflow_run: - workflows: ["AMD Build & Release"] - types: [completed] workflow_dispatch: inputs: - run_id: - description: 'build.yml run ID to download artifacts from (found in the Actions tab URL)' - required: true + release_tag: + description: 'Release tag to test (e.g. v1.8.4) or "latest"' + required: false + default: 'latest' + type: string + gfx_target: + description: 'ROCm GPU target to test' + required: false + default: 'gfx1151' type: string - pull_request: - paths: - - 'src/**' - - 'ggml/**' - - 'CMakeLists.txt' - - '**.cmake' -permissions: - actions: read - contents: read +env: + RELEASE_TAG: ${{ github.event.inputs.release_tag || 'latest' }} + GFX_TARGET: ${{ github.event.inputs.gfx_target || 'gfx1151' }} jobs: - # ------------------------------------------------------------------------- - # Test real release artifact on Linux (CPU build) - # Only runs when triggered by a successful build.yml run or manual dispatch - # ------------------------------------------------------------------------- - test-linux-artifact: - if: > - (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || - github.event_name == 'workflow_dispatch' + # --------------------------------------------------------------------------- + # Resolve release tag (latest or specific) + # --------------------------------------------------------------------------- + prepare: runs-on: ubuntu-latest - + outputs: + release_tag: ${{ steps.resolve.outputs.release_tag }} steps: - - name: Checkout (for samples/ and models/ scripts) - uses: actions/checkout@v4 - - - name: Download linux-cpu artifact - uses: actions/download-artifact@v4 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - run-id: ${{ github.event.inputs.run_id || github.event.workflow_run.id }} - pattern: 'whisper-*-linux-cpu-x86_64.tar.gz' - merge-multiple: true - path: ./artifact - - - name: Extract artifact + - name: Resolve release tag + id: resolve + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - mkdir -p ./bin - TARBALL=$(ls artifact/whisper-*-linux-cpu-x86_64.tar.gz | head -1) - echo "Extracting: $TARBALL" - tar -xzf "$TARBALL" --strip-components=1 -C ./bin - chmod +x ./bin/whisper-cli - echo "Binaries in ./bin:" - ls -lh ./bin/whisper-cli - - - name: Download tiny model - run: | - mkdir -p models - bash ./models/download-ggml-model.sh tiny - - - name: Run transcription - run: | - ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result - echo "--- Transcription output ---" - cat jfk-result.txt - - - name: Verify transcription correctness - run: | - if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then - echo "PASS: transcription contains expected words" + if [ "${{ env.RELEASE_TAG }}" = "latest" ]; then + TAG=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/${{ github.repository }}/releases/latest" \ + | jq -r '.tag_name') + echo "Resolved latest release: $TAG" else - echo "FAIL: expected words not found in transcription" - echo "--- Full output ---" - cat jfk-result.txt - exit 1 + TAG="${{ env.RELEASE_TAG }}" + STATUS=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/${{ github.repository }}/releases/tags/$TAG") + if [ "$STATUS" != "200" ]; then + echo "Error: Release $TAG not found (HTTP $STATUS)" + exit 1 + fi + echo "Using specified release: $TAG" fi + echo "release_tag=$TAG" >> $GITHUB_OUTPUT - # ------------------------------------------------------------------------- - # Test real release artifact on Windows (CPU build) - # ------------------------------------------------------------------------- - test-windows-artifact: - if: > - (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || - github.event_name == 'workflow_dispatch' - runs-on: windows-latest + # --------------------------------------------------------------------------- + # Test ROCm artifact on Windows (self-hosted stx-halo GPU runner) + # --------------------------------------------------------------------------- + test-windows-rocm: + runs-on: [self-hosted, Windows, stx-halo] + needs: prepare steps: - - name: Checkout (for samples/) + - name: Checkout (for samples/jfk.wav and models/ scripts) uses: actions/checkout@v4 - - name: Download windows-cpu artifact - uses: actions/download-artifact@v4 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - run-id: ${{ github.event.inputs.run_id || github.event.workflow_run.id }} - pattern: 'whisper-*-windows-cpu-x64.zip' - merge-multiple: true - path: ./artifact - - - name: Extract artifact + - name: Download ROCm Windows artifact + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} shell: pwsh run: | - New-Item -ItemType Directory -Force -Path ./bin | Out-Null - $zip = Get-ChildItem artifact -Filter "whisper-*-windows-cpu-x64.zip" | Select-Object -First 1 - Write-Host "Extracting: $($zip.FullName)" - Expand-Archive -Path $zip.FullName -DestinationPath ./bin -Force + $tag = "${{ needs.prepare.outputs.release_tag }}" + $target = "${{ env.GFX_TARGET }}" + # strip leading 'v' for artifact filename (e.g. v1.8.4 -> 1.8.4) + $ver = $tag.TrimStart('v') + $asset = "whisper-${ver}-windows-rocm-${target}.zip" + $repo = "${{ github.repository }}" + + Write-Host "Downloading: $asset from release $tag" + + $headers = @{ "Authorization" = "token $env:GITHUB_TOKEN" } + $release = Invoke-RestMethod -Uri "https://api.github.com/repos/$repo/releases/tags/$tag" -Headers $headers + $found = $release.assets | Where-Object { $_.name -eq $asset } + + if (-not $found) { + Write-Error "Asset '$asset' not found in release '$tag'" + Write-Host "Available assets:" + $release.assets | ForEach-Object { Write-Host " $($_.name)" } + exit 1 + } + + Write-Host "Found: $($found.name) ($([math]::Round($found.size/1MB,2)) MB)" + Invoke-WebRequest -Uri $found.browser_download_url -OutFile $asset -Headers $headers + + Write-Host "Extracting..." + Expand-Archive -Path $asset -DestinationPath whisper-bin -Force Write-Host "Binaries:" - Get-ChildItem ./bin -Filter "*.exe" | Format-Table Name, Length + Get-ChildItem whisper-bin -Filter "*.exe" | Format-Table Name, Length - name: Download tiny model shell: pwsh @@ -123,17 +107,24 @@ jobs: $url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" Write-Host "Downloading ggml-tiny.bin..." Invoke-WebRequest -Uri $url -OutFile "models\ggml-tiny.bin" - $mb = [math]::Round((Get-Item "models\ggml-tiny.bin").Length / 1MB, 2) + $mb = [math]::Round((Get-Item "models\ggml-tiny.bin").Length/1MB,2) Write-Host "Downloaded: $mb MB" - name: Run transcription shell: pwsh run: | - .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + $cli = "whisper-bin\whisper-cli.exe" + if (-not (Test-Path $cli)) { + Write-Error "whisper-cli.exe not found. Contents of whisper-bin:" + Get-ChildItem -Recurse whisper-bin | Format-Table Name, Length + exit 1 + } + Write-Host "Running whisper-cli against samples\jfk.wav ..." + & $cli -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result Write-Host "--- Transcription output ---" Get-Content jfk-result.txt - - name: Verify transcription correctness + - name: Verify transcription shell: pwsh run: | $text = Get-Content "jfk-result.txt" -Raw -ErrorAction SilentlyContinue @@ -142,50 +133,84 @@ jobs: Write-Host "PASS: transcription contains expected words" -ForegroundColor Green } else { Write-Error "FAIL: expected words not found in transcription" - Write-Host "--- Full output ---" Write-Host $text exit 1 } - # ------------------------------------------------------------------------- - # Build from source and test (pull_request only - no artifact available yet) - # ------------------------------------------------------------------------- - test-from-source: - if: github.event_name == 'pull_request' - runs-on: ubuntu-latest + # --------------------------------------------------------------------------- + # Test ROCm artifact on Linux (self-hosted stx-halo GPU runner) + # --------------------------------------------------------------------------- + test-linux-rocm: + runs-on: [self-hosted, Linux, stx-halo] + needs: prepare steps: - - name: Checkout + - name: Checkout (for samples/jfk.wav) uses: actions/checkout@v4 - - name: Install build dependencies + - name: Install jq if needed run: | - sudo apt-get update - sudo apt-get install -y build-essential cmake git libsdl2-dev pkg-config + if ! command -v jq &>/dev/null; then + mkdir -p ~/bin + curl -L -o ~/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64 + chmod +x ~/bin/jq + echo "$HOME/bin" >> $GITHUB_PATH + fi - - name: Build from source + - name: Download ROCm Linux artifact + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - cmake -B build \ - -DCMAKE_BUILD_TYPE=Release \ - -DWHISPER_BUILD_EXAMPLES=ON \ - -DWHISPER_BUILD_TESTS=OFF \ - -DWHISPER_BUILD_SERVER=OFF - cmake --build build --config Release -j$(nproc) - echo "Built whisper-cli:" - ls -lh build/bin/whisper-cli + TAG="${{ needs.prepare.outputs.release_tag }}" + TARGET="${{ env.GFX_TARGET }}" + VER="${TAG#v}" + ASSET="whisper-${VER}-linux-rocm-${TARGET}.tar.gz" + REPO="${{ github.repository }}" + + echo "Downloading: $ASSET from release $TAG" + + RELEASE=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$REPO/releases/tags/$TAG") + + URL=$(echo "$RELEASE" | jq -r ".assets[] | select(.name == \"$ASSET\") | .browser_download_url") + + if [ -z "$URL" ] || [ "$URL" = "null" ]; then + echo "Asset '$ASSET' not found in release '$TAG'" + echo "Available assets:" + echo "$RELEASE" | jq -r '.assets[].name' + exit 1 + fi + + SIZE=$(echo "$RELEASE" | jq -r ".assets[] | select(.name == \"$ASSET\") | .size") + echo "Found: $ASSET ($SIZE bytes)" + + curl -L -H "Authorization: token $GITHUB_TOKEN" -o "$ASSET" "$URL" + + echo "Extracting..." + mkdir -p whisper-bin + tar -xzf "$ASSET" --strip-components=1 -C whisper-bin + chmod +x whisper-bin/whisper-cli + echo "Binaries:" + ls -lh whisper-bin/whisper-cli - name: Download tiny model run: | mkdir -p models - bash ./models/download-ggml-model.sh tiny + curl -L -o models/ggml-tiny.bin \ + "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" + echo "Downloaded: $(du -h models/ggml-tiny.bin | cut -f1)" + + - name: Set library path + run: echo "LD_LIBRARY_PATH=$(pwd)/whisper-bin:$LD_LIBRARY_PATH" >> $GITHUB_ENV - name: Run transcription run: | - ./build/bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + echo "Running whisper-cli against samples/jfk.wav ..." + ./whisper-bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result echo "--- Transcription output ---" cat jfk-result.txt - - name: Verify transcription correctness + - name: Verify transcription run: | if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then echo "PASS: transcription contains expected words" @@ -194,3 +219,64 @@ jobs: cat jfk-result.txt exit 1 fi + + # --------------------------------------------------------------------------- + # Test CPU artifact on Linux (GitHub-hosted runner - no GPU needed) + # --------------------------------------------------------------------------- + test-linux-cpu: + runs-on: ubuntu-latest + needs: prepare + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Download CPU Linux artifact + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + TAG="${{ needs.prepare.outputs.release_tag }}" + VER="${TAG#v}" + ASSET="whisper-${VER}-linux-cpu-x86_64.tar.gz" + REPO="${{ github.repository }}" + + echo "Downloading: $ASSET from release $TAG" + + RELEASE=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$REPO/releases/tags/$TAG") + + URL=$(echo "$RELEASE" | jq -r ".assets[] | select(.name == \"$ASSET\") | .browser_download_url") + + if [ -z "$URL" ] || [ "$URL" = "null" ]; then + echo "Asset '$ASSET' not found. Available:" + echo "$RELEASE" | jq -r '.assets[].name' + exit 1 + fi + + curl -L -H "Authorization: token $GITHUB_TOKEN" -o "$ASSET" "$URL" + mkdir -p whisper-bin + tar -xzf "$ASSET" --strip-components=1 -C whisper-bin + chmod +x whisper-bin/whisper-cli + ls -lh whisper-bin/whisper-cli + + - name: Download tiny model + run: | + mkdir -p models + curl -L -o models/ggml-tiny.bin \ + "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" + + - name: Run transcription + run: | + ./whisper-bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + echo "--- Transcription output ---" + cat jfk-result.txt + + - name: Verify transcription + run: | + if grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt; then + echo "PASS: transcription contains expected words" + else + echo "FAIL: expected words not found" + cat jfk-result.txt + exit 1 + fi From 00c9258db14969c5cd298a21e381e0dbad4bc4df Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 9 Jun 2026 17:59:29 -0700 Subject: [PATCH 28/55] workflow: updated with specific tests --- .github/workflows/build.yml | 320 +++++++++++++++++++++++++++++++++++- 1 file changed, 312 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b0c111fb6a9..59e7d42b330 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -236,7 +236,6 @@ jobs: echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV - uses: actions/upload-artifact@v4 - if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: name: ${{ env.ARCHIVE }} path: ${{ env.ARCHIVE }} @@ -349,7 +348,6 @@ jobs: "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - uses: actions/upload-artifact@v4 - if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: name: ${{ env.ARCHIVE }} path: ${{ env.ARCHIVE }} @@ -409,7 +407,6 @@ jobs: echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV - uses: actions/upload-artifact@v4 - if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: name: ${{ env.ARCHIVE }} path: ${{ env.ARCHIVE }} @@ -466,7 +463,6 @@ jobs: "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - uses: actions/upload-artifact@v4 - if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: name: ${{ env.ARCHIVE }} path: ${{ env.ARCHIVE }} @@ -566,7 +562,6 @@ jobs: Write-Host "NPU build complete. Artifact: $env:ARCHIVE" - uses: actions/upload-artifact@v4 - if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: name: ${{ env.ARCHIVE }} path: ${{ env.ARCHIVE }} @@ -618,7 +613,6 @@ jobs: echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV - uses: actions/upload-artifact@v4 - if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: name: ${{ env.ARCHIVE }} path: ${{ env.ARCHIVE }} @@ -665,16 +659,319 @@ jobs: "ARCHIVE=$a" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - uses: actions/upload-artifact@v4 - if: ${{ needs.determine-tag.outputs.should_release == 'true' }} with: name: ${{ env.ARCHIVE }} path: ${{ env.ARCHIVE }} # ════════════════════════════════════════════════════════════════════════════════ # 9. Publish GitHub Release +# ════════════════════════════════════════════════════════════════════════════════ +# Shared model download step (reused across all test jobs via inline steps) +# Models: ggml-tiny.bin from HuggingFace ggerganov/whisper.cpp +# ggml-tiny-encoder-vitisai.rai from amd/whisper-tiny-onnx-npu +# ════════════════════════════════════════════════════════════════════════════════ + +# ════════════════════════════════════════════════════════════════════════════════ +# 9. Test — CPU Windows (GitHub-hosted, no GPU needed) +# ════════════════════════════════════════════════════════════════════════════════ + test-cpu-windows: + runs-on: windows-latest + needs: [determine-tag, windows-cpu] + if: needs.windows-cpu.result == 'success' + continue-on-error: true + steps: + - uses: actions/checkout@v4 + + - name: Download artifact + uses: actions/download-artifact@v4 + with: + name: whisper-${{ needs.determine-tag.outputs.version }}-windows-cpu-x64.zip + path: ./artifact + + - name: Extract + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path bin | Out-Null + Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force + + - name: Download tiny model + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path models | Out-Null + Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" + + - name: Run and verify + shell: pwsh + run: | + .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + $text = Get-Content "jfk-result.txt" -Raw + Write-Host $text + if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" -ForegroundColor Green } + else { Write-Error "FAIL: expected words not found"; exit 1 } + +# ════════════════════════════════════════════════════════════════════════════════ +# 10. Test — CPU Linux (GitHub-hosted, no GPU needed) +# ════════════════════════════════════════════════════════════════════════════════ + test-cpu-linux: + runs-on: ubuntu-latest + needs: [determine-tag, linux-cpu] + if: needs.linux-cpu.result == 'success' + continue-on-error: true + steps: + - uses: actions/checkout@v4 + + - name: Download artifact + uses: actions/download-artifact@v4 + with: + name: whisper-${{ needs.determine-tag.outputs.version }}-linux-cpu-x86_64.tar.gz + path: ./artifact + + - name: Extract + run: | + mkdir -p bin + tar -xzf artifact/*.tar.gz --strip-components=1 -C bin + chmod +x bin/whisper-cli + + - name: Download tiny model + run: | + mkdir -p models + curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" + + - name: Run and verify + run: | + LD_LIBRARY_PATH=$(pwd)/bin:$LD_LIBRARY_PATH ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + cat jfk-result.txt + grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; } + +# ════════════════════════════════════════════════════════════════════════════════ +# 11. Test — Vulkan Windows (stx-halo, has Vulkan driver) +# ════════════════════════════════════════════════════════════════════════════════ + test-vulkan-windows: + runs-on: [self-hosted, Windows, stx-halo] + needs: [determine-tag, windows-vulkan] + if: needs.windows-vulkan.result == 'success' + continue-on-error: true + steps: + - uses: actions/checkout@v4 + + - name: Download artifact + uses: actions/download-artifact@v4 + with: + name: whisper-${{ needs.determine-tag.outputs.version }}-windows-vulkan-x64.zip + path: ./artifact + + - name: Extract + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path bin | Out-Null + Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force + + - name: Download tiny model + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path models | Out-Null + Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" + + - name: Run and verify + shell: pwsh + run: | + .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + $text = Get-Content "jfk-result.txt" -Raw + Write-Host $text + if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" -ForegroundColor Green } + else { Write-Error "FAIL: expected words not found"; exit 1 } + +# ════════════════════════════════════════════════════════════════════════════════ +# 12. Test — Vulkan Linux (stx-halo, has Vulkan driver) +# ════════════════════════════════════════════════════════════════════════════════ + test-vulkan-linux: + runs-on: [self-hosted, Linux, stx-halo] + needs: [determine-tag, linux-vulkan] + if: needs.linux-vulkan.result == 'success' + continue-on-error: true + steps: + - uses: actions/checkout@v4 + + - name: Download artifact + uses: actions/download-artifact@v4 + with: + name: whisper-${{ needs.determine-tag.outputs.version }}-linux-vulkan-x86_64.tar.gz + path: ./artifact + + - name: Extract + run: | + mkdir -p bin + tar -xzf artifact/*.tar.gz --strip-components=1 -C bin + chmod +x bin/whisper-cli + + - name: Download tiny model + run: | + mkdir -p models + curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" + + - name: Run and verify + run: | + LD_LIBRARY_PATH=$(pwd)/bin:$LD_LIBRARY_PATH ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + cat jfk-result.txt + grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; } + +# ════════════════════════════════════════════════════════════════════════════════ +# 13. Test — ROCm Windows (stx-halo, gfx1150 — tests gfx1151 artifact on halo) +# ════════════════════════════════════════════════════════════════════════════════ + test-rocm-windows: + runs-on: [self-hosted, Windows, stx-halo] + needs: [determine-tag, windows-rocm] + if: needs.windows-rocm.result == 'success' + continue-on-error: true + steps: + - uses: actions/checkout@v4 + + - name: Download ROCm artifact (gfx1151 - closest to stx-halo gfx1150) + uses: actions/download-artifact@v4 + with: + name: whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-gfx1151.zip + path: ./artifact + + - name: Extract + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path bin | Out-Null + Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force + + - name: Download tiny model + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path models | Out-Null + Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" + + - name: Run and verify + shell: pwsh + run: | + .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + $text = Get-Content "jfk-result.txt" -Raw + Write-Host $text + if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" -ForegroundColor Green } + else { Write-Error "FAIL: expected words not found"; exit 1 } + +# ════════════════════════════════════════════════════════════════════════════════ +# 14. Test — ROCm Linux (stx-halo Linux, gfx1151) +# ════════════════════════════════════════════════════════════════════════════════ + test-rocm-linux: + runs-on: [self-hosted, Linux, stx-halo] + needs: [determine-tag, linux-rocm] + if: needs.linux-rocm.result == 'success' + continue-on-error: true + steps: + - uses: actions/checkout@v4 + + - name: Download ROCm artifact (gfx1151) + uses: actions/download-artifact@v4 + with: + name: whisper-${{ needs.determine-tag.outputs.version }}-linux-rocm-gfx1151.tar.gz + path: ./artifact + + - name: Extract + run: | + mkdir -p bin + tar -xzf artifact/*.tar.gz --strip-components=1 -C bin + chmod +x bin/whisper-cli + + - name: Download tiny model + run: | + mkdir -p models + curl -L -o models/ggml-tiny.bin "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" + + - name: Run and verify + run: | + LD_LIBRARY_PATH=$(pwd)/bin:$LD_LIBRARY_PATH ./bin/whisper-cli -m models/ggml-tiny.bin -f samples/jfk.wav -otxt -of jfk-result + cat jfk-result.txt + grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; } + +# ════════════════════════════════════════════════════════════════════════════════ +# 15. Test — NPU Windows (rai300_400 runner, needs FlexML + .rai model) +# ════════════════════════════════════════════════════════════════════════════════ + test-npu-windows: + runs-on: [self-hosted, Windows, rai300_400] + needs: [determine-tag, windows-npu] + if: needs.windows-npu.result == 'success' + continue-on-error: true + steps: + - uses: actions/checkout@v4 + + - name: Download NPU artifact + uses: actions/download-artifact@v4 + with: + name: whisper-${{ needs.determine-tag.outputs.version }}-windows-npu-x64.zip + path: ./artifact + + - name: Extract + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path bin | Out-Null + Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force + + - name: Download FlexML Runtime and setup environment + shell: pwsh + run: | + Invoke-WebRequest -Uri "${{ env.FLEXML_URL }}" -OutFile flexmlrt.zip + tar xvf flexmlrt.zip + Remove-Item flexmlrt.zip + + - name: Setup FlexML environment + shell: cmd + run: | + cd flexmlrt + call setup.bat + if errorlevel 1 ( echo ERROR: FlexML setup failed! & exit /b 1 ) + + - name: Copy FlexML DLLs to bin + shell: pwsh + run: | + $flexml = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1 + foreach ($sub in @("bin","lib")) { + $path = Join-Path $flexml.FullName $sub + if (Test-Path $path) { + Get-ChildItem "$path\*.dll" -ErrorAction SilentlyContinue | + ForEach-Object { Copy-Item $_.FullName "bin\" -Force } + } + } + + - name: Download models (ggml weights + .rai NPU encoder) + shell: pwsh + run: | + New-Item -ItemType Directory -Force -Path models | Out-Null + # GGML weights for decoder (runs on CPU) + Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" + # Pre-compiled NPU encoder -- must be named ggml-tiny-encoder-vitisai.rai + # alongside ggml-tiny.bin so whisper-cli auto-detects it + Invoke-WebRequest -Uri "https://huggingface.co/amd/whisper-tiny-onnx-npu/resolve/main/ggml-tiny-encoder-vitisai.rai" -OutFile "models\ggml-tiny-encoder-vitisai.rai" + Write-Host "Models:" + Get-ChildItem models | Format-Table Name, Length + + - name: Run and verify (NPU encoder + CPU decoder) + shell: pwsh + run: | + .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result + $text = Get-Content "jfk-result.txt" -Raw + Write-Host $text + # Also check stdout for the VitisAI inference confirmation line + if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS: transcription correct" -ForegroundColor Green } + else { Write-Error "FAIL: expected words not found"; exit 1 } + +# ════════════════════════════════════════════════════════════════════════════════ +# 16. Publish GitHub Release (only after all tests pass or are skipped) # ════════════════════════════════════════════════════════════════════════════════ release: - if: always() && needs.determine-tag.outputs.should_release == 'true' + if: | + always() && + needs.determine-tag.outputs.should_release == 'true' && + (needs.test-cpu-windows.result == 'success' || needs.test-cpu-windows.result == 'skipped') && + (needs.test-cpu-linux.result == 'success' || needs.test-cpu-linux.result == 'skipped') && + (needs.test-vulkan-windows.result == 'success' || needs.test-vulkan-windows.result == 'skipped') && + (needs.test-vulkan-linux.result == 'success' || needs.test-vulkan-linux.result == 'skipped') && + (needs.test-rocm-windows.result == 'success' || needs.test-rocm-windows.result == 'skipped') && + (needs.test-rocm-linux.result == 'success' || needs.test-rocm-linux.result == 'skipped') && + (needs.test-npu-windows.result == 'success' || needs.test-npu-windows.result == 'skipped') runs-on: ubuntu-latest needs: - determine-tag @@ -685,6 +982,13 @@ jobs: - windows-npu - linux-cpu - windows-cpu + - test-cpu-windows + - test-cpu-linux + - test-vulkan-windows + - test-vulkan-linux + - test-rocm-windows + - test-rocm-linux + - test-npu-windows steps: - uses: actions/checkout@v4 From 642fc7cc3d12f24d35b7b1a792f930e2ba86c343 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 9 Jun 2026 19:31:15 -0700 Subject: [PATCH 29/55] workflow: updated with specific tests --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 59e7d42b330..f5cb2cf4720 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -893,6 +893,7 @@ jobs: test-npu-windows: runs-on: [self-hosted, Windows, rai300_400] needs: [determine-tag, windows-npu] + if: needs.windows-npu.result == 'success' continue-on-error: true steps: From be38480d7b5c3f38472fee30265958efeb4d5097 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 9 Jun 2026 19:40:02 -0700 Subject: [PATCH 30/55] workflow: updated with specific tests --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f5cb2cf4720..25a8c049c47 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -471,7 +471,7 @@ jobs: # 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) # ════════════════════════════════════════════════════════════════════════════════ windows-npu: - runs-on: [self-hosted, Windows, rai300_400] + runs-on: [self-hosted, Windows, stx, rai300_400] needs: determine-tag continue-on-error: true # runner may be offline; don't block release @@ -891,7 +891,7 @@ jobs: # 15. Test — NPU Windows (rai300_400 runner, needs FlexML + .rai model) # ════════════════════════════════════════════════════════════════════════════════ test-npu-windows: - runs-on: [self-hosted, Windows, rai300_400] + runs-on: [self-hosted, Windows, stx, rai300_400] needs: [determine-tag, windows-npu] if: needs.windows-npu.result == 'success' From 4319677ca7581c1b7a45be9af159127418a830ae Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Tue, 9 Jun 2026 19:59:40 -0700 Subject: [PATCH 31/55] workflow: updated with specific tests --- .github/workflows/build.yml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 25a8c049c47..445ca481e58 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -906,13 +906,13 @@ jobs: path: ./artifact - name: Extract - shell: pwsh + shell: powershell run: | New-Item -ItemType Directory -Force -Path bin | Out-Null Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force - name: Download FlexML Runtime and setup environment - shell: pwsh + shell: powershell run: | Invoke-WebRequest -Uri "${{ env.FLEXML_URL }}" -OutFile flexmlrt.zip tar xvf flexmlrt.zip @@ -926,7 +926,7 @@ jobs: if errorlevel 1 ( echo ERROR: FlexML setup failed! & exit /b 1 ) - name: Copy FlexML DLLs to bin - shell: pwsh + shell: powershell run: | $flexml = Get-ChildItem -Directory | Where-Object { $_.Name -like "flexmlrt*" } | Select-Object -First 1 foreach ($sub in @("bin","lib")) { @@ -938,24 +938,20 @@ jobs: } - name: Download models (ggml weights + .rai NPU encoder) - shell: pwsh + shell: powershell run: | New-Item -ItemType Directory -Force -Path models | Out-Null - # GGML weights for decoder (runs on CPU) Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" - # Pre-compiled NPU encoder -- must be named ggml-tiny-encoder-vitisai.rai - # alongside ggml-tiny.bin so whisper-cli auto-detects it Invoke-WebRequest -Uri "https://huggingface.co/amd/whisper-tiny-onnx-npu/resolve/main/ggml-tiny-encoder-vitisai.rai" -OutFile "models\ggml-tiny-encoder-vitisai.rai" Write-Host "Models:" Get-ChildItem models | Format-Table Name, Length - name: Run and verify (NPU encoder + CPU decoder) - shell: pwsh + shell: powershell run: | .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result $text = Get-Content "jfk-result.txt" -Raw Write-Host $text - # Also check stdout for the VitisAI inference confirmation line if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS: transcription correct" -ForegroundColor Green } else { Write-Error "FAIL: expected words not found"; exit 1 } From 906009aee0c260b2574ec419f2bf7cd27c09c801 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Tue, 9 Jun 2026 20:47:30 -0700 Subject: [PATCH 32/55] temp runner --- .github/workflows/build.yml | 76 +++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 25 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 445ca481e58..8934808387f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -141,6 +141,7 @@ jobs: # 2. ROCm — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-rocm: + if: false # temporarily disabled — windows-rocm gfx1151 only runs-on: ubuntu-22.04 needs: [determine-tag, prepare-rocm-matrix] strategy: @@ -203,7 +204,7 @@ jobs: mkdir -p "$BIN" [ -d "$LIB/rocblas/library" ] && { mkdir -p "$BIN/rocblas"; cp -r "$LIB/rocblas/library" "$BIN/rocblas/"; } [ -d "$LIB/hipblaslt/library" ] && { mkdir -p "$BIN/hipblaslt"; cp -r "$LIB/hipblaslt/library" "$BIN/hipblaslt/"; } - for so in libhipblas librocblas libamdhip64 librocsolver libroctx64 libhipblaslt libamd_comgr libhsa-runtime64; do + for so in libhipblas librocblas libamdhip64 librocsolver libroctx64 libhipblaslt libamd_comgr libamd_comgr_loader libhsa-runtime64; do cp -v $LIB/${so}.so* "$BIN/" 2>/dev/null || true done cp -v $LIB/llvm/lib/libLLVM.so* "$BIN/" 2>/dev/null || true @@ -212,11 +213,18 @@ jobs: - name: Bundle linked libraries run: | - ldd build/bin/whisper-cli | grep "=> /" | while read -r line; do - soname=$(echo "$line" | awk '{print $1}') - path=$(echo "$line" | awk '{print $3}') - [[ "$soname" =~ ^(libc|libm|libdl|librt|libpthread|libstdc\+\+|libgcc_s|ld-linux) ]] && continue - cp -L "$path" "build/bin/$soname" 2>/dev/null || true + BIN="build/bin" + SKIP="^(libc|libm|libdl|librt|libpthread|libstdc\+\+|libgcc_s|ld-linux)" + # scan whisper-cli and all already-bundled .so files for missing deps + for target in "$BIN/whisper-cli" "$BIN"/*.so*; do + [ -f "$target" ] && [ ! -L "$target" ] || continue + ldd "$target" 2>/dev/null | grep "=> /" | while read -r line; do + soname=$(echo "$line" | awk '{print $1}') + path=$(echo "$line" | awk '{print $3}') + [[ "$soname" =~ $SKIP ]] && continue + [ -f "$BIN/$soname" ] && continue + cp -L "$path" "$BIN/$soname" 2>/dev/null || true + done done - name: Set portable RPATH @@ -247,7 +255,13 @@ jobs: runs-on: windows-latest needs: [determine-tag, prepare-rocm-matrix] strategy: - matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.windows_matrix) }} + matrix: + gfx_target: [gfx1151] # temporarily pinned — full matrix restored once CI passes + build: [Release] + sdl2: [ON] + arch: [x64] + s2arc: [x64] + s2ver: ["2.28.5"] fail-fast: false steps: @@ -356,6 +370,7 @@ jobs: # 4. Vulkan — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-vulkan: + if: false # temporarily disabled — windows-rocm gfx1151 only runs-on: ubuntu-latest needs: determine-tag @@ -415,6 +430,7 @@ jobs: # 5. Vulkan — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-vulkan: + if: false # temporarily disabled — focus CI on ROCm + linux-vulkan runs-on: windows-latest needs: determine-tag @@ -471,6 +487,7 @@ jobs: # 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) # ════════════════════════════════════════════════════════════════════════════════ windows-npu: + if: false # temporarily disabled — focus CI on ROCm + linux-vulkan runs-on: [self-hosted, Windows, stx, rai300_400] needs: determine-tag continue-on-error: true # runner may be offline; don't block release @@ -570,6 +587,7 @@ jobs: # 7. CPU — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-cpu: + if: false # temporarily disabled — focus CI on ROCm + linux-vulkan runs-on: ubuntu-latest needs: determine-tag @@ -587,6 +605,10 @@ jobs: run: | cmake -B build \ -DCMAKE_BUILD_TYPE=Release \ + -DGGML_NATIVE=OFF \ + -DGGML_AVX=ON \ + -DGGML_AVX2=ON \ + -DGGML_FMA=ON \ -DWHISPER_BUILD_EXAMPLES=ON \ -DWHISPER_BUILD_TESTS=OFF \ -DWHISPER_BUILD_SERVER=ON @@ -621,6 +643,7 @@ jobs: # 8. CPU — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-cpu: + if: false # temporarily disabled — focus CI on ROCm + linux-vulkan runs-on: windows-latest needs: determine-tag @@ -641,6 +664,10 @@ jobs: run: | cmake -S . -B ./build -A x64 ` -DCMAKE_BUILD_TYPE=Release ` + -DGGML_NATIVE=OFF ` + -DGGML_AVX=ON ` + -DGGML_AVX2=ON ` + -DGGML_FMA=ON ` -DBUILD_SHARED_LIBS=ON ` -DWHISPER_SDL2=ON @@ -675,9 +702,9 @@ jobs: # 9. Test — CPU Windows (GitHub-hosted, no GPU needed) # ════════════════════════════════════════════════════════════════════════════════ test-cpu-windows: + if: false # temporarily disabled runs-on: windows-latest needs: [determine-tag, windows-cpu] - if: needs.windows-cpu.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 @@ -713,9 +740,9 @@ jobs: # 10. Test — CPU Linux (GitHub-hosted, no GPU needed) # ════════════════════════════════════════════════════════════════════════════════ test-cpu-linux: + if: false # temporarily disabled runs-on: ubuntu-latest needs: [determine-tag, linux-cpu] - if: needs.linux-cpu.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 @@ -747,9 +774,9 @@ jobs: # 11. Test — Vulkan Windows (stx-halo, has Vulkan driver) # ════════════════════════════════════════════════════════════════════════════════ test-vulkan-windows: + if: false # temporarily disabled runs-on: [self-hosted, Windows, stx-halo] needs: [determine-tag, windows-vulkan] - if: needs.windows-vulkan.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 @@ -761,33 +788,33 @@ jobs: path: ./artifact - name: Extract - shell: pwsh + shell: powershell run: | New-Item -ItemType Directory -Force -Path bin | Out-Null Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force - name: Download tiny model - shell: pwsh + shell: powershell run: | New-Item -ItemType Directory -Force -Path models | Out-Null Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" - name: Run and verify - shell: pwsh + shell: powershell run: | .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result $text = Get-Content "jfk-result.txt" -Raw Write-Host $text - if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" -ForegroundColor Green } + if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" } else { Write-Error "FAIL: expected words not found"; exit 1 } # ════════════════════════════════════════════════════════════════════════════════ # 12. Test — Vulkan Linux (stx-halo, has Vulkan driver) # ════════════════════════════════════════════════════════════════════════════════ test-vulkan-linux: + if: false # temporarily disabled runs-on: [self-hosted, Linux, stx-halo] needs: [determine-tag, linux-vulkan] - if: needs.linux-vulkan.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 @@ -816,7 +843,7 @@ jobs: grep -qi "country\|ask not\|nation\|kennedy" jfk-result.txt && echo "PASS" || { echo "FAIL"; exit 1; } # ════════════════════════════════════════════════════════════════════════════════ -# 13. Test — ROCm Windows (stx-halo, gfx1150 — tests gfx1151 artifact on halo) +# 13. Test — ROCm Windows (stx-halo, gfx1151) # ════════════════════════════════════════════════════════════════════════════════ test-rocm-windows: runs-on: [self-hosted, Windows, stx-halo] @@ -826,40 +853,40 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Download ROCm artifact (gfx1151 - closest to stx-halo gfx1150) + - name: Download ROCm artifact (gfx1151 — stx-halo GPU target) uses: actions/download-artifact@v4 with: name: whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-gfx1151.zip path: ./artifact - name: Extract - shell: pwsh + shell: powershell run: | New-Item -ItemType Directory -Force -Path bin | Out-Null Expand-Archive -Path (Get-ChildItem artifact -Filter "*.zip" | Select-Object -First 1).FullName -DestinationPath bin -Force - name: Download tiny model - shell: pwsh + shell: powershell run: | New-Item -ItemType Directory -Force -Path models | Out-Null Invoke-WebRequest -Uri "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" -OutFile "models\ggml-tiny.bin" - name: Run and verify - shell: pwsh + shell: powershell run: | .\bin\whisper-cli.exe -m models\ggml-tiny.bin -f samples\jfk.wav -otxt -of jfk-result $text = Get-Content "jfk-result.txt" -Raw Write-Host $text - if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" -ForegroundColor Green } + if ($text -match "country|ask not|nation|kennedy") { Write-Host "PASS" } else { Write-Error "FAIL: expected words not found"; exit 1 } # ════════════════════════════════════════════════════════════════════════════════ -# 14. Test — ROCm Linux (stx-halo Linux, gfx1151) +# 14. Test — ROCm Linux (stx-halo, gfx1151) # ════════════════════════════════════════════════════════════════════════════════ test-rocm-linux: + if: false # temporarily disabled runs-on: [self-hosted, Linux, stx-halo] needs: [determine-tag, linux-rocm] - if: needs.linux-rocm.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 @@ -891,10 +918,9 @@ jobs: # 15. Test — NPU Windows (rai300_400 runner, needs FlexML + .rai model) # ════════════════════════════════════════════════════════════════════════════════ test-npu-windows: + if: false # temporarily disabled runs-on: [self-hosted, Windows, stx, rai300_400] needs: [determine-tag, windows-npu] - - if: needs.windows-npu.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 From 3a2c22f940dff7d782690353ea387e9ba913bc82 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Tue, 9 Jun 2026 21:21:31 -0700 Subject: [PATCH 33/55] temp runner --- .github/workflows/build.yml | 107 +++++++++++++++++++++++++++--------- 1 file changed, 80 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8934808387f..44571b44a4c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -141,11 +141,14 @@ jobs: # 2. ROCm — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-rocm: - if: false # temporarily disabled — windows-rocm gfx1151 only runs-on: ubuntu-22.04 needs: [determine-tag, prepare-rocm-matrix] strategy: - matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.ubuntu_matrix) }} + matrix: + gfx_target: [gfx1151] # temporarily pinned — full matrix restored once CI passes + build: [Release] + sdl2: [ON] + arch: [linux/amd64] fail-fast: false steps: @@ -195,7 +198,26 @@ jobs: -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build - run: cmake --build build --config ${{ matrix.build }} -j$(nproc) + run: | + cmake --build build --config ${{ matrix.build }} -j$(nproc) > build.log 2>&1 + exit_code=$? + grep -E "error:|FAILED|Linking|Built target|warning:" build.log || true + if [ $exit_code -ne 0 ]; then + echo "--- Last 100 lines of build log ---" + tail -100 build.log + echo "Build failed with exit code $exit_code" + exit $exit_code + fi + echo "Build succeeded." + + - name: Verify build output + run: | + if [ ! -f build/bin/whisper-cli ]; then + echo "::error::whisper-cli not found — build likely truncated" + ls -lh build/bin/ 2>/dev/null || true + exit 1 + fi + echo "Build output:"; ls -lh build/bin/whisper-cli - name: Copy ROCm runtime libs run: | @@ -268,11 +290,12 @@ jobs: - uses: actions/checkout@v4 - name: Install Ninja - run: choco install ninja + shell: powershell + run: choco install ninja -y - name: Fetch SDL2 and patch header if: matrix.sdl2 == 'ON' - shell: pwsh + shell: powershell run: | $url = "https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip" Invoke-WebRequest -Uri $url -OutFile sdl2.zip @@ -289,28 +312,36 @@ jobs: } } else { Write-Error "SDL_endian.h not found"; exit 1 } - - name: Download ROCm tarball - shell: bash - run: | - source ci/resolve-rocm-version.sh windows "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" - echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV - curl -sL "$ROCM_TARBALL_URL" -o rocm.tar.gz - - - name: Extract ROCm - shell: pwsh + - name: Download and extract ROCm tarball + shell: powershell run: | - New-Item -ItemType Directory -Force -Path "C:\opt\rocm" + $gfx = "${{ matrix.gfx_target }}" + $ver = "${{ env.ROCM_VERSION }}" + # resolve base target (group targets always use gfx1151 tarball) + $base = if ($gfx -in @("gfx110X","gfx120X","gfx1150","gfx1100")) { "gfx1151" } else { $gfx } + $url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${base}-${ver}.tar.gz" + Write-Host "ROCm version: $ver" + Write-Host "ROCm URL: $url" + "DETECTED_ROCM_VERSION=$ver" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 + Invoke-WebRequest -Uri $url -OutFile rocm.tar.gz + New-Item -ItemType Directory -Force -Path "C:\opt\rocm" | Out-Null tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 - name: Map GPU target id: gpu - shell: bash + shell: powershell run: | - source ci/map-gpu-target.sh "${{ matrix.gfx_target }}" - echo "mapped=$MAPPED_GPU_TARGET" >> $GITHUB_OUTPUT + $gfx = "${{ matrix.gfx_target }}" + $mapped = switch ($gfx) { + "gfx110X" { "gfx1100;gfx1101;gfx1102" } + "gfx120X" { "gfx1200;gfx1201" } + default { $gfx } + } + Write-Host "Mapped GPU target: $gfx -> $mapped" + "mapped=$mapped" | Out-File $env:GITHUB_OUTPUT -Append -Encoding utf8 - name: Configure CMake - shell: pwsh + shell: powershell run: | $env:HIP_PATH = "C:\opt\rocm" $env:HIP_PLATFORM = "amd" @@ -331,11 +362,22 @@ jobs: -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build - shell: pwsh - run: cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS + shell: powershell + run: | + cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS > build.log 2>&1 + $exit = $LASTEXITCODE + # Show only errors and link steps — keeps log under GitHub's line limit + Get-Content build.log | Select-String -Pattern "error:|FAILED|Linking|Built target|warning: " | Write-Host + if ($exit -ne 0) { + Write-Host "--- Last 100 lines of build log ---" + Get-Content build.log -Tail 100 + Write-Error "Build failed with exit code $exit" + exit $exit + } + Write-Host "Build succeeded." - name: Copy ROCm DLLs - shell: pwsh + shell: powershell run: | $bin = "build/bin/${{ matrix.build }}" $rocBin = "C:\opt\rocm\bin" @@ -351,11 +393,23 @@ jobs: - name: Copy SDL2.dll if: matrix.sdl2 == 'ON' - shell: pwsh + shell: powershell run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" "build/bin/${{ matrix.build }}" + - name: Verify build output + shell: powershell + run: | + $bin = "build/bin/${{ matrix.build }}" + if (-not (Test-Path "$bin/whisper-cli.exe")) { + Write-Error "whisper-cli.exe not found in $bin — build likely truncated" + Get-ChildItem $bin -ErrorAction SilentlyContinue | Format-Table Name, Length + exit 1 + } + Write-Host "Build output:" + Get-ChildItem $bin -Filter "*.exe" | Format-Table Name, Length + - name: Package - shell: pwsh + shell: powershell run: | $a = "whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-${{ matrix.gfx_target }}.zip" Compress-Archive -Path "build/bin/${{ matrix.build }}/*" -DestinationPath $a -Force @@ -370,7 +424,6 @@ jobs: # 4. Vulkan — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-vulkan: - if: false # temporarily disabled — windows-rocm gfx1151 only runs-on: ubuntu-latest needs: determine-tag @@ -812,9 +865,9 @@ jobs: # 12. Test — Vulkan Linux (stx-halo, has Vulkan driver) # ════════════════════════════════════════════════════════════════════════════════ test-vulkan-linux: - if: false # temporarily disabled runs-on: [self-hosted, Linux, stx-halo] needs: [determine-tag, linux-vulkan] + if: needs.linux-vulkan.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 @@ -884,9 +937,9 @@ jobs: # 14. Test — ROCm Linux (stx-halo, gfx1151) # ════════════════════════════════════════════════════════════════════════════════ test-rocm-linux: - if: false # temporarily disabled runs-on: [self-hosted, Linux, stx-halo] needs: [determine-tag, linux-rocm] + if: needs.linux-rocm.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 From e831c8418736a1f5bdc6a2f66b0e6bf5e9ce78f0 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Tue, 9 Jun 2026 22:40:40 -0700 Subject: [PATCH 34/55] temp runner --- .github/workflows/build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 44571b44a4c..91efdfec610 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -168,7 +168,7 @@ jobs: source ci/resolve-rocm-version.sh linux "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV sudo mkdir -p /opt/rocm - curl -sL "$ROCM_TARBALL_URL" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 + curl -L "$ROCM_TARBALL_URL" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 - name: Set ROCm env run: | @@ -366,7 +366,7 @@ jobs: run: | cmake --build build --config ${{ matrix.build }} -j $env:NUMBER_OF_PROCESSORS > build.log 2>&1 $exit = $LASTEXITCODE - # Show only errors and link steps — keeps log under GitHub's line limit + # Show only errors and link steps - keeps log under GitHub's line limit Get-Content build.log | Select-String -Pattern "error:|FAILED|Linking|Built target|warning: " | Write-Host if ($exit -ne 0) { Write-Host "--- Last 100 lines of build log ---" @@ -401,7 +401,7 @@ jobs: run: | $bin = "build/bin/${{ matrix.build }}" if (-not (Test-Path "$bin/whisper-cli.exe")) { - Write-Error "whisper-cli.exe not found in $bin — build likely truncated" + Write-Error "whisper-cli.exe not found in $bin - build likely truncated" Get-ChildItem $bin -ErrorAction SilentlyContinue | Format-Table Name, Length exit 1 } @@ -906,7 +906,7 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Download ROCm artifact (gfx1151 — stx-halo GPU target) + - name: Download ROCm artifact (gfx1151 - stx-halo GPU target) uses: actions/download-artifact@v4 with: name: whisper-${{ needs.determine-tag.outputs.version }}-windows-rocm-gfx1151.zip @@ -1090,7 +1090,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: tag_name: ${{ needs.determine-tag.outputs.tag_name }} - release_name: "whisper.cpp ${{ needs.determine-tag.outputs.tag_name }} — AMD Builds" + release_name: "whisper.cpp ${{ needs.determine-tag.outputs.tag_name }} - AMD Builds" prerelease: ${{ github.event.inputs.pre_release_tag != '' }} draft: false From dcf64a38eedc1455bd84debebd67483a8586a720 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Tue, 9 Jun 2026 23:55:37 -0700 Subject: [PATCH 35/55] ci: disable sync update --- .github/workflows/build.yml | 40 +++++++++++-------------------------- .github/workflows/sync.yml | 35 ++++++++++++++++---------------- ci/resolve-rocm-version.sh | 26 ++++++++++++++---------- 3 files changed, 46 insertions(+), 55 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 91efdfec610..8d62d0de485 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -141,14 +141,10 @@ jobs: # 2. ROCm — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-rocm: - runs-on: ubuntu-22.04 + runs-on: [self-hosted, Linux, stx-halo] needs: [determine-tag, prepare-rocm-matrix] strategy: - matrix: - gfx_target: [gfx1151] # temporarily pinned — full matrix restored once CI passes - build: [Release] - sdl2: [ON] - arch: [linux/amd64] + matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.ubuntu_matrix) }} fail-fast: false steps: @@ -274,16 +270,10 @@ jobs: # 3. ROCm — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-rocm: - runs-on: windows-latest + runs-on: [self-hosted, Windows, stx-halo] needs: [determine-tag, prepare-rocm-matrix] strategy: - matrix: - gfx_target: [gfx1151] # temporarily pinned — full matrix restored once CI passes - build: [Release] - sdl2: [ON] - arch: [x64] - s2arc: [x64] - s2ver: ["2.28.5"] + matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.windows_matrix) }} fail-fast: false steps: @@ -315,12 +305,10 @@ jobs: - name: Download and extract ROCm tarball shell: powershell run: | - $gfx = "${{ matrix.gfx_target }}" - $ver = "${{ env.ROCM_VERSION }}" - # resolve base target (group targets always use gfx1151 tarball) - $base = if ($gfx -in @("gfx110X","gfx120X","gfx1150","gfx1100")) { "gfx1151" } else { $gfx } - $url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${base}-${ver}.tar.gz" - Write-Host "ROCm version: $ver" + $gfx = "${{ matrix.gfx_target }}" + $ver = "${{ env.ROCM_VERSION }}" + $base = $gfx + $url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${base}-${ver}.tar.gz" Write-Host "ROCm URL: $url" "DETECTED_ROCM_VERSION=$ver" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 Invoke-WebRequest -Uri $url -OutFile rocm.tar.gz @@ -483,7 +471,6 @@ jobs: # 5. Vulkan — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-vulkan: - if: false # temporarily disabled — focus CI on ROCm + linux-vulkan runs-on: windows-latest needs: determine-tag @@ -540,7 +527,6 @@ jobs: # 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) # ════════════════════════════════════════════════════════════════════════════════ windows-npu: - if: false # temporarily disabled — focus CI on ROCm + linux-vulkan runs-on: [self-hosted, Windows, stx, rai300_400] needs: determine-tag continue-on-error: true # runner may be offline; don't block release @@ -640,7 +626,6 @@ jobs: # 7. CPU — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-cpu: - if: false # temporarily disabled — focus CI on ROCm + linux-vulkan runs-on: ubuntu-latest needs: determine-tag @@ -696,7 +681,6 @@ jobs: # 8. CPU — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-cpu: - if: false # temporarily disabled — focus CI on ROCm + linux-vulkan runs-on: windows-latest needs: determine-tag @@ -755,9 +739,9 @@ jobs: # 9. Test — CPU Windows (GitHub-hosted, no GPU needed) # ════════════════════════════════════════════════════════════════════════════════ test-cpu-windows: - if: false # temporarily disabled runs-on: windows-latest needs: [determine-tag, windows-cpu] + if: needs.windows-cpu.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 @@ -793,9 +777,9 @@ jobs: # 10. Test — CPU Linux (GitHub-hosted, no GPU needed) # ════════════════════════════════════════════════════════════════════════════════ test-cpu-linux: - if: false # temporarily disabled runs-on: ubuntu-latest needs: [determine-tag, linux-cpu] + if: needs.linux-cpu.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 @@ -827,9 +811,9 @@ jobs: # 11. Test — Vulkan Windows (stx-halo, has Vulkan driver) # ════════════════════════════════════════════════════════════════════════════════ test-vulkan-windows: - if: false # temporarily disabled runs-on: [self-hosted, Windows, stx-halo] needs: [determine-tag, windows-vulkan] + if: needs.windows-vulkan.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 @@ -971,9 +955,9 @@ jobs: # 15. Test — NPU Windows (rai300_400 runner, needs FlexML + .rai model) # ════════════════════════════════════════════════════════════════════════════════ test-npu-windows: - if: false # temporarily disabled runs-on: [self-hosted, Windows, stx, rai300_400] needs: [determine-tag, windows-npu] + if: needs.windows-npu.result == 'success' continue-on-error: true steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml index b4d1411518a..6867bf5fe35 100644 --- a/.github/workflows/sync.yml +++ b/.github/workflows/sync.yml @@ -6,8 +6,10 @@ name: Sync Upstream & Auto-Release # - conflict → opens a PR for manual resolution, does NOT tag on: - schedule: - - cron: '0 6 * * *' # daily at 06:00 UTC + # schedule disabled until v1.8.4 release is confirmed good + # re-enable by uncommenting the schedule block below + # schedule: + # - cron: '0 6 * * *' # daily at 06:00 UTC workflow_dispatch: inputs: upstream_tag: @@ -106,21 +108,20 @@ jobs: git push origin "$BRANCH" TAG="${{ steps.upstream.outputs.tag }}" - cat > /tmp/pr-body.md << EOF -## Upstream sync: ${TAG} - -Conflicts were detected during automatic merge. Files affected: - - ${CONFLICT_FILES} - -This PR was auto-resolved using upstream (theirs) as a baseline. -Please review the diff carefully before merging. - -Once merged, manually create the release tag on main to trigger the build: - - git tag ${TAG} - git push origin ${TAG} -EOF + python3 -c " +import sys +tag = sys.argv[1] +files = sys.argv[2] +body = '## Upstream sync: ' + tag + '\n\n' +body += 'Conflicts were detected during automatic merge. Files affected:\n\n' +body += ' ' + files + '\n\n' +body += 'This PR was auto-resolved using upstream (theirs) as a baseline.\n' +body += 'Please review the diff carefully before merging.\n\n' +body += 'Once merged, manually create the release tag on main to trigger the build:\n\n' +body += ' git tag ' + tag + '\n' +body += ' git push origin ' + tag + '\n' +open('/tmp/pr-body.md', 'w').write(body) +" "$TAG" "$CONFLICT_FILES" gh pr create \ --title "Sync upstream ${TAG} - conflict resolution needed" \ diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index fbfe5e68e1d..996e40c5abf 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -41,18 +41,24 @@ if ! echo "$rocm_version" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then return 1 2>/dev/null || exit 1 fi -# For the AMD tarball distribution, use gfx1151 as the base target -# The tarball contains ROCm tools/libraries for all supported GPUs -# GPU targets are specified during build via GPU_TARGETS CMake variable -# Group targets (gfx110X, gfx120X) should use gfx1151 as the base -base_target="gfx1151" -if [ "$gfx_target" != "gfx110X" ] && [ "$gfx_target" != "gfx120X" ] && [ "$gfx_target" != "gfx1150" ] && [ "$gfx_target" != "gfx1100" ]; then - # Use the specific target if it's an individual target - base_target="$gfx_target" -fi +# Map our GFX target shorthand to the exact tarball name AMD publishes. +# Each GPU family has its own tarball with the right prebuilt kernel libraries. +# Using the wrong tarball (e.g. gfx1151 for gfx110X) gives you gfx1151 rocBLAS/hipBLASLT +# kernels in a gfx1100 package — builds succeed but fail silently on user hardware. +case "$gfx_target" in + gfx110X) tarball_target="gfx110X" ;; # RDNA3 dGPU: RX 7900/7800/7700/7600 + gfx120X) tarball_target="gfx120X" ;; # RDNA4 dGPU: RX 9070/9060 + gfx1150) tarball_target="gfx1150" ;; # RDNA3.5 APU: Strix Point + gfx1151) tarball_target="gfx1151" ;; # RDNA3.5 APU: Strix Halo + gfx1100) tarball_target="gfx1100" ;; # RDNA3 dGPU specific + gfx1101) tarball_target="gfx1101" ;; + gfx1200) tarball_target="gfx1200" ;; + gfx1201) tarball_target="gfx1201" ;; + *) tarball_target="$gfx_target" ;; +esac # Construct the AMD official repo URL -ROCM_TARBALL_URL="https://repo.amd.com/rocm/tarball/therock-dist-${platform}-${base_target}-${rocm_version}.tar.gz" +ROCM_TARBALL_URL="https://repo.amd.com/rocm/tarball/therock-dist-${platform}-${tarball_target}-${rocm_version}.tar.gz" export ROCM_RESOLVED_VERSION="$rocm_version" echo "ROCm version: $ROCM_RESOLVED_VERSION" From 604a357d84f4e0a519b948c331a3e87b8848a446 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Wed, 10 Jun 2026 00:02:26 -0700 Subject: [PATCH 36/55] ci: fix failure --- .github/workflows/build.yml | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8d62d0de485..4e1abf98e4e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -148,23 +148,25 @@ jobs: fail-fast: false steps: - - name: Free disk space - run: | - sudo rm -rf /usr/local/lib/android /opt/ghc /usr/local/share/boost \ - /usr/share/dotnet /usr/local/.ghcup /opt/hostedtoolcache/CodeQL - sudo docker image prune --all --force 2>/dev/null || true - - uses: actions/checkout@v4 - name: Install dependencies - run: sudo apt update && sudo apt install -y cmake ninja-build curl build-essential libsdl2-dev git patchelf + run: | + sudo apt-get update && sudo apt-get install -y cmake ninja-build curl build-essential libsdl2-dev git patchelf + echo "--- Verifying installs ---" + cmake --version + ninja --version + curl --version | head -1 + gcc --version | head -1 + patchelf --version + git --version - name: Download ROCm tarball run: | source ci/resolve-rocm-version.sh linux "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV - sudo mkdir -p /opt/rocm - curl -L "$ROCM_TARBALL_URL" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 + mkdir -p /opt/rocm + curl -L "$ROCM_TARBALL_URL" | tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 - name: Set ROCm env run: | From ce9af216afe8966619c76bef9b30ae153ff6ddf6 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Wed, 10 Jun 2026 00:04:44 -0700 Subject: [PATCH 37/55] ci: fix failure --- .github/workflows/build.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4e1abf98e4e..9c5d52ee12c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -152,13 +152,17 @@ jobs: - name: Install dependencies run: | - sudo apt-get update && sudo apt-get install -y cmake ninja-build curl build-essential libsdl2-dev git patchelf + APT="apt-get" + if command -v sudo &>/dev/null && sudo -n apt-get -h &>/dev/null 2>&1; then + APT="sudo apt-get" + fi + $APT update && $APT install -y cmake ninja-build curl build-essential libsdl2-dev git patchelf echo "--- Verifying installs ---" - cmake --version - ninja --version + cmake --version || { echo "::error::cmake not found"; exit 1; } + ninja --version || { echo "::error::ninja not found"; exit 1; } curl --version | head -1 gcc --version | head -1 - patchelf --version + patchelf --version || { echo "::error::patchelf not found"; exit 1; } git --version - name: Download ROCm tarball From 96520a57bc9ca24cd002a64970a33bdf1b8913a4 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Wed, 10 Jun 2026 00:08:52 -0700 Subject: [PATCH 38/55] ci: revert to github hosted runner --- .github/workflows/build.yml | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9c5d52ee12c..c1f77cd233a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -141,36 +141,30 @@ jobs: # 2. ROCm — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-rocm: - runs-on: [self-hosted, Linux, stx-halo] + runs-on: ubuntu-22.04 needs: [determine-tag, prepare-rocm-matrix] strategy: matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.ubuntu_matrix) }} fail-fast: false steps: + - name: Free disk space + run: | + sudo rm -rf /usr/local/lib/android /opt/ghc /usr/local/share/boost \ + /usr/share/dotnet /usr/local/.ghcup /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force 2>/dev/null || true + - uses: actions/checkout@v4 - name: Install dependencies - run: | - APT="apt-get" - if command -v sudo &>/dev/null && sudo -n apt-get -h &>/dev/null 2>&1; then - APT="sudo apt-get" - fi - $APT update && $APT install -y cmake ninja-build curl build-essential libsdl2-dev git patchelf - echo "--- Verifying installs ---" - cmake --version || { echo "::error::cmake not found"; exit 1; } - ninja --version || { echo "::error::ninja not found"; exit 1; } - curl --version | head -1 - gcc --version | head -1 - patchelf --version || { echo "::error::patchelf not found"; exit 1; } - git --version + run: sudo apt-get update && sudo apt-get install -y cmake ninja-build curl build-essential libsdl2-dev git patchelf - name: Download ROCm tarball run: | source ci/resolve-rocm-version.sh linux "${{ matrix.gfx_target }}" "${{ env.ROCM_VERSION }}" echo "DETECTED_ROCM_VERSION=$ROCM_RESOLVED_VERSION" >> $GITHUB_ENV - mkdir -p /opt/rocm - curl -L "$ROCM_TARBALL_URL" | tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 + sudo mkdir -p /opt/rocm + curl -L "$ROCM_TARBALL_URL" | sudo tar --use-compress-program=gzip -xf - -C /opt/rocm --strip-components=1 - name: Set ROCm env run: | @@ -276,7 +270,7 @@ jobs: # 3. ROCm — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-rocm: - runs-on: [self-hosted, Windows, stx-halo] + runs-on: windows-latest needs: [determine-tag, prepare-rocm-matrix] strategy: matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.windows_matrix) }} From ba8517b8982e9b92b1272b010cc862d5e0222a5b Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Wed, 10 Jun 2026 00:29:59 -0700 Subject: [PATCH 39/55] ci: revert to github hosted runner --- .github/workflows/build.yml | 9 ++++++++- ci/resolve-rocm-version.sh | 20 ++++++++------------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c1f77cd233a..830f976b6a2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -307,7 +307,14 @@ jobs: run: | $gfx = "${{ matrix.gfx_target }}" $ver = "${{ env.ROCM_VERSION }}" - $base = $gfx + $base = switch ($gfx) { + "gfx110X" { "gfx110X-all" } + "gfx120X" { "gfx120X-all" } + "gfx1150" { "gfx1150" } + "gfx1151" { "gfx1151" } + "gfx1152" { "gfx1152" } + default { $gfx } + } $url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${base}-${ver}.tar.gz" Write-Host "ROCm URL: $url" "DETECTED_ROCM_VERSION=$ver" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 diff --git a/ci/resolve-rocm-version.sh b/ci/resolve-rocm-version.sh index 996e40c5abf..cf3bccbe778 100755 --- a/ci/resolve-rocm-version.sh +++ b/ci/resolve-rocm-version.sh @@ -41,19 +41,15 @@ if ! echo "$rocm_version" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+'; then return 1 2>/dev/null || exit 1 fi -# Map our GFX target shorthand to the exact tarball name AMD publishes. -# Each GPU family has its own tarball with the right prebuilt kernel libraries. -# Using the wrong tarball (e.g. gfx1151 for gfx110X) gives you gfx1151 rocBLAS/hipBLASLT -# kernels in a gfx1100 package — builds succeed but fail silently on user hardware. +# Exact tarball names published at repo.amd.com/rocm/tarball/ for 7.12.0: +# linux: gfx110X-all, gfx120X-all, gfx1150, gfx1151, gfx1152 +# windows: gfx110X-all, gfx120X-all, gfx1150, gfx1151, gfx1152 case "$gfx_target" in - gfx110X) tarball_target="gfx110X" ;; # RDNA3 dGPU: RX 7900/7800/7700/7600 - gfx120X) tarball_target="gfx120X" ;; # RDNA4 dGPU: RX 9070/9060 - gfx1150) tarball_target="gfx1150" ;; # RDNA3.5 APU: Strix Point - gfx1151) tarball_target="gfx1151" ;; # RDNA3.5 APU: Strix Halo - gfx1100) tarball_target="gfx1100" ;; # RDNA3 dGPU specific - gfx1101) tarball_target="gfx1101" ;; - gfx1200) tarball_target="gfx1200" ;; - gfx1201) tarball_target="gfx1201" ;; + gfx110X) tarball_target="gfx110X-all" ;; + gfx120X) tarball_target="gfx120X-all" ;; + gfx1150) tarball_target="gfx1150" ;; + gfx1151) tarball_target="gfx1151" ;; + gfx1152) tarball_target="gfx1152" ;; *) tarball_target="$gfx_target" ;; esac From f258e401c967f61555f66843c650ac262c37cff6 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Wed, 10 Jun 2026 00:55:34 -0700 Subject: [PATCH 40/55] fix: apply -march=x86-64-v3 and disable AVX-512 for portable Linux CPU/Vulkan builds --- .github/workflows/build.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 830f976b6a2..97753ed3107 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -441,6 +441,13 @@ jobs: run: | cmake -B build \ -DCMAKE_BUILD_TYPE=Release \ + -DGGML_NATIVE=OFF \ + -DGGML_AVX=ON \ + -DGGML_AVX2=ON \ + -DGGML_FMA=ON \ + -DGGML_AVX512=OFF \ + -DCMAKE_C_FLAGS="-march=x86-64-v3" \ + -DCMAKE_CXX_FLAGS="-march=x86-64-v3" \ -DGGML_VULKAN=ON \ -DWHISPER_BUILD_EXAMPLES=ON \ -DWHISPER_BUILD_TESTS=OFF \ @@ -654,6 +661,9 @@ jobs: -DGGML_AVX=ON \ -DGGML_AVX2=ON \ -DGGML_FMA=ON \ + -DGGML_AVX512=OFF \ + -DCMAKE_C_FLAGS="-march=x86-64-v3" \ + -DCMAKE_CXX_FLAGS="-march=x86-64-v3" \ -DWHISPER_BUILD_EXAMPLES=ON \ -DWHISPER_BUILD_TESTS=OFF \ -DWHISPER_BUILD_SERVER=ON From 6af771ec2feed0000ec837595d4ff56df4bb0f8a Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Wed, 10 Jun 2026 09:37:00 -0700 Subject: [PATCH 41/55] ci: use curl --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 97753ed3107..2bc1c358189 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -318,7 +318,8 @@ jobs: $url = "https://repo.amd.com/rocm/tarball/therock-dist-windows-${base}-${ver}.tar.gz" Write-Host "ROCm URL: $url" "DETECTED_ROCM_VERSION=$ver" | Out-File $env:GITHUB_ENV -Append -Encoding utf8 - Invoke-WebRequest -Uri $url -OutFile rocm.tar.gz + curl.exe -L --retry 3 --retry-delay 5 -o rocm.tar.gz $url + if ($LASTEXITCODE -ne 0) { Write-Error "curl failed with exit code $LASTEXITCODE"; exit 1 } New-Item -ItemType Directory -Force -Path "C:\opt\rocm" | Out-Null tar -xzf rocm.tar.gz -C C:\opt\rocm --strip-components=1 From 49744bd9a617c0fe13cf6ba7ee89cd0387aa00f1 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Wed, 10 Jun 2026 17:36:52 -0700 Subject: [PATCH 42/55] ci: add metal builds --- .github/workflows/build.yml | 63 +++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2bc1c358189..e4248752ea6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -638,7 +638,64 @@ jobs: path: ${{ env.ARCHIVE }} # ════════════════════════════════════════════════════════════════════════════════ -# 7. CPU — Linux +# 7. Metal — macOS (arm64) +# ════════════════════════════════════════════════════════════════════════════════ + macos-metal: + runs-on: macos-latest + needs: determine-tag + + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: brew install cmake ninja + + - name: Configure CMake + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_METAL=ON \ + -DWHISPER_BUILD_EXAMPLES=ON \ + -DWHISPER_BUILD_TESTS=OFF \ + -DWHISPER_BUILD_SERVER=ON + + - name: Build + run: | + cmake --build build --config Release -j$(sysctl -n hw.logicalcpu) > build.log 2>&1 + exit_code=$? + grep -E "error:|FAILED|Linking|Built target" build.log || true + if [ $exit_code -ne 0 ]; then + tail -100 build.log + exit $exit_code + fi + echo "Build succeeded." + + - name: Verify build output + run: | + if [ ! -f build/bin/whisper-cli ]; then + echo "::error::whisper-cli not found" + ls -lh build/bin/ 2>/dev/null || true + exit 1 + fi + ls -lh build/bin/whisper-cli + + - name: Package + run: | + VER="${{ needs.determine-tag.outputs.version }}" + ARCHIVE="whisper-${VER}-darwin-metal-arm64.tar.gz" + STAGE="whisper-${VER}-darwin-metal-arm64" + mkdir -p "$STAGE" + cp -r build/bin/* "$STAGE/" 2>/dev/null || true + tar -czf "$ARCHIVE" "$STAGE" + echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV + + - uses: actions/upload-artifact@v4 + with: + name: ${{ env.ARCHIVE }} + path: ${{ env.ARCHIVE }} + +# ════════════════════════════════════════════════════════════════════════════════ +# 8. CPU — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-cpu: runs-on: ubuntu-latest @@ -1049,7 +1106,8 @@ jobs: (needs.test-vulkan-linux.result == 'success' || needs.test-vulkan-linux.result == 'skipped') && (needs.test-rocm-windows.result == 'success' || needs.test-rocm-windows.result == 'skipped') && (needs.test-rocm-linux.result == 'success' || needs.test-rocm-linux.result == 'skipped') && - (needs.test-npu-windows.result == 'success' || needs.test-npu-windows.result == 'skipped') + (needs.test-npu-windows.result == 'success' || needs.test-npu-windows.result == 'skipped') && + (needs.macos-metal.result == 'success' || needs.macos-metal.result == 'skipped') runs-on: ubuntu-latest needs: - determine-tag @@ -1058,6 +1116,7 @@ jobs: - linux-vulkan - windows-vulkan - windows-npu + - macos-metal - linux-cpu - windows-cpu - test-cpu-windows From 26519571fc04467e5629923c201ff5128d723305 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Wed, 10 Jun 2026 18:22:18 -0700 Subject: [PATCH 43/55] ci: versioning update --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e4248752ea6..2c667cf882d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -105,9 +105,9 @@ jobs: SHOULD_RELEASE="false" fi - # Version = tag without leading 'v', used in artifact filenames - # e.g. v1.8.4 → 1.8.4, b1234 → b1234 (unchanged for dev builds) - VERSION="${TAG_NAME#v}" + # Version used in artifact filenames — keep leading 'v' to match lemonade expectations + # e.g. v1.8.4 → v1.8.4, b1234 → b1234 + VERSION="${TAG_NAME}" echo "name=$TAG_NAME" >> $GITHUB_OUTPUT echo "version=$VERSION" >> $GITHUB_OUTPUT From c67c1d10d788b2d1f871a04cd65e89f5ca82a627 Mon Sep 17 00:00:00 2001 From: Iswarya Alex <47045679+iswaryaalex@users.noreply.github.com> Date: Thu, 11 Jun 2026 00:04:06 -0700 Subject: [PATCH 44/55] Update README.md --- README.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 92160e40276..8ee00d3bc47 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,16 @@ -# whisper-cpp-amd +# whisper.cpp-rocm - - GitHub release (latest by date) + + GitHub release (latest by date) - - Latest release date + + Latest release date - License + License - ROCm 7.x + ROCm 7.12 Powered by whisper.cpp @@ -72,22 +72,22 @@ All builds are self-contained — no separate driver or runtime installation nee | GPU Target | Linux | Windows | |---|---|---| -| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | -| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | -| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | -| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | +| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | +| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | +| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | ### Vulkan — Cross-Vendor GPU | Linux | Windows | |---|---| -| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | ### NPU — RyzenAI (Windows only) | Windows | |---| -| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | > Requires NPU driver ≥ `.280` and a pre-compiled `.rai` encoder model from [AMD's Hugging Face collection](https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models). Place the `.rai` file alongside your `ggml-*.bin` model — whisper-cli picks it up automatically. @@ -95,7 +95,7 @@ All builds are self-contained — no separate driver or runtime installation nee | Linux | Windows | |---|---| -| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | --- @@ -245,7 +245,7 @@ Reproduce any CI build locally using the bundled PowerShell script. Produces ide ## 🏗️ Repository Structure ``` -whisper-cpp-amd/ +whisper.cpp-rocm/ ├── .github/ │ └── workflows/ │ ├── build.yml # All AMD backends — builds + publishes releases From e66a3500cb9332991e2f3bfcc852286c3bd0903f Mon Sep 17 00:00:00 2001 From: Iswarya Alex <47045679+iswaryaalex@users.noreply.github.com> Date: Thu, 11 Jun 2026 10:25:28 -0700 Subject: [PATCH 45/55] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8ee00d3bc47..017410cfc5f 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ License - ROCm 7.12 + ROCm 7.12 Powered by whisper.cpp From 3afa341241b953b472cd454e7437f93060cb1965 Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Thu, 11 Jun 2026 11:39:37 -0700 Subject: [PATCH 46/55] Add README update --- .github/workflows/build.yml | 16 ++++++++++++++-- .github/workflows/sync.yml | 16 ++-------------- README.md | 28 ++++++++++++++-------------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2c667cf882d..cfadd48502c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -191,6 +191,7 @@ jobs: -DGPU_TARGETS="$MAPPED_GPU_TARGET" \ -DBUILD_SHARED_LIBS=ON \ -DGGML_HIP=ON \ + -DWHISPER_BUILD_SERVER=ON \ -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build @@ -209,11 +210,16 @@ jobs: - name: Verify build output run: | if [ ! -f build/bin/whisper-cli ]; then - echo "::error::whisper-cli not found — build likely truncated" + echo "::error::whisper-cli not found - build likely truncated" ls -lh build/bin/ 2>/dev/null || true exit 1 fi - echo "Build output:"; ls -lh build/bin/whisper-cli + if [ ! -f build/bin/whisper-server ]; then + echo "::error::whisper-server not found - build likely truncated" + ls -lh build/bin/ 2>/dev/null || true + exit 1 + fi + echo "Build output:"; ls -lh build/bin/whisper-cli build/bin/whisper-server - name: Copy ROCm runtime libs run: | @@ -355,6 +361,7 @@ jobs: -DCMAKE_PREFIX_PATH="$env:HIP_PATH" ` -DCMAKE_BUILD_TYPE=${{ matrix.build }} ` -DBUILD_SHARED_LIBS=ON ` + -DWHISPER_BUILD_SERVER=ON ` -DWHISPER_SDL2=${{ matrix.sdl2 }} - name: Build @@ -401,6 +408,11 @@ jobs: Get-ChildItem $bin -ErrorAction SilentlyContinue | Format-Table Name, Length exit 1 } + if (-not (Test-Path "$bin/whisper-server.exe")) { + Write-Error "whisper-server.exe not found in $bin - build likely truncated" + Get-ChildItem $bin -ErrorAction SilentlyContinue | Format-Table Name, Length + exit 1 + } Write-Host "Build output:" Get-ChildItem $bin -Filter "*.exe" | Format-Table Name, Length diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml index 6867bf5fe35..84342cfe527 100644 --- a/.github/workflows/sync.yml +++ b/.github/workflows/sync.yml @@ -108,20 +108,8 @@ jobs: git push origin "$BRANCH" TAG="${{ steps.upstream.outputs.tag }}" - python3 -c " -import sys -tag = sys.argv[1] -files = sys.argv[2] -body = '## Upstream sync: ' + tag + '\n\n' -body += 'Conflicts were detected during automatic merge. Files affected:\n\n' -body += ' ' + files + '\n\n' -body += 'This PR was auto-resolved using upstream (theirs) as a baseline.\n' -body += 'Please review the diff carefully before merging.\n\n' -body += 'Once merged, manually create the release tag on main to trigger the build:\n\n' -body += ' git tag ' + tag + '\n' -body += ' git push origin ' + tag + '\n' -open('/tmp/pr-body.md', 'w').write(body) -" "$TAG" "$CONFLICT_FILES" + printf '## Upstream sync: %s\n\nConflicts were detected during automatic merge. Files affected:\n\n %s\n\nThis PR was auto-resolved using upstream (theirs) as a baseline.\nPlease review the diff carefully before merging.\n\nOnce merged, manually create the release tag on main to trigger the build:\n\n git tag %s\n git push origin %s\n' \ + "$TAG" "$CONFLICT_FILES" "$TAG" "$TAG" > /tmp/pr-body.md gh pr create \ --title "Sync upstream ${TAG} - conflict resolution needed" \ diff --git a/README.md b/README.md index 92160e40276..10886368d83 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ -# whisper-cpp-amd +# whisper.cpp-rocm - - GitHub release (latest by date) + + GitHub release (latest by date) - - Latest release date + + Latest release date - License + License ROCm 7.x @@ -72,22 +72,22 @@ All builds are self-contained — no separate driver or runtime installation nee | GPU Target | Linux | Windows | |---|---|---| -| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | -| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | -| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | -| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | +| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | +| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | +| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | ### Vulkan — Cross-Vendor GPU | Linux | Windows | |---|---| -| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | ### NPU — RyzenAI (Windows only) | Windows | |---| -| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | > Requires NPU driver ≥ `.280` and a pre-compiled `.rai` encoder model from [AMD's Hugging Face collection](https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models). Place the `.rai` file alongside your `ggml-*.bin` model — whisper-cli picks it up automatically. @@ -95,7 +95,7 @@ All builds are self-contained — no separate driver or runtime installation nee | Linux | Windows | |---|---| -| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper-cpp-amd/releases/latest) | +| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | --- @@ -245,7 +245,7 @@ Reproduce any CI build locally using the bundled PowerShell script. Produces ide ## 🏗️ Repository Structure ``` -whisper-cpp-amd/ +whisper.cpp-rocm/ ├── .github/ │ └── workflows/ │ ├── build.yml # All AMD backends — builds + publishes releases From 53715767e8bed7da134045f3bddc279ea48cd1fd Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Thu, 11 Jun 2026 15:06:06 -0700 Subject: [PATCH 47/55] ci: sync --- .github/workflows/build.yml | 28 ++++++++++++---------------- .github/workflows/sync.yml | 21 ++++++++++++++------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cfadd48502c..bf3cf7ee42d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -225,22 +225,11 @@ jobs: run: | BIN="build/bin" LIB="/opt/rocm/lib" + SKIP="^(libc|libm|libdl|librt|libpthread|libstdc\+\+|libgcc_s|ld-linux)" mkdir -p "$BIN" - [ -d "$LIB/rocblas/library" ] && { mkdir -p "$BIN/rocblas"; cp -r "$LIB/rocblas/library" "$BIN/rocblas/"; } - [ -d "$LIB/hipblaslt/library" ] && { mkdir -p "$BIN/hipblaslt"; cp -r "$LIB/hipblaslt/library" "$BIN/hipblaslt/"; } - for so in libhipblas librocblas libamdhip64 librocsolver libroctx64 libhipblaslt libamd_comgr libamd_comgr_loader libhsa-runtime64; do - cp -v $LIB/${so}.so* "$BIN/" 2>/dev/null || true - done - cp -v $LIB/llvm/lib/libLLVM.so* "$BIN/" 2>/dev/null || true - cp -v $LIB/llvm/lib/libclang-cpp.so* "$BIN/" 2>/dev/null || true - [ -d "$LIB/rocm_sysdeps/lib" ] && cp -v $LIB/rocm_sysdeps/lib/librocm_sysdeps_*.so* "$BIN/" || true - - name: Bundle linked libraries - run: | - BIN="build/bin" - SKIP="^(libc|libm|libdl|librt|libpthread|libstdc\+\+|libgcc_s|ld-linux)" - # scan whisper-cli and all already-bundled .so files for missing deps - for target in "$BIN/whisper-cli" "$BIN"/*.so*; do + # ldd-based: scan all whisper binaries and bundled .so files + for target in "$BIN"/whisper-* "$BIN"/*.so*; do [ -f "$target" ] && [ ! -L "$target" ] || continue ldd "$target" 2>/dev/null | grep "=> /" | while read -r line; do soname=$(echo "$line" | awk '{print $1}') @@ -251,6 +240,10 @@ jobs: done done + # Kernel library data dirs (not picked up by ldd — loaded at runtime by path) + [ -d "$LIB/rocblas/library" ] && { mkdir -p "$BIN/rocblas"; cp -r "$LIB/rocblas/library" "$BIN/rocblas/"; } + [ -d "$LIB/hipblaslt/library" ] && { mkdir -p "$BIN/hipblaslt"; cp -r "$LIB/hipblaslt/library" "$BIN/hipblaslt/"; } + - name: Set portable RPATH run: | cd build/bin @@ -276,7 +269,7 @@ jobs: # 3. ROCm — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-rocm: - runs-on: windows-latest + runs-on: windows-2022 needs: [determine-tag, prepare-rocm-matrix] strategy: matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.windows_matrix) }} @@ -528,6 +521,7 @@ jobs: -DCMAKE_BUILD_TYPE=Release ` -DBUILD_SHARED_LIBS=ON ` -DGGML_VULKAN=ON ` + -DWHISPER_BUILD_SERVER=ON ` -DWHISPER_SDL2=ON ` -DVULKAN_SDK="$env:VULKAN_SDK" @@ -603,7 +597,7 @@ jobs: call setup.bat if errorlevel 1 ( echo ERROR: FlexML setup.bat failed! & exit /b 1 ) cd .. - cmake -B build -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_VITISAI=ON + cmake -B build -A x64 -DCMAKE_BUILD_TYPE=Release -DWHISPER_VITISAI=ON -DWHISPER_BUILD_SERVER=ON if errorlevel 1 ( echo ERROR: CMake configure failed! & exit /b 1 ) cmake --build build --config Release -j if errorlevel 1 ( echo ERROR: Build failed! & exit /b 1 ) @@ -792,7 +786,9 @@ jobs: -DGGML_AVX=ON ` -DGGML_AVX2=ON ` -DGGML_FMA=ON ` + -DGGML_AVX512=OFF ` -DBUILD_SHARED_LIBS=ON ` + -DWHISPER_BUILD_SERVER=ON ` -DWHISPER_SDL2=ON - name: Build diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml index 84342cfe527..47693822a04 100644 --- a/.github/workflows/sync.yml +++ b/.github/workflows/sync.yml @@ -6,10 +6,6 @@ name: Sync Upstream & Auto-Release # - conflict → opens a PR for manual resolution, does NOT tag on: - # schedule disabled until v1.8.4 release is confirmed good - # re-enable by uncommenting the schedule block below - # schedule: - # - cron: '0 6 * * *' # daily at 06:00 UTC workflow_dispatch: inputs: upstream_tag: @@ -108,8 +104,19 @@ jobs: git push origin "$BRANCH" TAG="${{ steps.upstream.outputs.tag }}" - printf '## Upstream sync: %s\n\nConflicts were detected during automatic merge. Files affected:\n\n %s\n\nThis PR was auto-resolved using upstream (theirs) as a baseline.\nPlease review the diff carefully before merging.\n\nOnce merged, manually create the release tag on main to trigger the build:\n\n git tag %s\n git push origin %s\n' \ - "$TAG" "$CONFLICT_FILES" "$TAG" "$TAG" > /tmp/pr-body.md + echo "## Upstream sync: ${TAG}" > /tmp/pr-body.md + echo "" >> /tmp/pr-body.md + echo "Conflicts were detected during automatic merge. Files affected:" >> /tmp/pr-body.md + echo "" >> /tmp/pr-body.md + echo " ${CONFLICT_FILES}" >> /tmp/pr-body.md + echo "" >> /tmp/pr-body.md + echo "This PR was auto-resolved using upstream (theirs) as a baseline." >> /tmp/pr-body.md + echo "Please review the diff carefully before merging." >> /tmp/pr-body.md + echo "" >> /tmp/pr-body.md + echo "Once merged, manually create the release tag on main to trigger the build:" >> /tmp/pr-body.md + echo "" >> /tmp/pr-body.md + echo " git tag ${TAG}" >> /tmp/pr-body.md + echo " git push origin ${TAG}" >> /tmp/pr-body.md gh pr create \ --title "Sync upstream ${TAG} - conflict resolution needed" \ @@ -117,7 +124,7 @@ jobs: --base main \ --head "$BRANCH" - echo "::warning::Merge conflict detected — PR opened for manual resolution. Release build NOT triggered." + echo "::warning::Merge conflict detected - PR opened for manual resolution. Release build NOT triggered." # ── Clean merge path: push main + tag → triggers build.yml ─────────── - name: Push merged main From b1649d181ead605b74c4cf690cc609d98f034fca Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Thu, 11 Jun 2026 20:16:01 -0700 Subject: [PATCH 48/55] ci: new lite --- .github/workflows/build.yml | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bf3cf7ee42d..9be2cd9dcda 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -144,7 +144,11 @@ jobs: runs-on: ubuntu-22.04 needs: [determine-tag, prepare-rocm-matrix] strategy: - matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.ubuntu_matrix) }} + matrix: + gfx_target: [gfx1151] + build: [Release] + sdl2: [ON] + arch: [linux/amd64] fail-fast: false steps: @@ -228,19 +232,12 @@ jobs: SKIP="^(libc|libm|libdl|librt|libpthread|libstdc\+\+|libgcc_s|ld-linux)" mkdir -p "$BIN" - # ldd-based: scan all whisper binaries and bundled .so files - for target in "$BIN"/whisper-* "$BIN"/*.so*; do - [ -f "$target" ] && [ ! -L "$target" ] || continue - ldd "$target" 2>/dev/null | grep "=> /" | while read -r line; do - soname=$(echo "$line" | awk '{print $1}') - path=$(echo "$line" | awk '{print $3}') - [[ "$soname" =~ $SKIP ]] && continue - [ -f "$BIN/$soname" ] && continue - cp -L "$path" "$BIN/$soname" 2>/dev/null || true - done - done + # Copy all .so* from ROCm lib dir — preserves symlinks so soname chains work + find "$LIB" -maxdepth 1 -name "*.so*" -exec cp -P {} "$BIN/" \; 2>/dev/null || true + find "$LIB/llvm/lib" -maxdepth 1 -name "*.so*" -exec cp -P {} "$BIN/" \; 2>/dev/null || true + [ -d "$LIB/rocm_sysdeps/lib" ] && find "$LIB/rocm_sysdeps/lib" -maxdepth 1 -name "*.so*" -exec cp -P {} "$BIN/" \; 2>/dev/null || true - # Kernel library data dirs (not picked up by ldd — loaded at runtime by path) + # Kernel library data dirs (loaded at runtime by path, not via soname) [ -d "$LIB/rocblas/library" ] && { mkdir -p "$BIN/rocblas"; cp -r "$LIB/rocblas/library" "$BIN/rocblas/"; } [ -d "$LIB/hipblaslt/library" ] && { mkdir -p "$BIN/hipblaslt"; cp -r "$LIB/hipblaslt/library" "$BIN/hipblaslt/"; } @@ -269,6 +266,7 @@ jobs: # 3. ROCm — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-rocm: + if: false runs-on: windows-2022 needs: [determine-tag, prepare-rocm-matrix] strategy: @@ -425,6 +423,7 @@ jobs: # 4. Vulkan — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-vulkan: + if: false runs-on: ubuntu-latest needs: determine-tag @@ -491,6 +490,7 @@ jobs: # 5. Vulkan — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-vulkan: + if: false runs-on: windows-latest needs: determine-tag @@ -548,6 +548,7 @@ jobs: # 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) # ════════════════════════════════════════════════════════════════════════════════ windows-npu: + if: false runs-on: [self-hosted, Windows, stx, rai300_400] needs: determine-tag continue-on-error: true # runner may be offline; don't block release @@ -647,6 +648,7 @@ jobs: # 7. Metal — macOS (arm64) # ════════════════════════════════════════════════════════════════════════════════ macos-metal: + if: false runs-on: macos-latest needs: determine-tag From ef9cee0be576167ce1fe8d1df5923d6b2969a13d Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Thu, 11 Jun 2026 21:47:11 -0700 Subject: [PATCH 49/55] ci: setup lightweight --- .github/workflows/build.yml | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9be2cd9dcda..7c982c2ed54 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -193,7 +193,6 @@ jobs: -DCMAKE_PREFIX_PATH=/opt/rocm \ -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ -DGPU_TARGETS="$MAPPED_GPU_TARGET" \ - -DBUILD_SHARED_LIBS=ON \ -DGGML_HIP=ON \ -DWHISPER_BUILD_SERVER=ON \ -DWHISPER_SDL2=${{ matrix.sdl2 }} @@ -225,6 +224,35 @@ jobs: fi echo "Build output:"; ls -lh build/bin/whisper-cli build/bin/whisper-server + - name: Inspect shared library dependencies + run: | + echo "======================================================" + echo " ldd whisper-cli" + echo "======================================================" + ldd build/bin/whisper-cli || true + echo "" + echo "======================================================" + echo " ldd whisper-server" + echo "======================================================" + ldd build/bin/whisper-server || true + echo "" + echo "======================================================" + echo " MISSING dependencies (not found)" + echo "======================================================" + MISSING_CLI=$(ldd build/bin/whisper-cli 2>/dev/null | grep "not found" || true) + MISSING_SRV=$(ldd build/bin/whisper-server 2>/dev/null | grep "not found" || true) + if [ -z "$MISSING_CLI" ] && [ -z "$MISSING_SRV" ]; then + echo "All dependencies resolved — no missing libs." + else + [ -n "$MISSING_CLI" ] && echo "whisper-cli missing:" && echo "$MISSING_CLI" + [ -n "$MISSING_SRV" ] && echo "whisper-server missing:" && echo "$MISSING_SRV" + fi + echo "" + echo "======================================================" + echo " All .so* files currently in build/bin/" + echo "======================================================" + ls -lh build/bin/*.so* 2>/dev/null || echo "(none)" + - name: Copy ROCm runtime libs run: | BIN="build/bin" From af95c386ff43565b074521e93b83fe1c928bee4b Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Thu, 11 Jun 2026 22:24:58 -0700 Subject: [PATCH 50/55] ci: reduce linked bins; Update README --- .github/workflows/build.yml | 110 ++++++++++++++++++++++++------------ README.md | 26 ++++++--- 2 files changed, 92 insertions(+), 44 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7c982c2ed54..11e92e0e138 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -144,11 +144,7 @@ jobs: runs-on: ubuntu-22.04 needs: [determine-tag, prepare-rocm-matrix] strategy: - matrix: - gfx_target: [gfx1151] - build: [Release] - sdl2: [ON] - arch: [linux/amd64] + matrix: ${{ fromJson(needs.prepare-rocm-matrix.outputs.ubuntu_matrix) }} fail-fast: false steps: @@ -226,48 +222,58 @@ jobs: - name: Inspect shared library dependencies run: | - echo "======================================================" - echo " ldd whisper-cli" - echo "======================================================" + echo "--- ldd whisper-cli ---" ldd build/bin/whisper-cli || true - echo "" - echo "======================================================" - echo " ldd whisper-server" - echo "======================================================" + echo "--- ldd whisper-server ---" ldd build/bin/whisper-server || true - echo "" - echo "======================================================" - echo " MISSING dependencies (not found)" - echo "======================================================" + echo "--- missing libs ---" MISSING_CLI=$(ldd build/bin/whisper-cli 2>/dev/null | grep "not found" || true) MISSING_SRV=$(ldd build/bin/whisper-server 2>/dev/null | grep "not found" || true) if [ -z "$MISSING_CLI" ] && [ -z "$MISSING_SRV" ]; then - echo "All dependencies resolved — no missing libs." + echo "All dependencies resolved." else [ -n "$MISSING_CLI" ] && echo "whisper-cli missing:" && echo "$MISSING_CLI" [ -n "$MISSING_SRV" ] && echo "whisper-server missing:" && echo "$MISSING_SRV" fi - echo "" - echo "======================================================" - echo " All .so* files currently in build/bin/" - echo "======================================================" + echo "--- .so* files in build/bin/ ---" ls -lh build/bin/*.so* 2>/dev/null || echo "(none)" - name: Copy ROCm runtime libs run: | BIN="build/bin" - LIB="/opt/rocm/lib" - SKIP="^(libc|libm|libdl|librt|libpthread|libstdc\+\+|libgcc_s|ld-linux)" mkdir -p "$BIN" - # Copy all .so* from ROCm lib dir — preserves symlinks so soname chains work - find "$LIB" -maxdepth 1 -name "*.so*" -exec cp -P {} "$BIN/" \; 2>/dev/null || true - find "$LIB/llvm/lib" -maxdepth 1 -name "*.so*" -exec cp -P {} "$BIN/" \; 2>/dev/null || true - [ -d "$LIB/rocm_sysdeps/lib" ] && find "$LIB/rocm_sysdeps/lib" -maxdepth 1 -name "*.so*" -exec cp -P {} "$BIN/" \; 2>/dev/null || true + # Collect only the ROCm libs actually needed by our binaries via ldd. + # This avoids bundling massive unused libs (libMIOpen, libMLIR, librocroller, etc.) + # that would inflate the artifact by 1+ GB. + ROCM_ROOTS="/opt/rocm/lib /opt/rocm/lib/llvm/lib /opt/rocm/lib/rocm_sysdeps/lib" + + for binary in "$BIN"/*; do + [ -f "$binary" ] && [ -x "$binary" ] && file "$binary" | grep -q ELF || continue + ldd "$binary" 2>/dev/null | awk '{print $3}' | grep -E "^/opt/rocm" | while read lib; do + [ -f "$lib" ] || continue + cp -n "$lib" "$BIN/" 2>/dev/null || true + # Also copy the soname symlink if it exists alongside the real file + dir=$(dirname "$lib") + base=$(basename "$lib") + find "$dir" -maxdepth 1 -name "${base%%.*}.so*" -exec cp -Pn {} "$BIN/" \; 2>/dev/null || true + done + done + + # Transitive deps: repeat ldd over any newly copied ROCm .so to catch indirect deps + for pass in 1 2; do + for lib in "$BIN"/lib*.so*; do + [ -f "$lib" ] && [ ! -L "$lib" ] || continue + ldd "$lib" 2>/dev/null | awk '{print $3}' | grep -E "^/opt/rocm" | while read dep; do + [ -f "$dep" ] || continue + cp -n "$dep" "$BIN/" 2>/dev/null || true + done + done + done # Kernel library data dirs (loaded at runtime by path, not via soname) - [ -d "$LIB/rocblas/library" ] && { mkdir -p "$BIN/rocblas"; cp -r "$LIB/rocblas/library" "$BIN/rocblas/"; } - [ -d "$LIB/hipblaslt/library" ] && { mkdir -p "$BIN/hipblaslt"; cp -r "$LIB/hipblaslt/library" "$BIN/hipblaslt/"; } + [ -d /opt/rocm/lib/rocblas/library ] && { mkdir -p "$BIN/rocblas"; cp -r /opt/rocm/lib/rocblas/library "$BIN/rocblas/"; } + [ -d /opt/rocm/lib/hipblaslt/library ] && { mkdir -p "$BIN/hipblaslt"; cp -r /opt/rocm/lib/hipblaslt/library "$BIN/hipblaslt/"; } - name: Set portable RPATH run: | @@ -294,7 +300,6 @@ jobs: # 3. ROCm — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-rocm: - if: false runs-on: windows-2022 needs: [determine-tag, prepare-rocm-matrix] strategy: @@ -379,7 +384,6 @@ jobs: "-DCMAKE_HIP_FLAGS=--rocm-path=C:/opt/rocm" ` -DCMAKE_PREFIX_PATH="$env:HIP_PATH" ` -DCMAKE_BUILD_TYPE=${{ matrix.build }} ` - -DBUILD_SHARED_LIBS=ON ` -DWHISPER_BUILD_SERVER=ON ` -DWHISPER_SDL2=${{ matrix.sdl2 }} @@ -451,7 +455,6 @@ jobs: # 4. Vulkan — Linux # ════════════════════════════════════════════════════════════════════════════════ linux-vulkan: - if: false runs-on: ubuntu-latest needs: determine-tag @@ -518,7 +521,6 @@ jobs: # 5. Vulkan — Windows # ════════════════════════════════════════════════════════════════════════════════ windows-vulkan: - if: false runs-on: windows-latest needs: determine-tag @@ -576,7 +578,6 @@ jobs: # 6. NPU (VitisAI / RyzenAI) — Windows only (self-hosted runner) # ════════════════════════════════════════════════════════════════════════════════ windows-npu: - if: false runs-on: [self-hosted, Windows, stx, rai300_400] needs: determine-tag continue-on-error: true # runner may be offline; don't block release @@ -676,7 +677,6 @@ jobs: # 7. Metal — macOS (arm64) # ════════════════════════════════════════════════════════════════════════════════ macos-metal: - if: false runs-on: macos-latest needs: determine-tag @@ -1182,6 +1182,34 @@ jobs: echo "Release assets:" ls -lh release/ + - name: Generate release notes + id: notes + run: | + TAG="${{ needs.determine-tag.outputs.tag_name }}" + ROCM_VER="${{ env.ROCM_VERSION }}" + + cat > /tmp/release-notes.md <> $GITHUB_OUTPUT + - name: Create release id: create_release uses: ggml-org/action-create-release@v1 @@ -1191,6 +1219,7 @@ jobs: tag_name: ${{ needs.determine-tag.outputs.tag_name }} release_name: "whisper.cpp ${{ needs.determine-tag.outputs.tag_name }} - AMD Builds" prerelease: ${{ github.event.inputs.pre_release_tag != '' }} + body_path: ${{ steps.notes.outputs.notes_file }} draft: false - name: Upload release assets @@ -1212,3 +1241,14 @@ jobs: data: fs.readFileSync(`./release/${file}`), }); } + + - name: Update README download links + run: | + TAG="${{ needs.determine-tag.outputs.tag_name }}" + # Replace the placeholder tag in all download URLs with the actual release tag + sed -i "s|/releases/download/[^/]*/whisper-[^-]*-|/releases/download/${TAG}/whisper-${TAG}-|g" README.md + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add README.md + git diff --cached --quiet || git commit -m "docs: update download links to ${TAG}" + git push diff --git a/README.md b/README.md index 017410cfc5f..2d4f9ea1826 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Powered by whisper.cpp - Platform: Windows | Linux + Platform: Windows | Linux | macOS GPU Targets @@ -72,30 +72,36 @@ All builds are self-contained — no separate driver or runtime installation nee | GPU Target | Linux | Windows | |---|---|---| -| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | -| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | -| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | -| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | +| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-rocm-gfx1151.tar.gz) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-rocm-gfx1151.zip) | +| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-rocm-gfx1150.tar.gz) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-rocm-gfx1150.zip) | +| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-rocm-gfx120X.tar.gz) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-rocm-gfx120X.zip) | +| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-rocm-gfx110X.tar.gz) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-rocm-gfx110X.zip) | ### Vulkan — Cross-Vendor GPU | Linux | Windows | |---|---| -| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | +| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-vulkan-x86_64.tar.gz) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-vulkan-x64.zip) | ### NPU — RyzenAI (Windows only) | Windows | |---| -| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | +| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-npu-x64.zip) | > Requires NPU driver ≥ `.280` and a pre-compiled `.rai` encoder model from [AMD's Hugging Face collection](https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models). Place the `.rai` file alongside your `ggml-*.bin` model — whisper-cli picks it up automatically. +### macOS — Metal GPU + +| macOS (Apple Silicon) | +|---| +| [![macOS Metal](https://img.shields.io/badge/Download-macOS%20Metal%20(arm64)-lightgrey?logo=apple&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-darwin-metal-arm64.tar.gz) | + ### CPU — No GPU Required | Linux | Windows | |---|---| -| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/latest) | +| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-cpu-x86_64.tar.gz) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-cpu-x64.zip) | --- @@ -167,7 +173,7 @@ upstream whisper.cpp releases vX.Y.Z with 13 artifacts across all backends and OS targets ``` -**Every release ships 13 artifacts:** +**Every release ships up to 14 artifacts:** ``` whisper-{version}-linux-rocm-gfx1151.tar.gz @@ -183,6 +189,7 @@ whisper-{version}-windows-vulkan-x64.zip whisper-{version}-windows-npu-x64.zip (may be absent if NPU runner offline) whisper-{version}-linux-cpu-x86_64.tar.gz whisper-{version}-windows-cpu-x64.zip +whisper-{version}-darwin-metal-arm64.tar.gz ``` > [!TIP] @@ -225,6 +232,7 @@ Reproduce any CI build locally using the bundled PowerShell script. Produces ide |---|---| | ROCm | `amdhip64`, `rocblas`, `hipblaslt` + library data, LLVM runtime, all system deps; RPATH=`$ORIGIN` on Linux | | Vulkan | SPIR-V shaders embedded at build time; links against system Vulkan loader | +| Metal | Uses macOS system Metal framework; no extra bundling needed | | NPU | FlexML Runtime DLLs (`flexmlrt/bin` + `flexmlrt/lib`) | | CPU | SDL2.dll included on Windows | From 776f36ec29701971916f3e9f4a08ba53094b1fc1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 12 Jun 2026 06:17:42 +0000 Subject: [PATCH 51/55] docs: update download links to v1.8.3 --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2d4f9ea1826..46d7a67a09e 100644 --- a/README.md +++ b/README.md @@ -72,22 +72,22 @@ All builds are self-contained — no separate driver or runtime installation nee | GPU Target | Linux | Windows | |---|---|---| -| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-rocm-gfx1151.tar.gz) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-rocm-gfx1151.zip) | -| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-rocm-gfx1150.tar.gz) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-rocm-gfx1150.zip) | -| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-rocm-gfx120X.tar.gz) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-rocm-gfx120X.zip) | -| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-rocm-gfx110X.tar.gz) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-rocm-gfx110X.zip) | +| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-rocm-gfx1151.tar.gz) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-rocm-gfx1151.zip) | +| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-rocm-gfx1150.tar.gz) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-rocm-gfx1150.zip) | +| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-rocm-gfx120X.tar.gz) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-rocm-gfx120X.zip) | +| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-rocm-gfx110X.tar.gz) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-rocm-gfx110X.zip) | ### Vulkan — Cross-Vendor GPU | Linux | Windows | |---|---| -| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-vulkan-x86_64.tar.gz) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-vulkan-x64.zip) | +| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-vulkan-x86_64.tar.gz) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-vulkan-x64.zip) | ### NPU — RyzenAI (Windows only) | Windows | |---| -| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-npu-x64.zip) | +| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-npu-x64.zip) | > Requires NPU driver ≥ `.280` and a pre-compiled `.rai` encoder model from [AMD's Hugging Face collection](https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models). Place the `.rai` file alongside your `ggml-*.bin` model — whisper-cli picks it up automatically. @@ -95,13 +95,13 @@ All builds are self-contained — no separate driver or runtime installation nee | macOS (Apple Silicon) | |---| -| [![macOS Metal](https://img.shields.io/badge/Download-macOS%20Metal%20(arm64)-lightgrey?logo=apple&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-darwin-metal-arm64.tar.gz) | +| [![macOS Metal](https://img.shields.io/badge/Download-macOS%20Metal%20(arm64)-lightgrey?logo=apple&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-darwin-metal-arm64.tar.gz) | ### CPU — No GPU Required | Linux | Windows | |---|---| -| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-linux-cpu-x86_64.tar.gz) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/latest-release/whisper-latest-release-windows-cpu-x64.zip) | +| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-cpu-x86_64.tar.gz) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-cpu-x64.zip) | --- From 460f5be16d91dfbf7a4cd3a3a717f63cdd8786ba Mon Sep 17 00:00:00 2001 From: Iswarya Alex <47045679+iswaryaalex@users.noreply.github.com> Date: Thu, 11 Jun 2026 23:59:33 -0700 Subject: [PATCH 52/55] Optimize library collection in build workflow Updated build process to copy specific shared libraries to the build/bin directory, improving artifact size by avoiding unnecessary libraries. --- .github/workflows/build.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 11e92e0e138..3dc709f5634 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -243,9 +243,13 @@ jobs: BIN="build/bin" mkdir -p "$BIN" - # Collect only the ROCm libs actually needed by our binaries via ldd. - # This avoids bundling massive unused libs (libMIOpen, libMLIR, librocroller, etc.) - # that would inflate the artifact by 1+ GB. + cp -a build/src/libwhisper.so* "$BIN"/ 2>/dev/null || true + cp -a build/ggml/src/libggml.so* "$BIN"/ 2>/dev/null || true + cp -a build/ggml/src/libggml-base.so* "$BIN"/ 2>/dev/null || true + cp -a build/ggml/src/libggml-cpu.so* "$BIN"/ 2>/dev/null || true + cp -a build/ggml/src/ggml-hip/libggml-hip.so* "$BIN"/ 2>/dev/null || true + export LD_LIBRARY_PATH="$PWD/$BIN:/opt/rocm/lib:/opt/rocm/lib/llvm/lib:/opt/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}" + ROCM_ROOTS="/opt/rocm/lib /opt/rocm/lib/llvm/lib /opt/rocm/lib/rocm_sysdeps/lib" for binary in "$BIN"/*; do From 56119b46c8d726a4404cfbf4da6a8ea7b9bbcbae Mon Sep 17 00:00:00 2001 From: fl0rianr <226492742+fl0rianr@users.noreply.github.com> Date: Fri, 19 Jun 2026 19:25:37 +0200 Subject: [PATCH 53/55] fix(macos): package whisper Metal dylibs with portable rpath --- .github/workflows/build.yml | 87 +++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3dc709f5634..57b3bd59900 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -694,6 +694,9 @@ jobs: run: | cmake -B build \ -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_MACOSX_RPATH=ON \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ + -DCMAKE_INSTALL_RPATH="@loader_path" \ -DGGML_METAL=ON \ -DWHISPER_BUILD_EXAMPLES=ON \ -DWHISPER_BUILD_TESTS=OFF \ @@ -717,17 +720,95 @@ jobs: ls -lh build/bin/ 2>/dev/null || true exit 1 fi - ls -lh build/bin/whisper-cli + + if [ ! -f build/bin/whisper-server ]; then + echo "::error::whisper-server not found" + ls -lh build/bin/ 2>/dev/null || true + exit 1 + fi + + echo "--- build/bin ---" + ls -lh build/bin/ + + echo "--- macOS dylibs produced by build ---" + find build -name "*.dylib" -print | sort + + echo "--- whisper-server dependencies before packaging ---" + otool -L build/bin/whisper-server - name: Package run: | + set -euo pipefail + VER="${{ needs.determine-tag.outputs.version }}" ARCHIVE="whisper-${VER}-darwin-metal-arm64.tar.gz" STAGE="whisper-${VER}-darwin-metal-arm64" + + rm -rf "$STAGE" "$ARCHIVE" mkdir -p "$STAGE" - cp -r build/bin/* "$STAGE/" 2>/dev/null || true + + cp -R build/bin/* "$STAGE/" 2>/dev/null || true + + # whisper-server depends on libwhisper / ggml dylibs that CMake may + # leave under build/src and build/ggml/src rather than build/bin. + # Package all produced dylibs next to the executables so @loader_path + # can resolve them on downstream machines and GitHub macOS runners. + while IFS= read -r lib; do + cp -P "$lib" "$STAGE/" + done < <(find build -name "*.dylib" -print | sort) + + # Make dylib lookup portable inside the extracted archive. + for target in "$STAGE"/whisper-* "$STAGE"/*.dylib; do + [ -e "$target" ] || continue + + install_name_tool -add_rpath "@loader_path" "$target" 2>/dev/null || true + + if [ "${target##*.}" = "dylib" ] && [ ! -L "$target" ]; then + install_name_tool -id "@rpath/$(basename "$target")" "$target" 2>/dev/null || true + fi + + while IFS= read -r dep; do + case "$dep" in + "$PWD"/build/*|/Users/runner/work/whisper.cpp-rocm/*) + install_name_tool -change "$dep" "@rpath/$(basename "$dep")" "$target" 2>/dev/null || true + ;; + esac + done < <(otool -L "$target" 2>/dev/null | awk 'NR > 1 {print $1}') + done + + echo "--- packaged files ---" + find "$STAGE" -maxdepth 1 -type f -o -type l | sort + + echo "--- whisper-server dependencies after packaging ---" + otool -L "$STAGE/whisper-server" + otool -l "$STAGE/whisper-server" | grep -A2 LC_RPATH || true + + if otool -L "$STAGE/whisper-server" | grep -q "/Users/runner/work/whisper.cpp-rocm"; then + echo "::error::whisper-server still references non-portable build paths" + exit 1 + fi + + if ! find "$STAGE" -maxdepth 1 \( -type f -o -type l \) -name "libwhisper*.dylib" | grep -q .; then + echo "::error::packaged archive is missing libwhisper dylib" + exit 1 + fi + + set +e + DYLD_LIBRARY_PATH="$PWD/$STAGE" "$STAGE/whisper-server" --help > whisper-server-smoke.log 2>&1 + smoke_status=$? + set -e + + cat whisper-server-smoke.log + + if grep -q "Library not loaded" whisper-server-smoke.log; then + echo "::error::whisper-server has unresolved dylib dependencies" + exit 1 + fi + + echo "whisper-server smoke command exited with status ${smoke_status}" + tar -czf "$ARCHIVE" "$STAGE" - echo "ARCHIVE=$ARCHIVE" >> $GITHUB_ENV + echo "ARCHIVE=$ARCHIVE" >> "$GITHUB_ENV" - uses: actions/upload-artifact@v4 with: From d30ec4614411d10f29a306b32b39ab885b69ebed Mon Sep 17 00:00:00 2001 From: Iswarya Alex Date: Fri, 19 Jun 2026 16:23:21 -0700 Subject: [PATCH 54/55] Update release process --- .github/workflows/build.yml | 53 +++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 57b3bd59900..5c4710b1663 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1267,33 +1267,17 @@ jobs: echo "Release assets:" ls -lh release/ - - name: Generate release notes - id: notes + - name: Delete existing release/tag if present + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | TAG="${{ needs.determine-tag.outputs.tag_name }}" - ROCM_VER="${{ env.ROCM_VERSION }}" - - cat > /tmp/release-notes.md <> $GITHUB_OUTPUT + RELEASE_ID=$(gh api repos/${{ github.repository }}/releases/tags/"$TAG" --jq '.id' 2>/dev/null || true) + if [ -n "$RELEASE_ID" ]; then + echo "Deleting existing release $RELEASE_ID for tag $TAG" + gh api -X DELETE repos/${{ github.repository }}/releases/"$RELEASE_ID" + fi + git push --delete origin "refs/tags/$TAG" 2>/dev/null || true - name: Create release id: create_release @@ -1304,8 +1288,25 @@ jobs: tag_name: ${{ needs.determine-tag.outputs.tag_name }} release_name: "whisper.cpp ${{ needs.determine-tag.outputs.tag_name }} - AMD Builds" prerelease: ${{ github.event.inputs.pre_release_tag != '' }} - body_path: ${{ steps.notes.outputs.notes_file }} draft: false + body: | + ## AMD whisper.cpp ${{ needs.determine-tag.outputs.tag_name }} + + AMD-based pre-built binaries of [whisper.cpp ${{ needs.determine-tag.outputs.tag_name }}](https://github.com/ggerganov/whisper.cpp/releases/tag/${{ needs.determine-tag.outputs.tag_name }}) with full hardware acceleration across ROCm GPU (iGPU and dGPU), NPU (RyzenAI), and CPU — for Linux and Windows. + All ROCm runtime libraries (ROCm ${{ env.ROCM_VERSION }}) are bundled. No drivers or separate installs required — download, extract, and run. + + ### Packages + + | Target | Linux | Windows | + |---|---|---| + | ROCm gfx1151 (Ryzen AI MAX+ Pro 395) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx1151.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx1151.zip` | + | ROCm gfx1150 (Ryzen AI 300) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx1150.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx1150.zip` | + | ROCm gfx120X (RDNA4 dGPU) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx120X.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx120X.zip` | + | ROCm gfx110X (RDNA3 dGPU & iGPU) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-rocm-gfx110X.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-rocm-gfx110X.zip` | + | Vulkan (cross-vendor) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-vulkan-x86_64.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-vulkan-x64.zip` | + | NPU (RyzenAI) | — | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-npu-x64.zip` | + | Metal (Apple Silicon) | `whisper-${{ needs.determine-tag.outputs.tag_name }}-darwin-metal-arm64.tar.gz` | — | + | CPU only | `whisper-${{ needs.determine-tag.outputs.tag_name }}-linux-cpu-x86_64.tar.gz` | `whisper-${{ needs.determine-tag.outputs.tag_name }}-windows-cpu-x64.zip` | - name: Upload release assets uses: actions/github-script@v7 From dc298b819f14d404dd1de28479814ad84ed77f04 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 20 Jun 2026 18:32:34 +0000 Subject: [PATCH 55/55] docs: update download links to v1.8.4 --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 46d7a67a09e..ef7dd801b24 100644 --- a/README.md +++ b/README.md @@ -72,22 +72,22 @@ All builds are self-contained — no separate driver or runtime installation nee | GPU Target | Linux | Windows | |---|---|---| -| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-rocm-gfx1151.tar.gz) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-rocm-gfx1151.zip) | -| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-rocm-gfx1150.tar.gz) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-rocm-gfx1150.zip) | -| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-rocm-gfx120X.tar.gz) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-rocm-gfx120X.zip) | -| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-rocm-gfx110X.tar.gz) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-rocm-gfx110X.zip) | +| **gfx1151** (Ryzen AI MAX+ Pro 395) | [![Linux gfx1151](https://img.shields.io/badge/Download-Linux%20gfx1151-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx1151.tar.gz) | [![Windows gfx1151](https://img.shields.io/badge/Download-Windows%20gfx1151-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx1151.zip) | +| **gfx1150** (Ryzen AI 300) | [![Linux gfx1150](https://img.shields.io/badge/Download-Linux%20gfx1150-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx1150.tar.gz) | [![Windows gfx1150](https://img.shields.io/badge/Download-Windows%20gfx1150-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx1150.zip) | +| **gfx120X** (RDNA4 dGPU) | [![Linux gfx120X](https://img.shields.io/badge/Download-Linux%20gfx120X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx120X.tar.gz) | [![Windows gfx120X](https://img.shields.io/badge/Download-Windows%20gfx120X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx120X.zip) | +| **gfx110X** (RDNA3 dGPU & iGPU) | [![Linux gfx110X](https://img.shields.io/badge/Download-Linux%20gfx110X-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-rocm-gfx110X.tar.gz) | [![Windows gfx110X](https://img.shields.io/badge/Download-Windows%20gfx110X-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-rocm-gfx110X.zip) | ### Vulkan — Cross-Vendor GPU | Linux | Windows | |---|---| -| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-vulkan-x86_64.tar.gz) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-vulkan-x64.zip) | +| [![Linux Vulkan](https://img.shields.io/badge/Download-Linux%20Vulkan-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-vulkan-x86_64.tar.gz) | [![Windows Vulkan](https://img.shields.io/badge/Download-Windows%20Vulkan-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-vulkan-x64.zip) | ### NPU — RyzenAI (Windows only) | Windows | |---| -| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-npu-x64.zip) | +| [![Windows NPU](https://img.shields.io/badge/Download-Windows%20NPU%20(RyzenAI)-red?logo=amd&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-npu-x64.zip) | > Requires NPU driver ≥ `.280` and a pre-compiled `.rai` encoder model from [AMD's Hugging Face collection](https://huggingface.co/collections/amd/ryzen-ai-16-whisper-npu-optimized-onnx-models). Place the `.rai` file alongside your `ggml-*.bin` model — whisper-cli picks it up automatically. @@ -95,13 +95,13 @@ All builds are self-contained — no separate driver or runtime installation nee | macOS (Apple Silicon) | |---| -| [![macOS Metal](https://img.shields.io/badge/Download-macOS%20Metal%20(arm64)-lightgrey?logo=apple&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-darwin-metal-arm64.tar.gz) | +| [![macOS Metal](https://img.shields.io/badge/Download-macOS%20Metal%20(arm64)-lightgrey?logo=apple&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-darwin-metal-arm64.tar.gz) | ### CPU — No GPU Required | Linux | Windows | |---|---| -| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-linux-cpu-x86_64.tar.gz) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.3/whisper-v1.8.3-release-windows-cpu-x64.zip) | +| [![Linux CPU](https://img.shields.io/badge/Download-Linux%20CPU-blue?logo=linux&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-linux-cpu-x86_64.tar.gz) | [![Windows CPU](https://img.shields.io/badge/Download-Windows%20CPU-green?logo=windows&logoColor=white)](https://github.com/lemonade-sdk/whisper.cpp-rocm/releases/download/v1.8.4/whisper-v1.8.4-release-windows-cpu-x64.zip) | ---