Release (Prism) #8
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Release workflow for the Prism build of the project.
name: Release (Prism)

# `workflow_dispatch` is the only trigger declared here, so this workflow is
# started manually.
on:
  workflow_dispatch:
    inputs:
      # Boolean input supplied when the run is started; the `release` job
      # reads it to decide whether a release should be published.
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
# Runs that share a group cancel each other. The group key uses `github.ref`
# when `github.head_ref` is set and the unique `github.run_id` otherwise.
# `head_ref` is only populated for pull-request events, so a manually
# dispatched run ends up in a group of its own.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
env:
  # Name of the branch being built: `head_ref` when set, else `ref_name`.
  # NOTE(review): presumably consumed by the local get-tag-name action -
  # confirm against .github/actions/get-tag-name.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # CMake options shared across platforms; most build steps splice this in
  # through the env context after their platform-specific flags.
  CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"
jobs:
  # Builds with the Metal options enabled on the macos-14 runner and uploads
  # the contents of build/bin as a tarball.
  macOS-arm64:
    runs-on: macos-14

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: macOS-latest-cmake-arm64
          evict-old-files: 1d

      # -DGGML_RPC=ON is not repeated here because CMAKE_ARGS already sets it.
      - name: Build
        run: |
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='@loader_path' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # `-s` is the path-rewrite option of the tar shipped on macOS; it puts
      # every entry under a llama-<tag>/ directory inside the archive.
      - name: Pack artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
          name: llama-bin-macos-arm64.tar.gz

  # Builds the CUDA backend on Ubuntu 22.04, once per CUDA toolkit version
  # listed in the matrix.
  linux-cuda:
    runs-on: ubuntu-22.04

    strategy:
      matrix:
        include:
          # `cuda` is the dotted version used in paths and artifact names;
          # `cuda_pkg` is the dashed form used in the apt package name.
          - cuda: '12.4'
            cuda_pkg: '12-4'
          - cuda: '12.8'
            cuda_pkg: '12-8'
          - cuda: '13.1'
            cuda_pkg: '13-1'

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: ubuntu-22-cmake-cuda-${{ matrix.cuda }}
          evict-old-files: 1d

      - name: Install CUDA toolkit
        run: |
          wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          sudo apt-get -y install cuda-toolkit-${{ matrix.cuda_pkg }}
          echo "/usr/local/cuda-${{ matrix.cuda }}/bin" >> $GITHUB_PATH
          echo "CUDA_PATH=/usr/local/cuda-${{ matrix.cuda }}" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=/usr/local/cuda-${{ matrix.cuda }}/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV

      # The build output is filtered through grep. When no `shell:` is given
      # the runner starts bash without pipefail, so the step would report
      # grep's exit status and hide a failed build; pipefail is therefore
      # enabled explicitly.
      - name: Build
        run: |
          set -o pipefail
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DGGML_NATIVE=OFF \
            -DGGML_CUDA=ON \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc) 2>&1 | grep -v "^nvcc warning"

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz
          name: llama-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz

  # Builds the CUDA backend on Windows, once per CUDA version in the matrix,
  # and additionally packs the CUDA runtime DLLs into a separate archive.
  windows-cuda:
    runs-on: windows-2022

    strategy:
      matrix:
        cuda: ['12.4', '13.1']

    steps:
      # Full history is fetched here like in every other job that calls
      # get-tag-name, so that all jobs resolve the same tag and the archive
      # name matches the download links in the release notes.
      # NOTE(review): assumes get-tag-name derives the tag from git history -
      # confirm against .github/actions/get-tag-name.
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Install ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-cuda-${{ matrix.cuda }}
          variant: ccache
          evict-old-files: 1d

      - name: Install Cuda Toolkit
        uses: ./.github/actions/windows-setup-cuda
        with:
          cuda_version: ${{ matrix.cuda }}

      - name: Install Ninja
        run: choco install ninja

      - name: Build
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -DGGML_NATIVE=OFF ^
            -DGGML_CUDA=ON ^
            -DLLAMA_BUILD_BORINGSSL=ON ^
            -DCMAKE_CUDA_FLAGS="-diag-suppress=221" ^
            ${{ env.CMAKE_ARGS }}
          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
          cmake --build build --config Release -j %NINJA_JOBS%

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        run: |
          7z a -snl llama-${{ steps.tag.outputs.name }}-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

      # The same three DLL patterns are looked up in several sub-directories
      # of the CUDA install; whatever is found is zipped separately.
      # NOTE(review): CUDA_PATH is presumably exported by the
      # windows-setup-cuda action - confirm.
      - name: Copy and pack Cuda runtime
        run: |
          echo "Cuda install location: ${{ env.CUDA_PATH }}"
          $dst='.\build\bin\cudart\'
          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\bin\x64" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*

      - name: Upload Cuda runtime
        uses: actions/upload-artifact@v6
        with:
          path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

  # Builds the HIP backend on Ubuntu 22.04 for the GPU targets listed in the
  # matrix entry.
  ubuntu-22-rocm:
    runs-on: ubuntu-22.04

    strategy:
      matrix:
        include:
          - ROCM_VERSION: "7.2"
            gpu_targets: "gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1151;gfx1150;gfx1200;gfx1201"
            build: 'x64'

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: ubuntu-rocm-cmake-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}
          evict-old-files: 1d

      - name: Dependencies
        run: |
          sudo apt install -y build-essential git cmake wget

      # Installs ROCm from the repo.radeon.com apt repository; only runs for
      # the 7.2 matrix entry.
      - name: Setup Legacy ROCm
        if: matrix.ROCM_VERSION == '7.2'
        run: |
          sudo mkdir --parents --mode=0755 /etc/apt/keyrings
          wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
            gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null

          sudo tee /etc/apt/sources.list.d/rocm.list << EOF
          deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ matrix.ROCM_VERSION }} jammy main
          EOF

          sudo tee /etc/apt/preferences.d/rocm-pin-600 << EOF
          Package: *
          Pin: release o=repo.radeon.com
          Pin-Priority: 600
          EOF

          sudo apt update
          sudo apt-get install -y libssl-dev rocm-hip-sdk

      # Tarball-based install used for any matrix entry other than 7.2; with
      # the current matrix this step is skipped.
      - name: Setup TheRock
        if: matrix.ROCM_VERSION != '7.2'
        run: |
          wget https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-${{ matrix.ROCM_VERSION }}.tar.gz
          mkdir install
          tar -xf *.tar.gz -C install
          export ROCM_PATH=$(pwd)/install
          echo ROCM_PATH=$ROCM_PATH >> $GITHUB_ENV
          echo PATH=$PATH:$ROCM_PATH/bin >> $GITHUB_ENV
          echo LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/llvm/lib:$ROCM_PATH/lib/rocprofiler-systems >> $GITHUB_ENV

      - name: Build with native CMake HIP support
        run: |
          cmake -B build -S . \
            -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
            -DCMAKE_HIP_FLAGS="-mllvm --amdgpu-unroll-threshold-local=600" \
            -DCMAKE_BUILD_TYPE=Release \
            -DGGML_BACKEND_DL=ON \
            -DGGML_NATIVE=OFF \
            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DGGML_CPU_ALL_VARIANTS=ON \
            -DGPU_TARGETS="${{ matrix.gpu_targets }}" \
            -DGGML_HIP=ON \
            -DHIP_PLATFORM=amd \
            -DGGML_HIP_ROCWMMA_FATTN=ON \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-linux-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-linux-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz
          name: llama-bin-linux-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}.tar.gz

  # Builds only the `ggml-hip` target on Windows with the AMD HIP SDK and
  # uploads the resulting ggml-hip.dll.
  windows-hip:
    runs-on: windows-2022

    env:
      HIPSDK_INSTALLER_VERSION: "26.Q1"

    strategy:
      matrix:
        include:
          - name: "radeon"
            gpu_targets: "gfx1150;gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"

    steps:
      - name: Clone
        uses: actions/checkout@v6

      # Unpacks the rocWMMA development package into the workspace; the Build
      # step adds its opt/rocm-7.2.0/include directory to the include path.
      - name: Grab rocWMMA package
        run: |
          curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70200-43~24.04_amd64.deb"
          7z x rocwmma.deb
          7z x data.tar

      - name: Cache ROCm Installation
        id: cache-rocm
        uses: actions/cache@v5
        with:
          path: C:\Program Files\AMD\ROCm
          key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
          evict-old-files: 1d

      # RUNNER_TEMP is read through PowerShell's env: drive. The workflow
      # `env` context only holds variables declared in the workflow, job or
      # step, so an `env.RUNNER_TEMP` expression would expand to nothing.
      - name: Install ROCm
        if: steps.cache-rocm.outputs.cache-hit != 'true'
        run: |
          $ErrorActionPreference = "Stop"
          write-host "Downloading AMD HIP SDK Installer"
          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-Win11-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP SDK"
          $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
          # Wait at most 600000 ms (10 minutes) for the installer to exit.
          $completed = $proc.WaitForExit(600000)
          if (-not $completed) {
              Write-Error "ROCm installation timed out after 10 minutes. Killing the process"
              $proc.Kill()
              exit 1
          }
          if ($proc.ExitCode -ne 0) {
              Write-Error "ROCm installation failed with exit code $($proc.ExitCode)"
              exit 1
          }
          write-host "Completed AMD HIP SDK installation"

      - name: Verify ROCm
        run: |
          $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
          if (-not $clangPath) {
              Write-Error "ROCm installation not found"
              exit 1
          }
          & $clangPath.FullName --version

      - name: Build
        run: |
          # HIP_PATH is the directory two levels above the located clang.exe.
          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
          cmake -G "Unix Makefiles" -B build -S . `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DCMAKE_CXX_FLAGS="-I$(($PWD.Path.Replace('\', '/')))/opt/rocm-7.2.0/include/ -Wno-ignored-attributes -Wno-nested-anonymous-types" `
            -DCMAKE_BUILD_TYPE=Release `
            -DGGML_BACKEND_DL=ON `
            -DGGML_NATIVE=OFF `
            -DGGML_CPU=OFF `
            -DGPU_TARGETS="${{ matrix.gpu_targets }}" `
            -DGGML_HIP_ROCWMMA_FATTN=ON `
            -DGGML_HIP=ON `
            -DLLAMA_BUILD_BORINGSSL=ON
          $env:NINJA_JOBS=$env:NUMBER_OF_PROCESSORS-1
          cmake --build build --target ggml-hip -j $env:NINJA_JOBS --config Release

      # This archive name carries no tag; the release notes link to it under
      # exactly this name.
      - name: Pack artifacts
        run: |
          7z a -snl llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\ggml-hip.dll

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-bin-win-hip-${{ matrix.name }}-x64.zip
          name: llama-bin-win-hip-${{ matrix.name }}-x64.zip

  # Publishes a release from the artifacts of all build jobs. Runs only when
  # the `create_release` input was set to true.
  release:
    if: ${{ github.event.inputs.create_release == 'true' }}

    # Write access to repository contents for the release steps below.
    permissions:
      contents: write

    runs-on: ubuntu-latest

    needs:
      - macOS-arm64
      - linux-cuda
      - windows-cuda
      - ubuntu-22-rocm
      - windows-hip

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Download artifacts
        uses: actions/download-artifact@v7
        with:
          path: ./artifact
          merge-multiple: true

      # Best effort: a missing archive type is tolerated (`|| true`) and the
      # final listing shows what was actually collected.
      - name: Move artifacts
        run: |
          mkdir -p release
          mv -v artifact/*.tar.gz release/ 2>/dev/null || true
          mv -v artifact/*.zip release/ 2>/dev/null || true
          ls -lh release/

      # Blank lines separate each heading from the neighbouring lists so the
      # Markdown renders them as distinct paragraphs.
      - name: Create release
        id: create_release
        uses: ggml-org/action-create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}
          body: |
            Pre-built binaries (PrismML fork with Q1_0 1-bit quantization support).

            **macOS:**

            - [macOS Apple Silicon (arm64)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)

            **Linux:**

            - [Linux x64 (CUDA 12.4)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-12.4-x64.tar.gz)
            - [Linux x64 (CUDA 12.8)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-12.8-x64.tar.gz)
            - [Linux x64 (CUDA 13.1)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-13.1-x64.tar.gz)

            **Linux (AMD):**

            - [Linux x64 (ROCm 7.2)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-rocm-7.2-x64.tar.gz)

            **Windows:**

            - [Windows x64 (CUDA 12.4)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip) - [CUDA 12.4 DLLs](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-12.4-x64.zip)
            - [Windows x64 (CUDA 13.1)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.1-x64.zip) - [CUDA 13.1 DLLs](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-13.1-x64.zip)
            - [Windows x64 (HIP/ROCm)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-bin-win-hip-radeon-x64.zip)

      # Attaches every collected file to the release; --clobber replaces an
      # asset that already exists under the same name.
      - name: Upload release
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          for file in release/*; do
            echo "Uploading $(basename "$file")..."
            gh release upload "${{ steps.tag.outputs.name }}" "$file" --clobber
          done