# Release (Prism) #4
# Note: GitHub's file viewer flagged this file as containing hidden or bidirectional Unicode text that may be interpreted or compiled differently from how it appears below. To review it, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Manually dispatched workflow that builds the macOS arm64, Linux CUDA and
# Windows CUDA binaries and, when requested, publishes them as a release.
name: Release (Prism)

on:
  workflow_dispatch:
    inputs:
      # Read by the `release` job's `if:` condition to decide whether a
      # release is created after the builds finish.
      create_release:
        description: 'Create new release'
        required: true
        type: boolean

# Runs sharing a group cancel each other. The group is the workflow name plus
# `github.ref` when `github.head_ref` is set, otherwise the run id.
# NOTE(review): `head_ref` is normally only populated for pull-request events,
# so a manual dispatch should always land in its own per-run group — confirm.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  # Not referenced directly by any step in this file; presumably consumed by
  # the local composite actions (e.g. get-tag-name) — verify before removing.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # CMake options shared by every build job. The folded scalar collapses to a
  # single space-separated line, so it can be spliced into a command line.
  CMAKE_ARGS: >-
    -DLLAMA_BUILD_EXAMPLES=OFF
    -DLLAMA_BUILD_TESTS=OFF
    -DLLAMA_BUILD_TOOLS=ON
    -DLLAMA_BUILD_SERVER=ON
    -DGGML_RPC=ON
jobs:
  # Builds the Metal-enabled binaries on a macOS 14 runner and uploads them
  # as a single .tar.gz artifact.
  macOS-arm64:
    runs-on: macos-14

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          # Full history is fetched before the get-tag-name step below.
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: macOS-latest-cmake-arm64
          evict-old-files: 1d

      # -DGGML_RPC=ON is not repeated here: it already arrives through
      # env.CMAKE_ARGS, exactly as in the Linux and Windows jobs.
      - name: Build
        run: |
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='@loader_path' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # Ships LICENSE next to the binaries; `-s` rewrites the leading "./" of
      # every archive member to "llama-<tag>/".
      - name: Pack artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
          name: llama-bin-macos-arm64.tar.gz
# ---- Linux x64 CUDA builds: one job per CUDA toolkit version --------------
  linux-cuda:
    runs-on: ubuntu-22.04

    strategy:
      matrix:
        # `cuda` is used in install paths and artifact names; `cuda_pkg` is
        # the dash-separated form used in the apt package name.
        include:
          - cuda: '12.4'
            cuda_pkg: '12-4'
          - cuda: '12.8'
            cuda_pkg: '12-8'
          - cuda: '13.1'
            cuda_pkg: '13-1'

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          # Full history is fetched before the get-tag-name step below.
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: ubuntu-22-cmake-cuda-${{ matrix.cuda }}
          evict-old-files: 1d

      # Installs the toolkit from NVIDIA's apt repository and exports its
      # location to the following steps via GITHUB_PATH / GITHUB_ENV.
      - name: Install CUDA toolkit
        run: |
          wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          sudo apt-get -y install cuda-toolkit-${{ matrix.cuda_pkg }}
          echo "/usr/local/cuda-${{ matrix.cuda }}/bin" >> $GITHUB_PATH
          echo "CUDA_PATH=/usr/local/cuda-${{ matrix.cuda }}" >> $GITHUB_ENV
          echo "LD_LIBRARY_PATH=/usr/local/cuda-${{ matrix.cuda }}/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV

      # The build output is piped through grep to drop "nvcc warning" lines.
      # The default shell for this step does not enable pipefail, so without
      # `set -o pipefail` the step would report grep's exit status and a
      # failed build would be treated as a success.
      - name: Build
        run: |
          set -o pipefail
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DGGML_NATIVE=OFF \
            -DGGML_CUDA=ON \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc) 2>&1 | grep -v "^nvcc warning"

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # Ships LICENSE next to the binaries; `--transform` rewrites the leading
      # "./" of every archive member to "llama-<tag>/".
      - name: Pack artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz
          name: llama-bin-linux-cuda-${{ matrix.cuda }}-x64.tar.gz
# ---- Windows x64 CUDA builds: one job per CUDA toolkit version ------------
  windows-cuda:
    runs-on: windows-2022

    strategy:
      matrix:
        cuda: ['12.4', '13.1']

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          # Every other job that runs get-tag-name checks out full history
          # first. NOTE(review): the action presumably derives the tag from
          # git history; with a shallow clone this job could produce a tag
          # (and therefore archive names) that differ from the ones the
          # release job links to — confirm against the action.
          fetch-depth: 0

      - name: Install ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-cuda-${{ matrix.cuda }}
          variant: ccache
          evict-old-files: 1d

      - name: Install Cuda Toolkit
        uses: ./.github/actions/windows-setup-cuda
        with:
          cuda_version: ${{ matrix.cuda }}

      - name: Install Ninja
        run: choco install ninja

      # Runs under cmd so vcvarsall.bat can set up the MSVC x64 environment
      # in the same shell before CMake is invoked. The build uses one job
      # fewer than the number of processors.
      - name: Build
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -DGGML_NATIVE=OFF ^
            -DGGML_CUDA=ON ^
            -DLLAMA_BUILD_BORINGSSL=ON ^
            -DCMAKE_CUDA_FLAGS="-diag-suppress=221" ^
            ${{ env.CMAKE_ARGS }}
          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
          cmake --build build --config Release -j %NINJA_JOBS%

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        run: |
          7z a -snl llama-${{ steps.tag.outputs.name }}-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

      # Collects the cudart/cublas/cublasLt DLLs into a separate archive.
      # Three candidate directories under CUDA_PATH are searched (bin, lib
      # and bin\x64).
      - name: Copy and pack Cuda runtime
        run: |
          echo "Cuda install location: ${{ env.CUDA_PATH }}"
          $dst='.\build\bin\cudart\'
          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\bin\x64" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*

      - name: Upload Cuda runtime
        uses: actions/upload-artifact@v6
        with:
          path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
# ---- Publish: collect every build artifact and attach it to a release -----
  release:
    # Workflow-dispatch inputs reach expressions as strings, hence the
    # comparison with 'true'.
    if: ${{ github.event.inputs.create_release == 'true' }}

    # Write access to repository contents, used by the release creation and
    # asset upload steps below.
    permissions:
      contents: write

    runs-on: ubuntu-latest

    needs:
      - macOS-arm64
      - linux-cuda
      - windows-cuda

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          # Full history is fetched before the get-tag-name step below.
          fetch-depth: 0

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # merge-multiple places the files of all artifacts directly in
      # ./artifact instead of one sub-directory per artifact.
      - name: Download artifacts
        uses: actions/download-artifact@v7
        with:
          path: ./artifact
          merge-multiple: true

      # Best effort by design: a pattern that matches nothing is ignored, and
      # the final listing shows what will actually be published.
      - name: Move artifacts
        run: |
          mkdir -p release
          mv -v artifact/*.tar.gz release/ 2>/dev/null || true
          mv -v artifact/*.zip release/ 2>/dev/null || true
          ls -lh release/

      # The download links assume every build job produced archives named
      # with the same tag as the one computed in this job. Blank lines
      # separate each heading from the neighbouring bullet lists; without
      # them Markdown folds a heading into the preceding list item.
      - name: Create release
        id: create_release
        uses: ggml-org/action-create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}
          body: |
            Pre-built binaries (PrismML fork with Q1_0 1-bit quantization support).

            **macOS:**

            - [macOS Apple Silicon (arm64)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)

            **Linux:**

            - [Linux x64 (CUDA 12.4)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-12.4-x64.tar.gz)
            - [Linux x64 (CUDA 12.8)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-12.8-x64.tar.gz)
            - [Linux x64 (CUDA 13.1)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-linux-cuda-13.1-x64.tar.gz)

            **Windows:**

            - [Windows x64 (CUDA 12.4)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip) - [CUDA 12.4 DLLs](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-12.4-x64.zip)
            - [Windows x64 (CUDA 13.1)](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.1-x64.zip) - [CUDA 13.1 DLLs](https://github.com/${{ github.repository }}/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-13.1-x64.zip)

      # The tag is handed to the script through an environment variable and
      # every expansion is quoted, so unusual characters in the tag or in a
      # file name cannot be re-interpreted by the shell.
      - name: Upload release
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TAG_NAME: ${{ steps.tag.outputs.name }}
        run: |
          for file in release/*; do
            echo "Uploading $(basename "$file")..."
            gh release upload "$TAG_NAME" "$file" --clobber
          done