# Build, code-sign, notarize and publish a prerelease of the TurboQuant
# llama.cpp tools for macOS ARM64 (Metal backend).
#
# Required repository secrets:
#   MACOS_CERTIFICATE_P12, MACOS_CERTIFICATE_PASSWORD, KEYCHAIN_PASSWORD,
#   APPLE_ID, APPLE_ID_PASSWORD, APPLE_TEAM_ID
name: Build & Release TurboQuant (macOS ARM64)

on:
  workflow_dispatch:
  push:
    branches:
      - feature/turboquant-kv-cache
    paths:
      - '.github/workflows/build-turboquant-macos.yml'
      - '**/CMakeLists.txt'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.metal'

# This workflow only runs on push / workflow_dispatch, where github.head_ref is
# always empty. Keying the group on github.ref makes cancel-in-progress actually
# cancel a superseded run on the same ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1

jobs:
  macOS-arm64-metal:
    runs-on: macos-latest
    permissions:
      contents: write  # needed to create the tag and GitHub release

    steps:
      - name: Clone
        uses: actions/checkout@v6

      # Derive the release tag from the short commit hash.
      - name: Set version tag
        id: version
        run: |
          SHORT_SHA=$(git rev-parse --short HEAD)
          TAG="turboquant-macos-arm64-${SHORT_SHA}"
          echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
          echo "short_sha=${SHORT_SHA}" >> "$GITHUB_OUTPUT"

      # Import the signing certificate into a temporary keychain and export
      # KEYCHAIN_PATH / CODESIGN_IDENTITY for the later steps.
      - name: Import code signing certificate
        env:
          MACOS_CERTIFICATE_P12: ${{ secrets.MACOS_CERTIFICATE_P12 }}
          MACOS_CERTIFICATE_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PASSWORD }}
          KEYCHAIN_PASSWORD: ${{ secrets.KEYCHAIN_PASSWORD }}
        run: |
          CERTIFICATE_PATH="$RUNNER_TEMP/build_certificate.p12"
          KEYCHAIN_PATH="$RUNNER_TEMP/app-signing.keychain-db"

          echo -n "$MACOS_CERTIFICATE_P12" | base64 --decode -o "$CERTIFICATE_PATH"

          security create-keychain -p "$KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH"
          security set-keychain-settings -lut 21600 "$KEYCHAIN_PATH"
          security unlock-keychain -p "$KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH"

          security import "$CERTIFICATE_PATH" -P "$MACOS_CERTIFICATE_PASSWORD" \
            -A -t cert -f pkcs12 -k "$KEYCHAIN_PATH"
          security set-key-partition-list -S apple-tool:,apple: \
            -k "$KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH"
          security list-keychain -d user -s "$KEYCHAIN_PATH"

          echo "KEYCHAIN_PATH=$KEYCHAIN_PATH" >> "$GITHUB_ENV"

          # The pipeline's exit status is that of `tr`, so a missing identity
          # would otherwise go unnoticed until codesign fails later.
          IDENTITY=$(security find-identity -v -p codesigning "$KEYCHAIN_PATH" | head -1 | grep -o '".*"' | tr -d '"')
          if [ -z "$IDENTITY" ]; then
            echo "::error::No valid code signing identity found in the imported certificate"
            exit 1
          fi
          echo "CODESIGN_IDENTITY=$IDENTITY" >> "$GITHUB_ENV"
          echo "Signing identity: $IDENTITY"

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          cmake -B build \
            -DCMAKE_BUILD_TYPE=Release \
            -DBUILD_SHARED_LIBS=OFF \
            -DLLAMA_CURL=OFF \
            -DLLAMA_OPENSSL=OFF \
            -DGGML_METAL=ON \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_BUILD_TOOLS=ON \
            -DLLAMA_BUILD_TESTS=OFF \
            -DLLAMA_BUILD_EXAMPLES=OFF
          cmake --build build --config Release -j $(sysctl -n hw.ncpu)

      # Informational check: stays non-fatal, but a missing "turbo3" in the
      # help text is surfaced as a warning annotation instead of being hidden.
      - name: Verify turbo3 support
        run: |
          HELP_OUTPUT=$(./build/bin/llama-server --help 2>&1 || true)
          printf '%s\n' "$HELP_OUTPUT" | grep -A2 "cache-type-k" || true
          if ! printf '%s\n' "$HELP_OUTPUT" | grep -q "turbo3"; then
            echo "::warning::llama-server --help does not mention turbo3; check that TurboQuant cache types are compiled in"
          fi
          echo "---"
          file ./build/bin/llama-server
          ./build/bin/llama-server --version 2>&1 || true

      # Fail the job if llama-server links any dylib outside /usr/lib or /System.
      - name: Verify static binary
        run: |
          echo "=== All dynamic dependencies (should be system-only) ==="
          otool -L build/bin/llama-server
          echo "---"
          echo "=== Checking for non-system dylibs (should be empty) ==="
          if otool -L build/bin/llama-server | grep -vE '/usr/lib|/System|llama-server' | grep '\.dylib'; then
            echo "ERROR: Found non-system dynamic dependency!"
            exit 1
          fi
          echo "OK: Only system dylibs — fully self-contained binary"
          echo "---"
          echo "=== Binary size ==="
          ls -lh build/bin/llama-server

      # Sign with the hardened runtime and a secure timestamp.
      - name: Sign binaries
        run: |
          for bin in build/bin/llama-server build/bin/llama-cli build/bin/llama-bench build/bin/llama-perplexity; do
            if [ -f "$bin" ]; then
              echo "Signing $bin ..."
              codesign --force --options runtime --timestamp \
                --entitlements .github/entitlements.plist \
                --sign "$CODESIGN_IDENTITY" "$bin"
              codesign --verify --verbose "$bin"
            fi
          done
          # NUL-delimited read so paths with whitespace are not word-split.
          while IFS= read -r -d '' lib; do
            echo "Signing dylib $lib ..."
            codesign --force --options runtime --timestamp \
              --sign "$CODESIGN_IDENTITY" "$lib"
          done < <(find build -name "*.dylib" -print0 2>/dev/null)

      # Stage the signed binaries under release/build/bin and archive them.
      - name: Prepare release archive
        run: |
          mkdir -p release/build/bin
          cp build/bin/llama-server release/build/bin/
          cp build/bin/llama-cli release/build/bin/ 2>/dev/null || true
          cp build/bin/llama-bench release/build/bin/ 2>/dev/null || true
          cp build/bin/llama-perplexity release/build/bin/ 2>/dev/null || true
          find build -name "*.dylib" -exec cp {} release/build/bin/ \; 2>/dev/null || true
          find build -name "*.metal" -path "*/bin/*" -exec cp {} release/build/bin/ \; 2>/dev/null || true
          cd release
          zip -r ../llama-turboquant-macos-arm64.zip .
          tar -czf ../llama-turboquant-macos-arm64.tar.gz .
          cd ..
          ls -lh llama-turboquant-macos-arm64.zip llama-turboquant-macos-arm64.tar.gz

      # One submission of the zip is enough: per Apple's notarization docs the
      # notary service issues tickets for the nested files as well, and the
      # tar.gz ships the very same signed binaries.
      - name: Notarize release archive
        env:
          APPLE_ID: ${{ secrets.APPLE_ID }}
          APPLE_ID_PASSWORD: ${{ secrets.APPLE_ID_PASSWORD }}
          APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
        run: |
          # Keep notarytool's own failure visible through the `tee` pipeline.
          set -o pipefail
          NOTARY_LOG="$RUNNER_TEMP/notarize.log"
          echo "=== Notarizing zip archive ==="
          xcrun notarytool submit llama-turboquant-macos-arm64.zip \
            --apple-id "$APPLE_ID" \
            --password "$APPLE_ID_PASSWORD" \
            --team-id "$APPLE_TEAM_ID" \
            --wait --timeout 10m 2>&1 | tee "$NOTARY_LOG"
          # NOTE(review): notarytool appears to exit 0 once processing finishes
          # even when the final status is "Invalid", so check it explicitly.
          if ! grep -q 'status: Accepted' "$NOTARY_LOG"; then
            echo "::error::Notarization did not finish with status Accepted"
            exit 1
          fi
          echo "=== All notarization complete ==="

      - name: Clean up keychain
        if: always()
        run: |
          # KEYCHAIN_PATH is unset when the import step failed early; never
          # call delete-keychain without an explicit keychain argument.
          if [ -n "${KEYCHAIN_PATH:-}" ]; then
            security delete-keychain "$KEYCHAIN_PATH" 2>/dev/null || true
          fi
          # Do not leave the decoded certificate on disk.
          rm -f "$RUNNER_TEMP/build_certificate.p12"

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: llama-turboquant-macos-arm64
          path: release/
          retention-days: 30

      - name: Create GitHub Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.version.outputs.tag }}
          target_commitish: ${{ github.sha }}
          name: "TurboQuant macOS ARM64 (${{ steps.version.outputs.short_sha }})"
          body: |
            ## TurboQuant KV Cache — macOS ARM64 (Metal)

            Built from `${{ github.ref_name }}` at commit `${{ steps.version.outputs.short_sha }}`.

            ### What's included
            - `llama-server` with `--cache-type-k turbo3` / `turbo4` support
            - `llama-cli`, `llama-bench`, `llama-perplexity`
            - Metal backend with BF16 + embedded shader library

            ### Usage
            ```bash
            # Option 1: zip (binaries are signed and notarized)
            unzip llama-turboquant-macos-arm64.zip
            # Option 2: tar.gz (same signed and notarized binaries)
            tar -xzf llama-turboquant-macos-arm64.tar.gz

            ./build/bin/llama-server -m model.gguf --cache-type-k turbo3 --cache-type-v turbo3
            ```

            ### For Atomic Chat integration
            Replace the binary at:
            ```
            ~/Library/Application Support/Atomic Chat/data/llamacpp/backends/<version>/macos-arm64/build/bin/llama-server
            ```
          files: |
            llama-turboquant-macos-arm64.zip
            llama-turboquant-macos-arm64.tar.gz
          draft: false
          prerelease: true