Configurable fine-tuning API (TrainingParameters + Optimizer) #515
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com> | |
| # SPDX-FileCopyrightText: 2023-2025 Konstantin Herud | |
| # | |
| # SPDX-License-Identifier: MIT | |
# Workflow identity and triggers: pushes to main, v* tags, pull requests, and
# manual dispatch with two boolean inputs.
name: Publish

on:
  push:
    branches:
      - main
    tags:
      - 'v*'
  pull_request:
  workflow_dispatch:
    inputs:
      publish_to_central:
        description: "Deploy to Maven Central (snapshot if -SNAPSHOT, release if a vX.Y.Z tag)"
        type: boolean
        default: false
      use_cache:
        description: "Use the shared sccache/Depot compiler cache (faster incremental builds)"
        type: boolean
        default: true

# Workflow-wide environment. JAVA_VERSION feeds the setup-java steps; the
# *_URL / *_NAME pairs name GGUF model files (presumably downloaded by test
# jobs further down the workflow — confirm against those jobs).
env:
  JAVA_VERSION: '21'

  MODEL_URL: 'https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf'
  MODEL_NAME: 'codellama-7b.Q2_K.gguf'

  RERANKING_MODEL_URL: 'https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-Q4_0.gguf'
  RERANKING_MODEL_NAME: 'jina-reranker-v1-tiny-en-Q4_0.gguf'

  DRAFT_MODEL_URL: 'https://huggingface.co/QuantFactory/AMD-Llama-135m-code-GGUF/resolve/main/AMD-Llama-135m-code.Q2_K.gguf'
  DRAFT_MODEL_NAME: 'AMD-Llama-135m-code.Q2_K.gguf'

  REASONING_MODEL_URL: 'https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q4_K_M.gguf'
  REASONING_MODEL_NAME: 'Qwen3-0.6B-Q4_K_M.gguf'

  TOOL_MODEL_URL: 'https://huggingface.co/bartowski/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf'
  TOOL_MODEL_NAME: 'Qwen2.5-1.5B-Instruct-Q4_K_M.gguf'

  NOMIC_EMBED_MODEL_URL: 'https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf'
  NOMIC_EMBED_MODEL_NAME: 'nomic-embed-text-v1.5.f16.gguf'

  # Vision model + mmproj for MultimodalIntegrationTest.
  # SmolVLM-500M is the smallest community vision GGUF that loads reliably
  # under the upstream mtmd pipeline. Total download is ~600 MB across model
  # plus mmproj, which matches the existing per-test-job download budget.
  VISION_MODEL_URL: 'https://huggingface.co/ggml-org/SmolVLM-500M-Instruct-GGUF/resolve/main/SmolVLM-500M-Instruct-Q8_0.gguf'
  VISION_MODEL_NAME: 'SmolVLM-500M-Instruct-Q8_0.gguf'
  VISION_MMPROJ_URL: 'https://huggingface.co/ggml-org/SmolVLM-500M-Instruct-GGUF/resolve/main/mmproj-SmolVLM-500M-Instruct-Q8_0.gguf'
  VISION_MMPROJ_NAME: 'mmproj-SmolVLM-500M-Instruct-Q8_0.gguf'

  # Text-to-speech models for TtsIntegrationTest, the sibling of
  # AudioInputIntegrationTest (OuteTTS pipeline).
  TTS_MODEL_URL: 'https://huggingface.co/second-state/OuteTTS-0.2-500M-GGUF/resolve/main/OuteTTS-0.2-500M-Q4_K_M.gguf'
  TTS_MODEL_NAME: 'OuteTTS-0.2-500M-Q4_K_M.gguf'
  TTS_VOCODER_URL: 'https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-F16.gguf'
  TTS_VOCODER_NAME: 'WavTokenizer-Large-75-F16.gguf'

  # The test image used by MultimodalIntegrationTest is committed to the repo
  # at src/test/resources/images/test-image.jpg (see the README in that
  # directory for licensing). No download step is needed; CI just points
  # mvn test at the committed path.
  VISION_IMAGE_PATH: 'src/test/resources/images/test-image.jpg'

# Default token scope for every job: read-only repository contents.
permissions:
  contents: read
| jobs: | |
| # --------------------------------------------------------------------------- | |
| # Start gate — single cancellable abort window before the pipeline starts. | |
| # The wait duration lives in the `startgate` GitHub Environment (Settings → | |
| # Environments → startgate → Wait timer). | |
| # --------------------------------------------------------------------------- | |
# Gate job: performs no work of its own. Every other job visible here lists it
# under `needs:`, so nothing starts until this job has completed.
startgate:
  name: Start gate (abort window)
  runs-on: ubuntu-latest
  # The abort window is the wait timer configured on this GitHub Environment
  # (Settings → Environments → startgate → Wait timer), not anything in this file.
  environment: startgate
  steps:
    - run: echo "Start gate elapsed — proceeding with pipeline."
| # --------------------------------------------------------------------------- | |
| # Code-style gate (Spotless / SpotBugs / jdeps). The cross-compile jobs (Docker / | |
| # dockcross — produce release artifacts, no testing) follow after build-webui. | |
| # --------------------------------------------------------------------------- | |
# Static checks on the Java sources: Spotless formatting, SpotBugs analysis,
# and an informational jdeps package graph. Runs once the start gate completes.
code-style:
  name: Code style (spotless) + package graph
  needs: startgate
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v7

    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        # Use the workflow-level JAVA_VERSION (like the other setup-java steps)
        # rather than a second hard-coded copy that can drift on a JDK bump.
        java-version: ${{ env.JAVA_VERSION }}

    - name: Spotless check (fail fast on format violations)
      run: mvn -B --no-transfer-progress spotless:check

    - name: SpotBugs check (fail fast on static-analysis findings)
      run: mvn -B --no-transfer-progress -DskipTests -Denforcer.skip=true compile spotbugs:check

    # Informational only: continue-on-error keeps the job green if jdeps or the
    # compile fails, and `|| true` keeps an empty grep result from failing it.
    - name: Print internal package dependency graph (jdeps, informational)
      continue-on-error: true
      run: |
        mvn -B --no-transfer-progress -DskipTests -Denforcer.skip=true compile
        echo "=== internal package dependency graph (jdeps, bytecode) ==="
        jdeps -verbose:package target/classes | grep 'net.ladenthin.llama' || true
| # --------------------------------------------------------------------------- | |
| # Build the llama.cpp WebUI ONCE, from the same pinned tag CMakeLists.txt fetches, | |
| # and share it to every native build as the generated, platform-independent | |
| # ui.cpp/ui.h ("webui-generated" artifact). The native builds embed it into | |
| # libjllama (CMake's "WebUI assets" block); when this job's artifact is absent the | |
| # build falls back to the empty-asset stub. npm runs only here, in one controlled | |
| # job — never in the dockcross cross-compilers (which have no node) or per-platform. | |
| # --------------------------------------------------------------------------- | |
# Builds the llama.cpp WebUI once, embeds it into generated ui.cpp / ui.h, and
# publishes both as the `webui-generated` artifact that the native build jobs
# download.
build-webui:
  name: Build WebUI assets (shared)
  needs: startgate
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v7

    # Extract the b<nnnn> tag that CMakeLists.txt pins via GIT_TAG.
    - name: Resolve pinned llama.cpp tag from CMakeLists.txt
      id: tag
      shell: bash
      run: |
        # `shell: bash` runs with -e and pipefail, so a non-matching grep would
        # abort the step right here; `|| true` lets the empty-TAG check below
        # report the failure with a readable message instead.
        TAG=$(grep -oE 'GIT_TAG[[:space:]]+b[0-9]+' CMakeLists.txt | grep -oE 'b[0-9]+' | head -1 || true)
        if [ -z "$TAG" ]; then
          echo "could not resolve llama.cpp GIT_TAG (b<nnnn>) from CMakeLists.txt" >&2
          exit 1
        fi
        echo "tag=$TAG" >> "$GITHUB_OUTPUT"
        echo "Pinned llama.cpp WebUI tag: $TAG"

    # Sparse checkout: only tools/ui of upstream llama.cpp, at the resolved tag.
    - name: Checkout llama.cpp tools/ui at the pinned tag
      uses: actions/checkout@v7
      with:
        repository: ggml-org/llama.cpp
        ref: ${{ steps.tag.outputs.tag }}
        path: llamacpp-ui
        sparse-checkout: tools/ui
        sparse-checkout-cone-mode: true

    - uses: actions/setup-node@v6
      with:
        node-version: '24'
        cache: npm
        cache-dependency-path: llamacpp-ui/tools/ui/package-lock.json

    - name: Build WebUI (Svelte/Vite)
      working-directory: llamacpp-ui/tools/ui
      env:
        HF_UI_VERSION: ${{ steps.tag.outputs.tag }}
        LLAMA_BUILD_NUMBER: ${{ steps.tag.outputs.tag }}
      run: |
        npm ci --ignore-scripts
        npm run build
        # Fail the step if the build did not produce the entry page.
        test -f dist/index.html

    - name: Embed assets into ui.cpp / ui.h (gzip parity with upstream)
      working-directory: llamacpp-ui/tools/ui
      shell: bash
      run: |
        set -euo pipefail
        # gzip every asset into dist/_gzip/<path> so llama-ui-embed embeds the
        # compressed bytes (LLAMA_UI_GZIP parity); embed auto-detects _gzip.
        # NUL-delimited names + `IFS=` keep paths containing whitespace or
        # newlines intact.
        ( cd dist && find . -type f -not -path './_gzip/*' -print0 | while IFS= read -r -d '' f; do
            mkdir -p "_gzip/$(dirname "$f")"
            gzip -9 -c "$f" > "_gzip/$f"
          done )
        # llama-ui-embed is a self-contained C++17 host tool (no npm) — build + run it.
        g++ -O2 -std=c++17 -o llama-ui-embed embed.cpp
        mkdir -p "$GITHUB_WORKSPACE/webui-generated"
        ./llama-ui-embed \
          "$GITHUB_WORKSPACE/webui-generated/ui.cpp" \
          "$GITHUB_WORKSPACE/webui-generated/ui.h" \
          dist
        echo "=== generated WebUI assets ==="
        ls -la "$GITHUB_WORKSPACE/webui-generated"
        # A header without LLAMA_UI_HAS_ASSETS is treated as an empty asset table.
        if grep -q LLAMA_UI_HAS_ASSETS "$GITHUB_WORKSPACE/webui-generated/ui.h"; then
          echo "LLAMA_UI_HAS_ASSETS: present (real WebUI embedded)"
        else
          echo "ERROR: embed produced an empty asset table" >&2
          exit 1
        fi

    - name: Upload WebUI artifact
      uses: actions/upload-artifact@v7
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
        retention-days: 1
        if-no-files-found: error
# CUDA build of the Linux x86_64 natives inside the manylinux_2_28 dockcross
# image; publishes the `linux-libraries-cuda` artifact. No tests run here.
crosscompile-linux-x86_64-cuda:
  name: Cross-Compile manylinux_2_28 x86_64 (CUDA)
  needs: [startgate, build-webui]
  runs-on: ubuntu-latest
  # CUDA cache. build_cuda_linux.sh execs build.sh, so the same sccache probe guards this job.
  # build.sh also wraps nvcc (CMAKE_CUDA_COMPILER_LAUNCHER=sccache) for CUDA builds, so the
  # per-arch .cu device passes — the dominant cost of this job — cache over Depot alongside the
  # gcc host TUs. Verified on a warm run: 100% hit on CUDA / CUBIN / device-code (139 CUDA hits,
  # 99.86% overall), cutting the job from ~51 min cold to ~15 min warm. The job therefore always
  # builds the FULL CMAKE_CUDA_ARCHITECTURES set (no single-arch shortcut) and leans on the warm
  # cache for speed, so every artifact stays release-safe (runs on every GPU generation) on PR /
  # push as well as publish. CUDA_FAST_BUILD still exists in build_cuda_linux.sh as a LOCAL-dev
  # knob, but CI no longer sets it. The first-run sccache debug diagnostics (SCCACHE_LOG /
  # SCCACHE_ERROR_LOG / RUST_BACKTRACE) were dropped now that caching is confirmed; build.sh still
  # prints the `sccache --show-stats` hit table at the end of every run. Inert without DEPOT_TOKEN
  # (fork PRs) or use_cache=false.
  env:
    # True for every trigger except a manual dispatch that unticked use_cache.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    # `-e VAR` flags naming the three cache variables (presumably consumed by
    # the dockcross wrapper script to forward them into the container — confirm).
    DOCKCROSS_ARGS: "-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE"
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo

    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-manylinux_2_28-x64 .github/build_cuda_linux.sh "-DOS_NAME=Linux -DOS_ARCH=x86_64"

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: linux-libraries-cuda
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
# CPU build of the Linux x86_64 natives inside the manylinux2014 dockcross
# image; publishes the `Linux-x86_64-libraries` artifact. No tests run here.
crosscompile-linux-x86_64:
  name: Cross-Compile manylinux2014 x86_64
  needs: [startgate, build-webui]
  runs-on: ubuntu-latest
  # Phase 2 dockcross cache rollout — job 1, VERIFIED green in CI (PR #245): sccache v0.16.0
  # probe passed in-container (devtoolset-10 gcc), cache ON over Depot WebDAV (cold run: 275
  # objects stored). Steady-state env below — the first-run diagnostics (SCCACHE_LOG /
  # SCCACHE_ERROR_LOG / RUST_BACKTRACE) were dropped now that it is proven. Inert without
  # DEPOT_TOKEN (fork PRs) or with use_cache=false; a crashing sccache still falls back to a
  # green uncached build via the build.sh probe.
  env:
    # True for every trigger except a manual dispatch that unticked use_cache.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    # `-e VAR` flags naming the three cache variables (presumably consumed by
    # the dockcross wrapper script to forward them into the container — confirm).
    DOCKCROSS_ARGS: "-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE"
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo

    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-manylinux2014-x64 .github/build.sh "-DOS_NAME=Linux -DOS_ARCH=x86_64"

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Linux-x86_64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
# Native (not cross-compiled) Linux aarch64 build on an arm64 runner: compiles
# the Java sources, builds the natives with GCC 14, runs the C++ unit tests via
# ctest, and publishes the `Linux-aarch64-libraries` artifact.
crosscompile-linux-aarch64:
  name: Build and Test Linux aarch64
  needs: [startgate, build-webui]
  # Native ARM64 build on GitHub's free arm64 runner, mirroring upstream llama.cpp's
  # `ubuntu-cpu` aarch64 release job (ubuntu-24.04-arm + GCC 14). Replaces the former dockcross
  # `linux-arm64-lts` cross-compile (GCC 8.5, glibc 2.17), which can no longer compile llama.cpp
  # b9789 — its C++17 CTAD-in-`new` needs GCC >= 12. Building natively also lets us run the C++
  # unit suite (ctest) on real ARM hardware for the first time (the cross build ran no tests).
  # Trade-off: the glibc floor rises 2.17 -> ~2.39, the same envelope upstream's own ARM binaries
  # require. GGML_NATIVE=OFF keeps the artifact portable across ARMv8 CPU generations (no
  # build-host -march baked in). The job id is kept (a `needs:` target downstream); only the
  # display name changed, so update any branch-protection required-check that pinned the old name.
  runs-on: ubuntu-24.04-arm
  env:
    # True for every trigger except a manual dispatch that unticked use_cache.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}

    # CC / CXX are written to GITHUB_ENV so the later steps see gcc-14 / g++-14.
    - name: Install toolchain (GCC 14, mirrors upstream llama.cpp ARM release)
      run: |
        sudo apt-get update
        sudo apt-get install -y gcc-14 g++-14
        echo "CC=gcc-14" >> "$GITHUB_ENV"
        echo "CXX=g++-14" >> "$GITHUB_ENV"

    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo

    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh "-DOS_NAME=Linux -DOS_ARCH=aarch64 -DGGML_NATIVE=OFF -DBUILD_TESTING=ON"

    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Linux-aarch64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
# CPU build of the Android aarch64 natives inside the android-arm64 dockcross
# image; publishes the `Linux-Android-aarch64-libraries` artifact. No tests.
crosscompile-android-aarch64:
  name: Cross-Compile Android aarch64
  needs: [startgate, build-webui]
  runs-on: ubuntu-latest
  # Phase 2 dockcross cache rollout — job 4. Same steady-state env as manylinux2014 (job 1);
  # the build.sh probe makes it safe to enable without a separate verification run. Inert
  # without DEPOT_TOKEN (fork PRs) or use_cache=false.
  env:
    # True for every trigger except a manual dispatch that unticked use_cache.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    # `-e VAR` flags naming the three cache variables (presumably consumed by
    # the dockcross wrapper script to forward them into the container — confirm).
    DOCKCROSS_ARGS: "-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE"
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo

    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-android-arm64 .github/build.sh "-DOS_NAME=Linux-Android -DOS_ARCH=aarch64"

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Linux-Android-aarch64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
# OpenCL (Adreno kernels) build of the Android aarch64 natives inside the
# android-arm64 dockcross image; publishes the `android-libraries-opencl`
# artifact from the resources_android_opencl tree. No tests run here.
crosscompile-android-aarch64-opencl:
  name: Cross-Compile Android aarch64 (OpenCL/Adreno)
  needs: [startgate, build-webui]
  runs-on: ubuntu-latest
  # Phase 2 dockcross cache rollout — job 5. build_opencl_android.sh stages the OpenCL
  # headers/loader, then delegates the jllama cmake build to build.sh (which owns the
  # sccache probe + launcher). Same steady-state env as the other dockcross jobs. Inert
  # without DEPOT_TOKEN (fork PRs) or use_cache=false.
  env:
    # True for every trigger except a manual dispatch that unticked use_cache.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    # `-e VAR` flags naming the three cache variables (presumably consumed by
    # the dockcross wrapper script to forward them into the container — confirm).
    DOCKCROSS_ARGS: "-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE"
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-android-arm64 .github/build_opencl_android.sh "-DOS_NAME=Linux-Android -DOS_ARCH=aarch64 -DGGML_OPENCL=ON -DGGML_OPENCL_EMBED_KERNELS=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON"

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: android-libraries-opencl
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
| # --------------------------------------------------------------------------- | |
| # Native build jobs — produce release artifacts + run C++ unit tests | |
| # --------------------------------------------------------------------------- | |
# macOS 15 build with Metal switched off (-DLLAMA_METAL=OFF): compiles the Java
# sources, builds the natives, runs the C++ unit tests via ctest, and publishes
# the `macos-15-libraries` artifact.
build-macos-arm64-no-metal:
  name: Build and Test macOS 15 arm64 (no Metal)
  needs: [startgate, build-webui]
  runs-on: macos-15
  env:
    BUILD_JOBS: 2
    # True for every trigger except a manual dispatch that unticked use_cache.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}

    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType

    # Skipped when caching is off or no Depot token is available; a failed
    # install is tolerated (continue-on-error) so the build still proceeds.
    - name: Install sccache (shared compiler cache)
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      run: brew install sccache

    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh -DLLAMA_METAL=OFF -DGGML_NATIVE=OFF -DBUILD_TESTING=ON

    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: macos-15-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
# macOS 14 build with the Metal library embedded
# (-DLLAMA_METAL_EMBED_LIBRARY=ON): compiles the Java sources, builds the
# natives, runs the C++ unit tests via ctest, and publishes the
# `macos-14-libraries` artifact.
build-macos-arm64-metal:
  name: Build and Test macOS 14 arm64 (Metal)
  needs: [startgate, build-webui]
  runs-on: macos-14
  env:
    BUILD_JOBS: 2
    # True for every trigger except a manual dispatch that unticked use_cache.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}

    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType

    # Skipped when caching is off or no Depot token is available; a failed
    # install is tolerated (continue-on-error) so the build still proceeds.
    - name: Install sccache (shared compiler cache)
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      run: brew install sccache

    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh -DLLAMA_METAL_EMBED_LIBRARY=ON -DBUILD_TESTING=ON

    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: macos-14-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
# Windows x86_64 build with the Visual Studio generator: builds the natives,
# runs the C++ unit tests via ctest, and publishes the `Windows-x86_64-msvc`
# artifact. This job sets no sccache environment.
build-windows-x86_64-msvc:
  name: Build and Test Windows 2025 x86_64 (MSVC / VS 2026, classifier)
  needs: [startgate, build-webui]
  runs-on: windows-2025-vs2026
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"

    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Visual Studio 18 2026" -A "x64" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON

    # NOTE(review): the Visual Studio generator is multi-config and ctest is
    # invoked without `-C <config>` — confirm build.bat / the CTest setup
    # selects the configuration so tests are actually executed.
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86_64-msvc
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
# Windows x86 (Win32) build with the Visual Studio generator: builds the
# natives, runs the C++ unit tests via ctest, and publishes the
# `Windows-x86-msvc` artifact. This job sets no sccache environment.
build-windows-x86-msvc:
  name: Build and Test Windows 2025 x86 (MSVC / VS 2026, classifier)
  needs: [startgate, build-webui]
  runs-on: windows-2025-vs2026
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"

    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Visual Studio 18 2026" -A "Win32" -DOS_NAME=Windows -DOS_ARCH=x86 -DBUILD_TESTING=ON

    # NOTE(review): the Visual Studio generator is multi-config and ctest is
    # invoked without `-C <config>` — confirm build.bat / the CTest setup
    # selects the configuration so tests are actually executed.
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86-msvc
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
| # --------------------------------------------------------------------------- | |
| # Windows Ninja Multi-Config + sccache — the DEFAULT Windows CPU natives. | |
| # The Visual Studio generator ignores CMAKE_{C,CXX}_COMPILER_LAUNCHER, so only the | |
| # Ninja Multi-Config generator can front cl.exe with sccache over Depot WebDAV | |
| # (build.bat probe-guards it). Both generators use the same MSVC toolchain (cl.exe, | |
| # static /MT CRT) on the same runner, so the produced jllama.dll/llama.dll/ggml.dll | |
| # are functionally equivalent with identical runtime dependencies — the only delta | |
| # is build-system plumbing + caching. The Ninja build is therefore the default JAR | |
| # (artifacts `Windows-*-libraries`, picked up by the package job's `pattern: | |
| # "*-libraries"`); the MSVC build above is shipped as the `msvc-windows` classifier | |
| # for anyone who wants the Visual-Studio-generator natives. Upstream llama.cpp also | |
| # builds its Windows artifacts with Ninja Multi-Config + MSVC. | |
| # --------------------------------------------------------------------------- | |
# Windows x86_64 build with the Ninja Multi-Config generator and optional
# sccache: builds the natives, runs the C++ unit tests via ctest, and publishes
# the `Windows-x86_64-libraries` artifact.
build-windows-x86_64:
  name: Build and Test Windows 2025 x86_64 (Ninja Multi-Config + sccache, default)
  needs: [startgate, build-webui]
  runs-on: windows-2025-vs2026
  env:
    # True for every trigger except a manual dispatch that unticked use_cache.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Set up MSVC developer environment (x64)
      uses: ilammy/msvc-dev-cmd@v1
      with:
        arch: x64

    # Downloads the pinned sccache release zip, unpacks it under RUNNER_TEMP and
    # appends the unpacked folder to GITHUB_PATH. Skipped when caching is off or
    # no Depot token is available; failures are tolerated (continue-on-error).
    - name: Install sccache (shared compiler cache)
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      shell: pwsh
      run: |
        $ver = "0.16.0"
        $rel = "sccache-v$ver-x86_64-pc-windows-msvc"
        $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip"
        Write-Host "Downloading $url"
        Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip"
        Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel"

    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"

    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON

    # NOTE(review): Ninja Multi-Config is a multi-config generator and ctest is
    # invoked without `-C <config>` — confirm build.bat / the CTest setup
    # selects the configuration so tests are actually executed.
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86_64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
# Windows x86 build with the Ninja Multi-Config generator and optional sccache:
# builds the natives, runs the C++ unit tests via ctest, and publishes the
# `Windows-x86-libraries` artifact.
build-windows-x86:
  name: Build and Test Windows 2025 x86 (Ninja Multi-Config + sccache, default)
  needs: [startgate, build-webui]
  runs-on: windows-2025-vs2026
  env:
    # True for every trigger except a manual dispatch that unticked use_cache.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Set up MSVC developer environment (x86)
      uses: ilammy/msvc-dev-cmd@v1
      with:
        arch: x86

    # Downloads the pinned sccache release zip (the x86_64 host binary, also
    # used for this x86 target), unpacks it under RUNNER_TEMP and appends the
    # unpacked folder to GITHUB_PATH. Skipped when caching is off or no Depot
    # token is available; failures are tolerated (continue-on-error).
    - name: Install sccache (shared compiler cache)
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      shell: pwsh
      run: |
        $ver = "0.16.0"
        $rel = "sccache-v$ver-x86_64-pc-windows-msvc"
        $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip"
        Write-Host "Downloading $url"
        Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip"
        Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel"

    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"

    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86 -DBUILD_TESTING=ON

    # NOTE(review): Ninja Multi-Config is a multi-config generator and ctest is
    # invoked without `-C <config>` — confirm build.bat / the CTest setup
    # selects the configuration so tests are actually executed.
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail (instead of only warning) when the build left nothing to upload,
        # matching the webui-generated upload in build-webui.
        if-no-files-found: error
| # --------------------------------------------------------------------------- | |
| # Windows GPU classifiers (x86_64 only) — CUDA, Vulkan, OpenCL. | |
| # All three use the same Ninja Multi-Config + MSVC + sccache toolchain as the | |
| # default CPU build; they differ only by the GGML backend flag (and the build-time | |
| # SDK each needs). CMakeLists.txt routes each backend's output to its own | |
| # src/main/resources_windows_{cuda,vulkan,opencl}/ tree, which the matching Maven | |
| # profile (cuda-windows / vulkan-windows / opencl-windows) turns into a classifier | |
| # JAR. GPU runtime libraries are NOT bundled — the consumer's GPU driver / toolkit | |
| # provides them (CUDA: cudart64_13/cublas64_13 from the CUDA Toolkit; Vulkan: | |
| # vulkan-1.dll from the driver; OpenCL: System32\OpenCL.dll from the driver). | |
| # NOTE: GitHub-hosted Windows runners have NO GPU, so these jobs build + run the | |
| # C++ unit suite (ctest, CPU-only) but cannot run model-backed GPU inference; | |
| # end-to-end GPU validation is local / self-hosted. | |
| # --------------------------------------------------------------------------- | |
build-windows-x86_64-cuda:
  name: Build Windows 2025 x86_64 CUDA (Ninja + sccache)
  runs-on: windows-2025-vs2026
  needs:
    - startgate
    - build-webui
  env:
    # 'true' for every trigger except a manual dispatch whose `use_cache` input is false.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Set up MSVC developer environment (x64)
      uses: ilammy/msvc-dev-cmd@v1
      with:
        arch: x64

    - name: Install CUDA Toolkit
      # Deliberately the full toolkit (the action's default local method, no sub-package
      # list). An earlier reduced network sub-package set ("nvcc","cudart","cublas",…)
      # left out the nvcc crt headers (crt/host_config.h) and cmake's CUDA compiler
      # detection failed at configure time; the full installer ships every header.
      id: cuda-toolkit
      uses: Jimver/cuda-toolkit@v0.2.35
      with:
        cuda: "13.2.0"

    - name: Install sccache (shared compiler cache)
      # Only attempted when caching is enabled and a Depot token is present; a failure
      # here does not fail the job.
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      shell: pwsh
      run: |
        $sccacheVersion = "0.16.0"
        $releaseName = "sccache-v${sccacheVersion}-x86_64-pc-windows-msvc"
        $downloadUrl = "https://github.com/mozilla/sccache/releases/download/v${sccacheVersion}/${releaseName}.zip"
        $archivePath = "$env:RUNNER_TEMP\sccache.zip"
        $extractDir = "$env:RUNNER_TEMP\sccache"
        Write-Host "Downloading $downloadUrl"
        Invoke-WebRequest -Uri $downloadUrl -OutFile $archivePath
        Expand-Archive -Path $archivePath -DestinationPath $extractDir -Force
        # Appending to GITHUB_PATH puts <extractDir>\<releaseName> on PATH for later steps.
        Add-Content -Path $env:GITHUB_PATH -Value "$extractDir\$releaseName"

    - name: Build libraries
      # Artifact build only: no -DBUILD_TESTING and no ctest step in the GPU jobs. The C++
      # unit suite is CPU-only and already covered by the `C++ Tests` job and the CPU
      # Windows jobs; a GPU-linked jllama_test.exe cannot be discovered/run on a GPU-less
      # GitHub runner (it errors probing for a CUDA device -> ctest *_NOT_BUILT).
      shell: cmd
      run: |
        .github\build.bat -G "Ninja Multi-Config" -DGGML_CUDA=ON -DOS_NAME=Windows -DOS_ARCH=x86_64

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86_64-cuda
        path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/
        # An empty output directory fails the job.
        if-no-files-found: error
build-windows-x86_64-vulkan:
  name: Build Windows 2025 x86_64 Vulkan (Ninja + sccache)
  runs-on: windows-2025-vs2026
  needs:
    - startgate
    - build-webui
  env:
    # 'true' for every trigger except a manual dispatch whose `use_cache` input is false.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Set up MSVC developer environment (x64)
      uses: ilammy/msvc-dev-cmd@v1
      with:
        arch: x64

    - name: Install Vulkan SDK
      uses: jakoch/install-vulkan-sdk-action@v1.6.0
      with:
        vulkan_version: "1.4.350.0"
        cache: true

    - name: Install sccache (shared compiler cache)
      # Only attempted when caching is enabled and a Depot token is present; a failure
      # here does not fail the job.
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      shell: pwsh
      run: |
        $sccacheVersion = "0.16.0"
        $releaseName = "sccache-v${sccacheVersion}-x86_64-pc-windows-msvc"
        $downloadUrl = "https://github.com/mozilla/sccache/releases/download/v${sccacheVersion}/${releaseName}.zip"
        $archivePath = "$env:RUNNER_TEMP\sccache.zip"
        $extractDir = "$env:RUNNER_TEMP\sccache"
        Write-Host "Downloading $downloadUrl"
        Invoke-WebRequest -Uri $downloadUrl -OutFile $archivePath
        Expand-Archive -Path $archivePath -DestinationPath $extractDir -Force
        # Appending to GITHUB_PATH puts <extractDir>\<releaseName> on PATH for later steps.
        Add-Content -Path $env:GITHUB_PATH -Value "$extractDir\$releaseName"

    - name: Build libraries
      # Artifact build only: a GPU-less runner can't run a GPU-linked jllama_test, and the
      # C++ unit suite is covered by the CPU jobs, so no -DBUILD_TESTING / ctest here.
      shell: cmd
      run: |
        .github\build.bat -G "Ninja Multi-Config" -DGGML_VULKAN=ON -DOS_NAME=Windows -DOS_ARCH=x86_64

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86_64-vulkan
        path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/
        # An empty output directory fails the job.
        if-no-files-found: error
build-windows-x86_64-opencl:
  name: Build Windows 2025 x86_64 OpenCL (Ninja + sccache)
  runs-on: windows-2025-vs2026
  needs:
    - startgate
    - build-webui
  env:
    # 'true' for every trigger except a manual dispatch whose `use_cache` input is false.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - name: Set up MSVC developer environment (x64)
      uses: ilammy/msvc-dev-cmd@v1
      with:
        arch: x64

    - name: Install sccache (shared compiler cache)
      # Only attempted when caching is enabled and a Depot token is present; a failure
      # here does not fail the job.
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      shell: pwsh
      run: |
        $sccacheVersion = "0.16.0"
        $releaseName = "sccache-v${sccacheVersion}-x86_64-pc-windows-msvc"
        $downloadUrl = "https://github.com/mozilla/sccache/releases/download/v${sccacheVersion}/${releaseName}.zip"
        $archivePath = "$env:RUNNER_TEMP\sccache.zip"
        $extractDir = "$env:RUNNER_TEMP\sccache"
        Write-Host "Downloading $downloadUrl"
        Invoke-WebRequest -Uri $downloadUrl -OutFile $archivePath
        Expand-Archive -Path $archivePath -DestinationPath $extractDir -Force
        # Appending to GITHUB_PATH puts <extractDir>\<releaseName> on PATH for later steps.
        Add-Content -Path $env:GITHUB_PATH -Value "$extractDir\$releaseName"

    - name: Build libraries
      # Artifact build only: a GPU-less runner can't run a GPU-linked jllama_test, and the
      # C++ unit suite is covered by the CPU jobs, so no -DBUILD_TESTING / ctest here.
      # Unlike the other Windows jobs this one goes through the OpenCL-specific wrapper script.
      shell: cmd
      run: |
        .github\build_opencl_windows.bat -G "Ninja Multi-Config" -DGGML_OPENCL=ON -DGGML_OPENCL_EMBED_KERNELS=ON -DOS_NAME=Windows -DOS_ARCH=x86_64

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86_64-opencl
        path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/
        # An empty output directory fails the job.
        if-no-files-found: error
| # --------------------------------------------------------------------------- | |
| # CI-only jobs — no release artifact, purely for test coverage | |
| # --------------------------------------------------------------------------- | |
test-cpp-linux-x86_64:
  name: C++ Tests Ubuntu Latest x86_64
  runs-on: ubuntu-latest
  needs: startgate
  steps:
    - uses: actions/checkout@v7

    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin

    # Diagnostic output only: records which CPU the runner had.
    - name: Display CPU Info
      run: |
        echo "=== CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo

    # Maven compile runs first, then the native build with the C++ tests switched on.
    - name: Build libraries
      run: |
        mvn --quiet --no-transfer-progress compile
        .github/build.sh -DBUILD_TESTING=ON

    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
build-macos-arm64-metal-15:
  name: Build and Test macOS 15 arm64 (Metal)
  runs-on: macos-15
  needs:
    - startgate
    - build-webui
  env:
    BUILD_JOBS: 2
    # 'true' for every trigger except a manual dispatch whose `use_cache` input is false.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7

    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/

    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin

    # Diagnostic output only; the sysctl call is allowed to fail (`|| true`).
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType

    - name: Install sccache (shared compiler cache)
      # Only attempted when caching is enabled and a Depot token is present; a failure
      # here does not fail the job.
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      run: brew install sccache

    # Maven compile runs first, then the native build with the C++ tests switched on.
    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh -DLLAMA_METAL_EMBED_LIBRARY=ON -DGGML_NATIVE=OFF -DBUILD_TESTING=ON

    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure

    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: macos-15-metal-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
| # --------------------------------------------------------------------------- | |
| # Java test jobs — download release artifact, run mvn test | |
| # --------------------------------------------------------------------------- | |
test-java-linux-x86_64:
  name: Java Tests Ubuntu Latest x86_64
  needs: crosscompile-linux-x86_64
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v7

    # Diagnostic output only: records which CPU the runner had.
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo

    # Native libraries built by the job named in `needs`, unpacked into the resources tree.
    - uses: actions/download-artifact@v8
      with:
        name: Linux-x86_64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/

    # GGUF model cache: avoids re-downloading ~5 GB of test models from HuggingFace on
    # every run (and the HF rate limits that come with that). It complements the sccache
    # compiler cache but is always ON: there is intentionally NO on/off flag for it (it is
    # GitHub's free cache), whereas sccache follows the `use_cache` workflow_dispatch
    # input / USE_CACHE env. Not Depot: GB-scale blobs are usage-priced there and its file
    # cache needs Depot-hosted runners. See CLAUDE.md.
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v6
      with:
        path: models/
        # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
        # bump the suffix when the model set / URLs change.
        key: gguf-models-v1

    # Each download is skipped when the file already exists (e.g. restored from the cache).
    # curl is restricted to HTTPS, including across redirects, and retries up to 5 times.
    - name: Download text generation model
      run: |
        test -f "models/${MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${MODEL_URL}" --create-dirs -o "models/${MODEL_NAME}"
    - name: Download reranking model
      run: |
        test -f "models/${RERANKING_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${RERANKING_MODEL_URL}" --create-dirs -o "models/${RERANKING_MODEL_NAME}"
    - name: Download draft model
      run: |
        test -f "models/${DRAFT_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${DRAFT_MODEL_URL}" --create-dirs -o "models/${DRAFT_MODEL_NAME}"
    - name: Download reasoning model
      run: |
        test -f "models/${REASONING_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${REASONING_MODEL_URL}" --create-dirs -o "models/${REASONING_MODEL_NAME}"
    - name: Download tool-calling model
      run: |
        test -f "models/${TOOL_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${TOOL_MODEL_URL}" --create-dirs -o "models/${TOOL_MODEL_NAME}"
    # Quoted: an unquoted " #" would start a YAML comment and cut the step name short.
    - name: "Download nomic embedding model (issue #98 regression)"
      run: |
        test -f "models/${NOMIC_EMBED_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${NOMIC_EMBED_MODEL_URL}" --create-dirs -o "models/${NOMIC_EMBED_MODEL_NAME}"
    - name: Download vision model
      run: |
        test -f "models/${VISION_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${VISION_MODEL_URL}" --create-dirs -o "models/${VISION_MODEL_NAME}"
    - name: Download vision mmproj
      run: |
        test -f "models/${VISION_MMPROJ_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${VISION_MMPROJ_URL}" --create-dirs -o "models/${VISION_MMPROJ_NAME}"
    - name: Download TTS model (OuteTTS)
      run: |
        test -f "models/${TTS_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${TTS_MODEL_URL}" --create-dirs -o "models/${TTS_MODEL_NAME}"
    - name: Download TTS vocoder (WavTokenizer)
      run: |
        test -f "models/${TTS_VOCODER_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${TTS_VOCODER_URL}" --create-dirs -o "models/${TTS_VOCODER_NAME}"

    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: bash .github/validate-models.sh

    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}

    - name: Memory before tests
      run: free -h

    - name: Enable core dumps
      # core_pattern is a kernel-wide setting, so it persists for later steps: core files
      # are written as core.<executable>.<pid> in the workspace, which is where the
      # failure upload at the end of this job looks for core.*. The matching size limit
      # is raised inside "Run tests", because ulimit does not carry over between steps.
      run: echo "${{ github.workspace }}/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern

    - name: Run tests
      run: |
        # ulimit applies to this shell and its children only, and every step starts a
        # fresh shell, so the core-size limit has to be raised in the step that runs Maven.
        ulimit -c unlimited
        mvn -e --no-transfer-progress -P jcstress test \
          -Dnet.ladenthin.llama.tool.model="models/${TOOL_MODEL_NAME}" \
          -Dnet.ladenthin.llama.nomic.path="models/${NOMIC_EMBED_MODEL_NAME}" \
          -Dnet.ladenthin.llama.vision.model="models/${VISION_MODEL_NAME}" \
          -Dnet.ladenthin.llama.vision.mmproj="models/${VISION_MMPROJ_NAME}" \
          -Dnet.ladenthin.llama.vision.image="${VISION_IMAGE_PATH}" \
          -Dnet.ladenthin.llama.tts.ttc.model="models/${TTS_MODEL_NAME}" \
          -Dnet.ladenthin.llama.tts.vocoder.model="models/${TTS_VOCODER_NAME}"

    # Coverage report; a missing file is tolerated.
    - uses: actions/upload-artifact@v7
      if: success()
      with:
        name: jacoco-report
        path: target/site/jacoco/jacoco.xml
        if-no-files-found: ignore

    - name: Run PIT mutation tests
      run: mvn --batch-mode --no-transfer-progress test-compile org.pitest:pitest-maven:mutationCoverage

    # Prints the context around every "SURVIVED" marker in the PIT HTML reports so
    # survivors are visible in the job log; runs even when an earlier step failed.
    - name: Extract PIT survivors
      if: always()
      run: |
        echo "=== PIT Survived Mutations ==="
        for html_file in $(find target/pit-reports -name "*.html" -type f 2>/dev/null | sort); do
          if grep -q "SURVIVED" "$html_file"; then
            echo "Found survivors in $html_file:"
            grep -B 2 -A 3 "SURVIVED" "$html_file"
            echo ""
          fi
        done

    - uses: actions/upload-artifact@v7
      if: always()
      with:
        name: pit-reports
        path: target/pit-reports/

    - name: Memory after tests
      if: always()
      run: free -h

    # Crash evidence, collected only when the job failed.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: error-log-linux-x86_64
        path: |
          ${{ github.workspace }}/hs_err_pid*.log
          ${{ github.workspace }}/core.*
          ${{ github.workspace }}/*.hprof
          ${{ github.workspace }}/target/surefire-reports/*.dump
          ${{ github.workspace }}/target/surefire-reports/*.dumpstream
          ${{ github.workspace }}/target/surefire-reports/*.txt
          ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
        if-no-files-found: warn
| # --------------------------------------------------------------------------- | |
| # vmlens interleaving analysis — pure-Java, needs no native library or models. | |
| # Staged to a short, explicit -Dtest list for now (see the `vmlens` profile in pom.xml). | |
| # --------------------------------------------------------------------------- | |
vmlens:
  name: Test (vmlens interleavings)
  runs-on: ubuntu-latest
  needs: startgate
  steps:
    - uses: actions/checkout@v7

    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
        cache: maven

    - name: Test under vmlens (interleaving analysis)
      # Every new test added to the `vmlens` package must be appended to the -Dtest list
      # below: surefire's -Dtest matches simple class names, not package globs, and the
      # default suite is kept out of the vmlens package by the managed surefire
      # <excludes> in pom.xml.
      run: |
        mvn --batch-mode --no-transfer-progress -Pvmlens test \
          -Dtest=VmlensInterleavingSmokeTest,SessionStateInterleavingTest \
          -DfailIfNoTests=false

    # The report is uploaded whether or not the tests passed; a missing report is tolerated.
    - uses: actions/upload-artifact@v7
      if: always()
      with:
        name: vmlens-report
        path: target/vmlens-report/
        if-no-files-found: ignore
test-java-macos-arm64-metal:
  name: Java Tests macOS 14 arm64 (Metal)
  needs: build-macos-arm64-metal
  runs-on: macos-14
  steps:
    - uses: actions/checkout@v7

    # Diagnostic output only; the sysctl call is allowed to fail (`|| true`).
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType

    # Native libraries built by the job named in `needs`, unpacked into the resources tree.
    - uses: actions/download-artifact@v8
      with:
        name: macos-14-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/

    # GGUF model cache: avoids re-downloading ~5 GB of test models from HuggingFace on
    # every run (and the HF rate limits that come with that). It complements the sccache
    # compiler cache but is always ON: there is intentionally NO on/off flag for it (it is
    # GitHub's free cache), whereas sccache follows the `use_cache` workflow_dispatch
    # input / USE_CACHE env. Not Depot: GB-scale blobs are usage-priced there and its file
    # cache needs Depot-hosted runners. See CLAUDE.md.
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v6
      with:
        path: models/
        # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
        # bump the suffix when the model set / URLs change.
        key: gguf-models-v1

    # Each download is skipped when the file already exists (e.g. restored from the cache).
    # curl is restricted to HTTPS, including across redirects, and retries up to 5 times.
    - name: Download text generation model
      run: |
        test -f "models/${MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${MODEL_URL}" --create-dirs -o "models/${MODEL_NAME}"
    - name: Download reranking model
      run: |
        test -f "models/${RERANKING_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${RERANKING_MODEL_URL}" --create-dirs -o "models/${RERANKING_MODEL_NAME}"
    - name: Download draft model
      run: |
        test -f "models/${DRAFT_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${DRAFT_MODEL_URL}" --create-dirs -o "models/${DRAFT_MODEL_NAME}"
    - name: Download reasoning model
      run: |
        test -f "models/${REASONING_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${REASONING_MODEL_URL}" --create-dirs -o "models/${REASONING_MODEL_NAME}"
    - name: Download tool-calling model
      run: |
        test -f "models/${TOOL_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${TOOL_MODEL_URL}" --create-dirs -o "models/${TOOL_MODEL_NAME}"
    # Quoted: an unquoted " #" would start a YAML comment and cut the step name short.
    - name: "Download nomic embedding model (issue #98 regression)"
      run: |
        test -f "models/${NOMIC_EMBED_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${NOMIC_EMBED_MODEL_URL}" --create-dirs -o "models/${NOMIC_EMBED_MODEL_NAME}"
    - name: Download vision model
      run: |
        test -f "models/${VISION_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${VISION_MODEL_URL}" --create-dirs -o "models/${VISION_MODEL_NAME}"
    - name: Download vision mmproj
      run: |
        test -f "models/${VISION_MMPROJ_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${VISION_MMPROJ_URL}" --create-dirs -o "models/${VISION_MMPROJ_NAME}"
    - name: Download TTS model (OuteTTS)
      run: |
        test -f "models/${TTS_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${TTS_MODEL_URL}" --create-dirs -o "models/${TTS_MODEL_NAME}"
    - name: Download TTS vocoder (WavTokenizer)
      run: |
        test -f "models/${TTS_VOCODER_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${TTS_VOCODER_URL}" --create-dirs -o "models/${TTS_VOCODER_NAME}"

    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: bash .github/validate-models.sh

    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}

    - name: Memory before tests
      run: vm_stat && sysctl hw.memsize hw.physmem

    - name: Run tests
      run: |
        # Core dumps are enabled here rather than in a separate step: ulimit applies to
        # this shell and its children only, and every step starts a fresh shell.
        # NOTE(review): the failure upload below collects no core-file path on macOS;
        # confirm where cores land on this runner and whether they should be uploaded.
        ulimit -c unlimited
        mvn -e --no-transfer-progress -Dnet.ladenthin.llama.test.ngl=0 test \
          -Dnet.ladenthin.llama.tool.model="models/${TOOL_MODEL_NAME}" \
          -Dnet.ladenthin.llama.nomic.path="models/${NOMIC_EMBED_MODEL_NAME}" \
          -Dnet.ladenthin.llama.vision.model="models/${VISION_MODEL_NAME}" \
          -Dnet.ladenthin.llama.vision.mmproj="models/${VISION_MMPROJ_NAME}" \
          -Dnet.ladenthin.llama.vision.image="${VISION_IMAGE_PATH}" \
          -Dnet.ladenthin.llama.tts.ttc.model="models/${TTS_MODEL_NAME}" \
          -Dnet.ladenthin.llama.tts.vocoder.model="models/${TTS_VOCODER_NAME}"

    - name: Memory after tests
      if: always()
      run: vm_stat && sysctl hw.memsize hw.physmem

    # Crash evidence, collected only when the job failed.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: error-log-macos-14-metal
        path: |
          ${{ github.workspace }}/hs_err_pid*.log
          ${{ github.workspace }}/*.hprof
          ${{ github.workspace }}/target/surefire-reports/*.dump
          ${{ github.workspace }}/target/surefire-reports/*.dumpstream
          ${{ github.workspace }}/target/surefire-reports/*.txt
          ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
        if-no-files-found: warn
test-java-macos-arm64-no-metal:
  name: Java Tests macOS 15 arm64 (no Metal)
  needs: build-macos-arm64-no-metal
  runs-on: macos-15
  steps:
    - uses: actions/checkout@v7

    # Diagnostic output only; the sysctl call is allowed to fail (`|| true`).
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType

    # Native libraries built by the job named in `needs`, unpacked into the resources tree.
    - uses: actions/download-artifact@v8
      with:
        name: macos-15-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/

    # GGUF model cache: avoids re-downloading ~5 GB of test models from HuggingFace on
    # every run (and the HF rate limits that come with that). It complements the sccache
    # compiler cache but is always ON: there is intentionally NO on/off flag for it (it is
    # GitHub's free cache), whereas sccache follows the `use_cache` workflow_dispatch
    # input / USE_CACHE env. Not Depot: GB-scale blobs are usage-priced there and its file
    # cache needs Depot-hosted runners. See CLAUDE.md.
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v6
      with:
        path: models/
        # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
        # bump the suffix when the model set / URLs change.
        key: gguf-models-v1

    # Each download is skipped when the file already exists (e.g. restored from the cache).
    # curl is restricted to HTTPS, including across redirects, and retries up to 5 times.
    - name: Download text generation model
      run: |
        test -f "models/${MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${MODEL_URL}" --create-dirs -o "models/${MODEL_NAME}"
    - name: Download reranking model
      run: |
        test -f "models/${RERANKING_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${RERANKING_MODEL_URL}" --create-dirs -o "models/${RERANKING_MODEL_NAME}"
    - name: Download draft model
      run: |
        test -f "models/${DRAFT_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${DRAFT_MODEL_URL}" --create-dirs -o "models/${DRAFT_MODEL_NAME}"
    - name: Download reasoning model
      run: |
        test -f "models/${REASONING_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${REASONING_MODEL_URL}" --create-dirs -o "models/${REASONING_MODEL_NAME}"
    - name: Download tool-calling model
      run: |
        test -f "models/${TOOL_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${TOOL_MODEL_URL}" --create-dirs -o "models/${TOOL_MODEL_NAME}"
    # Quoted: an unquoted " #" would start a YAML comment and cut the step name short.
    - name: "Download nomic embedding model (issue #98 regression)"
      run: |
        test -f "models/${NOMIC_EMBED_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${NOMIC_EMBED_MODEL_URL}" --create-dirs -o "models/${NOMIC_EMBED_MODEL_NAME}"
    - name: Download vision model
      run: |
        test -f "models/${VISION_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${VISION_MODEL_URL}" --create-dirs -o "models/${VISION_MODEL_NAME}"
    - name: Download vision mmproj
      run: |
        test -f "models/${VISION_MMPROJ_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${VISION_MMPROJ_URL}" --create-dirs -o "models/${VISION_MMPROJ_NAME}"
    - name: Download TTS model (OuteTTS)
      run: |
        test -f "models/${TTS_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${TTS_MODEL_URL}" --create-dirs -o "models/${TTS_MODEL_NAME}"
    - name: Download TTS vocoder (WavTokenizer)
      run: |
        test -f "models/${TTS_VOCODER_NAME}" || curl -L --proto =https --proto-redir =https --fail \
          --retry 5 --retry-all-errors "${TTS_VOCODER_URL}" --create-dirs -o "models/${TTS_VOCODER_NAME}"

    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: bash .github/validate-models.sh

    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}

    - name: Memory before tests
      run: vm_stat && sysctl hw.memsize hw.physmem

    - name: Run tests
      run: |
        # Core dumps are enabled here rather than in a separate step: ulimit applies to
        # this shell and its children only, and every step starts a fresh shell.
        # NOTE(review): the failure upload below collects no core-file path on macOS;
        # confirm where cores land on this runner and whether they should be uploaded.
        ulimit -c unlimited
        mvn -e --no-transfer-progress test \
          -Dnet.ladenthin.llama.tool.model="models/${TOOL_MODEL_NAME}" \
          -Dnet.ladenthin.llama.nomic.path="models/${NOMIC_EMBED_MODEL_NAME}" \
          -Dnet.ladenthin.llama.vision.model="models/${VISION_MODEL_NAME}" \
          -Dnet.ladenthin.llama.vision.mmproj="models/${VISION_MMPROJ_NAME}" \
          -Dnet.ladenthin.llama.vision.image="${VISION_IMAGE_PATH}" \
          -Dnet.ladenthin.llama.tts.ttc.model="models/${TTS_MODEL_NAME}" \
          -Dnet.ladenthin.llama.tts.vocoder.model="models/${TTS_VOCODER_NAME}"

    - name: Memory after tests
      if: always()
      run: vm_stat && sysctl hw.memsize hw.physmem

    # Crash evidence, collected only when the job failed.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: error-log-macos-15-no-metal
        path: |
          ${{ github.workspace }}/hs_err_pid*.log
          ${{ github.workspace }}/*.hprof
          ${{ github.workspace }}/target/surefire-reports/*.dump
          ${{ github.workspace }}/target/surefire-reports/*.dumpstream
          ${{ github.workspace }}/target/surefire-reports/*.txt
          ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
        if-no-files-found: warn
| test-java-macos-arm64-metal-15: | |
| name: Java Tests macOS 15 arm64 (Metal) | |
| needs: build-macos-arm64-metal-15 | |
| runs-on: macos-15 | |
| steps: | |
| - uses: actions/checkout@v7 | |
| - name: Display CPU Info | |
| shell: bash | |
| run: | | |
| echo "=== CPU Information ===" | |
| sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true | |
| echo "" | |
| echo "=== Processor Details ===" | |
| system_profiler SPHardwareDataType | |
| - uses: actions/download-artifact@v8 | |
| with: | |
| name: macos-15-metal-libraries | |
| path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ | |
| # GGUF model cache — introduced to stop re-downloading ~5 GB of test models from | |
| # HuggingFace on every run (also dodges HF rate-limits). Complements the sccache compiler | |
| # cache but is always ON: there is intentionally NO on/off flag for it (it is GitHub's | |
| # free cache, safe + free), whereas the sccache cache is toggled by the `use_cache` | |
| # workflow_dispatch input / USE_CACHE env. Not Depot — GB-scale blobs are usage-priced | |
| # there and its file cache needs Depot-hosted runners. See CLAUDE.md. | |
| - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace) | |
| uses: actions/cache@v6 | |
| with: | |
| path: models/ | |
| # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; | |
| # bump the suffix when the model set / URLs change. | |
| key: gguf-models-v1 | |
| - name: Download text generation model | |
| run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME} | |
| - name: Download reranking model | |
| run: test -f models/${RERANKING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME} | |
| - name: Download draft model | |
| run: test -f models/${DRAFT_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME} | |
| - name: Download reasoning model | |
| run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME} | |
| - name: Download tool-calling model | |
| run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME} | |
| - name: Download nomic embedding model (issue #98 regression) | |
| run: test -f models/${NOMIC_EMBED_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${NOMIC_EMBED_MODEL_URL} --create-dirs -o models/${NOMIC_EMBED_MODEL_NAME} | |
| - name: Download vision model | |
| run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME} | |
| - name: Download vision mmproj | |
| run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME} | |
| - name: Download TTS model (OuteTTS) | |
| run: test -f models/${TTS_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TTS_MODEL_URL} --create-dirs -o models/${TTS_MODEL_NAME} | |
| - name: Download TTS vocoder (WavTokenizer) | |
| run: test -f models/${TTS_VOCODER_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TTS_VOCODER_URL} --create-dirs -o models/${TTS_VOCODER_NAME} | |
| - name: List files in models directory | |
| run: ls -l models/ | |
| - name: Validate model files | |
| run: bash .github/validate-models.sh | |
| - uses: actions/setup-java@v5 | |
| with: | |
| distribution: 'temurin' | |
| java-version: ${{ env.JAVA_VERSION }} | |
| - name: Memory before tests | |
| run: vm_stat && sysctl hw.memsize hw.physmem | |
# Core dumps have to be enabled in the same step that launches Maven: `ulimit`
# only changes the limit of the shell that calls it (and of its children), and
# every `run:` step starts its own shell, so a standalone `ulimit -c unlimited`
# step never reached the test JVM.
# NOTE(review): the failure-upload step of this job lists no core-file path, so
# a core written on a crash is still not collected - confirm whether it should be.
- name: Run tests
  run: |
    ulimit -c unlimited
    mvn -e --no-transfer-progress test \
      -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
      -Dnet.ladenthin.llama.nomic.path=models/${NOMIC_EMBED_MODEL_NAME} \
      -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
      -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
      -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH} \
      -Dnet.ladenthin.llama.tts.ttc.model=models/${TTS_MODEL_NAME} \
      -Dnet.ladenthin.llama.tts.vocoder.model=models/${TTS_VOCODER_NAME}
| - name: Memory after tests | |
| if: always() | |
| run: vm_stat && sysctl hw.memsize hw.physmem | |
| - if: failure() | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: error-log-macos-15-metal | |
| path: | | |
| ${{ github.workspace }}/hs_err_pid*.log | |
| ${{ github.workspace }}/*.hprof | |
| ${{ github.workspace }}/target/surefire-reports/*.dump | |
| ${{ github.workspace }}/target/surefire-reports/*.dumpstream | |
| ${{ github.workspace }}/target/surefire-reports/*.txt | |
| ${{ github.workspace }}/target/surefire-reports/TEST-*.xml | |
| if-no-files-found: warn | |
# Java/inference validation of the default (Ninja-built) x86_64 Windows natives:
# unpacks the Windows-x86_64-libraries artifact, provisions the GGUF test models
# and runs the Maven test suite against them.
test-java-windows-x86_64:
  name: Java Tests Windows 2025 x86_64 (default / Ninja)
  needs: build-windows-x86_64
  runs-on: windows-2025-vs2026
  steps:
    - uses: actions/checkout@v7
    # Diagnostics only: prints the runner's CPU details to the job log.
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    # Native libraries go straight into the resource tree the tests load from.
    - uses: actions/download-artifact@v8
      with:
        name: Windows-x86_64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v6
      with:
        path: models/
        # GGUF is platform-independent, so one entry can serve ubuntu + macOS +
        # Windows; bump the suffix when the model set / URLs change.
        key: gguf-models-v1
        # Without this flag actions/cache keeps a separate, Windows-only entry for
        # the key, so the cross-platform sharing described above would not happen.
        enableCrossOsArchive: true
    # Each download below is skipped when the file already exists (for example
    # after a cache restore); curl is restricted to HTTPS and retries on errors.
    - name: Download text generation model
      run: |
        if (-not (Test-Path "models/$env:MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
        }
    - name: Download reranking model
      run: |
        if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
        }
    - name: Download draft model
      run: |
        if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
        }
    - name: Download reasoning model
      run: |
        if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
        }
    - name: Download tool-calling model
      run: |
        if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
        }
    - name: Download nomic embedding model (issue #98 regression)
      run: |
        if (-not (Test-Path "models/$env:NOMIC_EMBED_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:NOMIC_EMBED_MODEL_URL --create-dirs -o models/$env:NOMIC_EMBED_MODEL_NAME
        }
    - name: Download vision model
      run: |
        if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
        }
    - name: Download vision mmproj
      run: |
        if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
        }
    - name: Download TTS model (OuteTTS)
      run: |
        if (-not (Test-Path "models/$env:TTS_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TTS_MODEL_URL --create-dirs -o models/$env:TTS_MODEL_NAME
        }
    - name: Download TTS vocoder (WavTokenizer)
      run: |
        if (-not (Test-Path "models/$env:TTS_VOCODER_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TTS_VOCODER_URL --create-dirs -o models/$env:TTS_VOCODER_NAME
        }
    - name: List files in models directory
      # Explicit cmdlet instead of `ls -l`: in PowerShell `ls` is an alias of
      # Get-ChildItem and `-l` only worked as an abbreviation of -LiteralPath.
      run: Get-ChildItem -LiteralPath models/
    - name: Validate model files
      run: .github\validate-models.bat
    - uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      shell: pwsh
      run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List
    - name: Enable WER LocalDumps for java.exe
      # Windows Error Reporting writes a dump when java.exe crashes via
      # __fastfail / abort / unhandled SEH. It serves as the Windows counterpart
      # of Linux core dumps, so a JVM crash inside the JNI layer yields a native
      # callstack rather than only surefire's "VM terminated without saying
      # goodbye" line. DumpType=2 == MiniDumpWithFullMemory; the dumps/ folder in
      # the workspace is picked up by the failure-upload step of this job.
      shell: pwsh
      run: |
        $key = 'HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\java.exe'
        New-Item -Path $key -Force | Out-Null
        New-Item -Path "${{ github.workspace }}\dumps" -ItemType Directory -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpFolder' -Value "${{ github.workspace }}\dumps" -PropertyType ExpandString -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpType' -Value 2 -PropertyType DWord -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpCount' -Value 5 -PropertyType DWord -Force | Out-Null
        Get-ItemProperty -Path $key | Format-List
    - name: Run tests
      # Model locations are handed to the test suite as system properties.
      run: |
        mvn -e --no-transfer-progress test `
          "-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" `
          "-Dnet.ladenthin.llama.nomic.path=models/$env:NOMIC_EMBED_MODEL_NAME" `
          "-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" `
          "-Dnet.ladenthin.llama.vision.mmproj=models/$env:VISION_MMPROJ_NAME" `
          "-Dnet.ladenthin.llama.vision.image=$env:VISION_IMAGE_PATH" `
          "-Dnet.ladenthin.llama.tts.ttc.model=models/$env:TTS_MODEL_NAME" `
          "-Dnet.ladenthin.llama.tts.vocoder.model=models/$env:TTS_VOCODER_NAME"
    - name: Memory after tests
      if: always()
      shell: pwsh
      run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List
    # On failure, keep JVM crash logs, heap dumps, WER dumps, surefire output and
    # the native libraries that were under test.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: windows-output
        path: |
          ${{ github.workspace }}\hs_err_pid*.log
          ${{ github.workspace }}\*.hprof
          ${{ github.workspace }}\dumps\*.dmp
          ${{ github.workspace }}\target\surefire-reports\*.dump
          ${{ github.workspace }}\target\surefire-reports\*.dumpstream
          ${{ github.workspace }}\target\surefire-reports\*.txt
          ${{ github.workspace }}\target\surefire-reports\TEST-*.xml
          ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/**/*
        if-no-files-found: warn
# Java/inference validation of the MSVC-built x86_64 DLL (the analogue of
# test-java-windows-x86_64 for the default Ninja build). Loads the MSVC jllama.dll
# via JNI and runs the full model-backed suite, so both Windows generators are
# validated end-to-end before the `msvc-windows` classifier JAR ships.
test-java-windows-x86_64-msvc:
  name: Java Tests Windows 2025 x86_64 (MSVC classifier)
  needs: build-windows-x86_64-msvc
  runs-on: windows-2025-vs2026
  steps:
    - uses: actions/checkout@v7
    # Diagnostics only: prints the runner's CPU details to the job log.
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    # The MSVC artifact is unpacked into the default resource tree so the tests
    # load it in place of the Ninja-built library.
    - uses: actions/download-artifact@v8
      with:
        name: Windows-x86_64-msvc
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v6
      with:
        path: models/
        # GGUF is platform-independent, so one entry can serve ubuntu + macOS +
        # Windows; bump the suffix when the model set / URLs change.
        key: gguf-models-v1
        # Without this flag actions/cache keeps a separate, Windows-only entry for
        # the key, so the cross-platform sharing described above would not happen.
        enableCrossOsArchive: true
    # Each download below is skipped when the file already exists (for example
    # after a cache restore); curl is restricted to HTTPS and retries on errors.
    - name: Download text generation model
      run: |
        if (-not (Test-Path "models/$env:MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
        }
    - name: Download reranking model
      run: |
        if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
        }
    - name: Download draft model
      run: |
        if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
        }
    - name: Download reasoning model
      run: |
        if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
        }
    - name: Download tool-calling model
      run: |
        if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
        }
    - name: Download nomic embedding model (issue #98 regression)
      run: |
        if (-not (Test-Path "models/$env:NOMIC_EMBED_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:NOMIC_EMBED_MODEL_URL --create-dirs -o models/$env:NOMIC_EMBED_MODEL_NAME
        }
    - name: Download vision model
      run: |
        if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
        }
    - name: Download vision mmproj
      run: |
        if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
        }
    - name: Download TTS model (OuteTTS)
      run: |
        if (-not (Test-Path "models/$env:TTS_MODEL_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TTS_MODEL_URL --create-dirs -o models/$env:TTS_MODEL_NAME
        }
    - name: Download TTS vocoder (WavTokenizer)
      run: |
        if (-not (Test-Path "models/$env:TTS_VOCODER_NAME")) {
          curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TTS_VOCODER_URL --create-dirs -o models/$env:TTS_VOCODER_NAME
        }
    - name: List files in models directory
      # Explicit cmdlet instead of `ls -l`: in PowerShell `ls` is an alias of
      # Get-ChildItem and `-l` only worked as an abbreviation of -LiteralPath.
      run: Get-ChildItem -LiteralPath models/
    - name: Validate model files
      run: .github\validate-models.bat
    - uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      shell: pwsh
      run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List
    - name: Enable WER LocalDumps for java.exe
      # Windows Error Reporting writes a dump when java.exe crashes via
      # __fastfail / abort / unhandled SEH. It serves as the Windows counterpart
      # of Linux core dumps, so a JVM crash inside the JNI layer yields a native
      # callstack rather than only surefire's "VM terminated without saying
      # goodbye" line. DumpType=2 == MiniDumpWithFullMemory; the dumps/ folder in
      # the workspace is picked up by the failure-upload step of this job.
      shell: pwsh
      run: |
        $key = 'HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\java.exe'
        New-Item -Path $key -Force | Out-Null
        New-Item -Path "${{ github.workspace }}\dumps" -ItemType Directory -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpFolder' -Value "${{ github.workspace }}\dumps" -PropertyType ExpandString -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpType' -Value 2 -PropertyType DWord -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpCount' -Value 5 -PropertyType DWord -Force | Out-Null
        Get-ItemProperty -Path $key | Format-List
    - name: Run tests
      # Model locations are handed to the test suite as system properties.
      run: |
        mvn -e --no-transfer-progress test `
          "-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" `
          "-Dnet.ladenthin.llama.nomic.path=models/$env:NOMIC_EMBED_MODEL_NAME" `
          "-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" `
          "-Dnet.ladenthin.llama.vision.mmproj=models/$env:VISION_MMPROJ_NAME" `
          "-Dnet.ladenthin.llama.vision.image=$env:VISION_IMAGE_PATH" `
          "-Dnet.ladenthin.llama.tts.ttc.model=models/$env:TTS_MODEL_NAME" `
          "-Dnet.ladenthin.llama.tts.vocoder.model=models/$env:TTS_VOCODER_NAME"
    - name: Memory after tests
      if: always()
      shell: pwsh
      run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List
    # On failure, keep JVM crash logs, heap dumps, WER dumps, surefire output and
    # the native libraries that were under test.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: windows-output-msvc
        path: |
          ${{ github.workspace }}\hs_err_pid*.log
          ${{ github.workspace }}\*.hprof
          ${{ github.workspace }}\dumps\*.dmp
          ${{ github.workspace }}\target\surefire-reports\*.dump
          ${{ github.workspace }}\target\surefire-reports\*.dumpstream
          ${{ github.workspace }}\target\surefire-reports\*.txt
          ${{ github.workspace }}\target\surefire-reports\TEST-*.xml
          ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/**/*
        if-no-files-found: warn
# ---------------------------------------------------------------------------
# Package and publish
# ---------------------------------------------------------------------------
# Gathers every native-library artifact into its resource tree and builds the
# default JAR, the classifier JARs and the fat JAR. Runs only after all listed
# build and test jobs have succeeded.
package:
  name: Package JARs
  runs-on: ubuntu-latest
  needs:
    - crosscompile-linux-x86_64-cuda
    - crosscompile-linux-aarch64
    - crosscompile-android-aarch64
    - crosscompile-android-aarch64-opencl
    - build-windows-x86_64
    - build-windows-x86
    - build-windows-x86_64-msvc
    - build-windows-x86-msvc
    - build-windows-x86_64-cuda
    - build-windows-x86_64-vulkan
    - build-windows-x86_64-opencl
    - test-cpp-linux-x86_64
    - build-macos-arm64-metal-15
    - test-java-linux-x86_64
    - test-java-macos-arm64-metal
    - test-java-macos-arm64-no-metal
    - test-java-macos-arm64-metal-15
    - test-java-windows-x86_64
    - test-java-windows-x86_64-msvc
  steps:
    - uses: actions/checkout@v7
    # Default JAR: all `*-libraries` artifacts merged into one resource tree.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        pattern: '*-libraries'
        merge-multiple: true
    # Linux CUDA natives -> their own resource tree.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
        name: linux-libraries-cuda
    # Android OpenCL natives -> their own resource tree.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
        name: android-libraries-opencl
    # MSVC-built Windows natives (x86_64 and x86) share the `msvc-windows`
    # classifier tree. The default JAR ships the Ninja `*-libraries` natives
    # merged in the first download step (default flip).
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/
        name: Windows-x86_64-msvc
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/
        name: Windows-x86-msvc
    # Windows GPU classifiers (x86_64 only): one resource tree per backend.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/
        name: Windows-x86_64-cuda
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/
        name: Windows-x86_64-vulkan
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/
        name: Windows-x86_64-opencl
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: 'temurin'
    - name: Build JARs
      # The `assembly` profile additionally builds the fat uber JAR
      # (llama-<version>-jar-with-dependencies.jar): library classes, Java runtime
      # dependencies and default-platform native libs in a single classpath JAR,
      # runnable through its OpenAiCompatServer Main-Class. It is written to
      # target/ and uploaded in the `llama-jars` artifact by the next step - a CI
      # run artifact only, not a Maven Central / GitHub-Release asset.
      # Windows classifier JARs: `windows-msvc` (MSVC-built CPU natives) plus the
      # GPU backends `cuda-windows` / `vulkan-windows` / `opencl-windows`. The
      # default JAR's Windows natives are the Ninja `*-libraries` merged into
      # src/main/resources/ earlier in this job.
      # Tests and GPG signing are skipped here.
      run: >-
        mvn --batch-mode --no-transfer-progress
        -P release,cuda,opencl-android,windows-msvc,cuda-windows,vulkan-windows,opencl-windows,assembly
        -Dmaven.test.skip=true -Dgpg.skip=true package
    - name: Upload JARs
      uses: actions/upload-artifact@v7
      with:
        path: target/*.jar
        name: llama-jars
# Post-packaging reporting: submits the Maven dependency graph and pushes the
# JaCoCo coverage report to Coveralls and Codecov. The coverage download and
# both coverage uploads are best-effort and never fail the job.
report:
  name: Report
  runs-on: ubuntu-latest
  needs: [package]
  permissions:
    # NOTE(review): presumably required by the dependency-submission step - confirm.
    contents: write
  steps:
    - uses: actions/checkout@v7
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    # The coverage report may be absent; tolerate a missing artifact.
    - uses: actions/download-artifact@v8
      continue-on-error: true
      with:
        name: jacoco-report
        path: target/site/jacoco/
    - uses: advanced-security/maven-dependency-submission-action@v5
    - name: Coveralls
      uses: coverallsapp/github-action@v2
      continue-on-error: true
      with:
        format: jacoco
        file: target/site/jacoco/jacoco.xml
        github-token: ${{ secrets.GITHUB_TOKEN }}
    - name: Codecov
      uses: codecov/codecov-action@v7
      continue-on-error: true
      with:
        files: target/site/jacoco/jacoco.xml
        token: ${{ secrets.CODECOV_TOKEN }}
# Gate job for the snapshot path: runs only for pushes to main, or for manual
# dispatches from a ref that is not a v* tag. Downstream jobs test its result.
check-snapshot:
  name: "Check: main branch / SNAPSHOT"
  needs: [report]
  runs-on: ubuntu-latest
  if: >-
    (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
    (github.event_name == 'workflow_dispatch' && !startsWith(github.ref, 'refs/tags/v'))
  steps:
    - name: Confirm snapshot ref
      # The ref is read from the runner-provided GITHUB_REF variable instead of
      # being expanded into the script text with ${{ github.ref }}: a ref name
      # can contain shell metacharacters, and an expression is substituted
      # before the shell parses the line.
      run: echo "Confirmed on snapshot ref ${GITHUB_REF}"
# Gate job for the release path: runs only when the workflow ref is a v* tag.
# Downstream jobs test its result.
check-tag:
  name: "Check: v* tag"
  needs: [report]
  runs-on: ubuntu-latest
  if: startsWith(github.ref, 'refs/tags/v')
  steps:
    - name: Confirm tag ref
      # The tag is read from the runner-provided GITHUB_REF variable instead of
      # being expanded into the script text with ${{ github.ref }}: a tag name
      # can contain shell metacharacters, and an expression is substituted
      # before the shell parses the line.
      run: echo "Confirmed on tag ${GITHUB_REF}"
# Deploys a -SNAPSHOT build to Maven Central. Runs only when the snapshot gate
# succeeded AND the `publish_to_central` dispatch input is set; the signed JARs
# are then handed on as the `signed-snapshot-assets` artifact.
publish-snapshot:
  name: Publish Snapshot to Central
  runs-on: ubuntu-latest
  environment: maven-central
  needs: [check-snapshot, crosscompile-linux-x86_64-cuda, crosscompile-android-aarch64-opencl, code-style]
  if: needs.check-snapshot.result == 'success' && inputs.publish_to_central
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v7
    # Same artifact -> resource-tree layout as the `package` job.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        pattern: '*-libraries'
        merge-multiple: true
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
        name: linux-libraries-cuda
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
        name: android-libraries-opencl
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/
        name: Windows-x86_64-msvc
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/
        name: Windows-x86-msvc
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/
        name: Windows-x86_64-cuda
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/
        name: Windows-x86_64-vulkan
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/
        name: Windows-x86_64-opencl
    # The server-* and gpg-passphrase values are the NAMES of environment
    # variables; their contents are supplied on the deploy step below.
    - name: Set up Maven Central Repository
      uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}
        server-id: central
        server-username: MAVEN_USERNAME
        server-password: MAVEN_PASSWORD
        gpg-passphrase: MAVEN_GPG_PASSPHRASE
        gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
    # Hard stop unless the resolved project version ends in -SNAPSHOT.
    - name: Guard - require a -SNAPSHOT version
      shell: bash
      run: |
        VERSION=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version | tail -n1)
        echo "Resolved project version: $VERSION"
        case "$VERSION" in
        *-SNAPSHOT) echo "OK: -SNAPSHOT version, continuing snapshot deploy." ;;
        *) echo "::error::Refusing to publish non-SNAPSHOT version '$VERSION' from the snapshot job. Snapshot publishing requires a -SNAPSHOT version; releases go through the v* tag path."; exit 1 ;;
        esac
    - name: Publish snapshot
      env:
        MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
        MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }}
        MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
      run: >-
        mvn --batch-mode --no-transfer-progress
        -P release,cuda,opencl-android,windows-msvc,cuda-windows,vulkan-windows,opencl-windows
        -Dmaven.test.skip=true deploy
    # Best-effort copy: a missing JAR or signature pattern does not fail the step.
    - name: Collect signed artifacts
      run: |
        mkdir -p signed-snapshot-assets
        cp target/*.jar signed-snapshot-assets/ 2>/dev/null || true
        cp target/*.jar.asc signed-snapshot-assets/ 2>/dev/null || true
    - uses: actions/upload-artifact@v7
      with:
        path: signed-snapshot-assets/
        name: signed-snapshot-assets
# Mirrors the signed snapshot JARs onto the rolling `snapshot` pre-release on
# GitHub once the Maven Central snapshot deploy has succeeded.
github-snapshot:
  name: Update Snapshot Pre-release on GitHub
  runs-on: ubuntu-latest
  needs: [publish-snapshot]
  if: needs.publish-snapshot.result == 'success'
  permissions:
    contents: write
  steps:
    - uses: actions/download-artifact@v8
      with:
        path: snapshot-assets/
        name: signed-snapshot-assets
    # Create the `snapshot` pre-release only if it does not exist yet, then
    # upload the assets, overwriting any same-named files (--clobber).
    - name: Update snapshot pre-release
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        gh release view snapshot --repo ${{ github.repository }} 2>/dev/null \
          || gh release create snapshot \
            --repo ${{ github.repository }} \
            --prerelease \
            --title "Snapshot (latest)" \
            --notes "Latest snapshot build from the main branch."
        gh release upload snapshot snapshot-assets/* \
          --repo ${{ github.repository }} \
          --clobber
# Deploys a release build to Maven Central. Runs only when the v* tag gate
# succeeded AND the `publish_to_central` dispatch input is set; the signed JARs
# are then handed on as the `signed-release-assets` artifact.
publish-release:
  name: Publish Release to Central
  runs-on: ubuntu-latest
  environment: maven-central
  needs: [check-tag, crosscompile-linux-x86_64-cuda, crosscompile-android-aarch64-opencl, code-style]
  if: needs.check-tag.result == 'success' && inputs.publish_to_central
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v7
    # Same artifact -> resource-tree layout as the `package` job.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        pattern: '*-libraries'
        merge-multiple: true
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
        name: linux-libraries-cuda
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
        name: android-libraries-opencl
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/
        name: Windows-x86_64-msvc
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_msvc/net/ladenthin/llama/
        name: Windows-x86-msvc
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_cuda/net/ladenthin/llama/
        name: Windows-x86_64-cuda
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_vulkan/net/ladenthin/llama/
        name: Windows-x86_64-vulkan
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_opencl/net/ladenthin/llama/
        name: Windows-x86_64-opencl
    # The server-* and gpg-passphrase values are the NAMES of environment
    # variables; their contents are supplied on the deploy step below.
    - name: Set up Maven Central Repository
      uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}
        server-id: central
        server-username: MAVEN_USERNAME
        server-password: MAVEN_PASSWORD
        gpg-passphrase: MAVEN_GPG_PASSPHRASE
        gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
    - name: Publish release
      env:
        MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
        MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }}
        MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
      run: >-
        mvn --batch-mode --no-transfer-progress
        -P release,cuda,opencl-android,windows-msvc,cuda-windows,vulkan-windows,opencl-windows
        -Dmaven.test.skip=true deploy
    # Best-effort copy: a missing JAR or signature pattern does not fail the step.
    - name: Collect signed artifacts
      run: |
        mkdir -p signed-release-assets
        cp target/*.jar signed-release-assets/ 2>/dev/null || true
        cp target/*.jar.asc signed-release-assets/ 2>/dev/null || true
    - uses: actions/upload-artifact@v7
      with:
        path: signed-release-assets/
        name: signed-release-assets
# Attaches the signed release JARs (and signatures) produced by publish-release
# to the GitHub release, once that job has succeeded.
github-release-signed:
  name: Attach Signed Binaries to GitHub Release
  runs-on: ubuntu-latest
  needs: [publish-release]
  if: needs.publish-release.result == 'success'
  permissions:
    contents: write
  steps:
    # Fetch what publish-release collected into `signed-release-assets`.
    - uses: actions/download-artifact@v8
      with:
        path: release-assets/
        name: signed-release-assets
    - name: Upload release assets
      uses: softprops/action-gh-release@v3
      with:
        files: release-assets/*