diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index cbb27049..cd1284d1 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -20,7 +20,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v6.0.3 + uses: actions/checkout@v6 with: fetch-depth: 1 diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml index 68eb725c..225c705c 100644 --- a/.github/workflows/claude.yml +++ b/.github/workflows/claude.yml @@ -31,7 +31,7 @@ jobs: actions: read # Required for Claude to read CI results on PRs steps: - name: Checkout repository - uses: actions/checkout@v6.0.3 + uses: actions/checkout@v6 with: fetch-depth: 1 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 7879c6ea..9f031739 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -19,8 +19,8 @@ jobs: contents: read security-events: write steps: - - uses: actions/checkout@v6.0.3 - - uses: actions/setup-java@v5.2.0 + - uses: actions/checkout@v6 + - uses: actions/setup-java@v5 with: { java-version: '21', distribution: temurin } - uses: github/codeql-action/init@v4 with: { languages: java, queries: +security-and-quality } diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c66584b2..f06b9e75 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -61,8 +61,8 @@ jobs: needs: startgate runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.3 - - uses: actions/setup-java@v5.2.0 + - uses: actions/checkout@v6 + - uses: actions/setup-java@v5 with: java-version: '21' distribution: temurin @@ -80,7 +80,7 @@ jobs: needs: startgate runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: bash run: | @@ -104,7 +104,7 @@ jobs: needs: startgate runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: bash run: | @@ -128,7 +128,7 @@ jobs: needs: startgate runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: bash run: | @@ -152,7 +152,7 @@ jobs: needs: startgate runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: bash run: | @@ -176,7 +176,7 @@ jobs: needs: startgate runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Build libraries shell: bash run: | @@ -196,8 +196,8 @@ jobs: needs: startgate runs-on: macos-15 steps: - - uses: actions/checkout@v6.0.3 - - uses: actions/setup-java@v5.2.0 + - uses: actions/checkout@v6 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} @@ -227,8 +227,8 @@ jobs: needs: startgate runs-on: macos-14 steps: - - uses: actions/checkout@v6.0.3 - - uses: actions/setup-java@v5.2.0 + - uses: actions/checkout@v6 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} @@ -258,7 +258,7 @@ jobs: needs: startgate runs-on: windows-2025-vs2026 steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: pwsh run: | @@ -287,7 +287,7 @@ jobs: needs: startgate runs-on: windows-2025-vs2026 steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: pwsh run: | @@ -320,8 +320,8 @@ jobs: needs: startgate runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.3 - - uses: actions/setup-java@v5.2.0 + - uses: actions/checkout@v6 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} @@ -344,8 +344,8 @@ jobs: needs: startgate runs-on: macos-15 steps: - - uses: actions/checkout@v6.0.3 - - uses: actions/setup-java@v5.2.0 + - uses: actions/checkout@v6 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} @@ -379,7 +379,7 @@ jobs: needs: crosscompile-linux-x86_64 runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: bash run: | @@ -410,7 +410,7 @@ jobs: run: ls -l models/ - name: Validate model files run: bash .github/validate-models.sh - - uses: actions/setup-java@v5.2.0 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} @@ -434,7 +434,7 @@ jobs: path: target/site/jacoco/jacoco.xml if-no-files-found: ignore - name: Run PIT mutation tests - run: mvn --batch-mode --no-transfer-progress test-compile org.pitest:pitest-maven:mutationCoverage -Dmaven.javadoc.skip=true + run: mvn --batch-mode --no-transfer-progress test-compile org.pitest:pitest-maven:mutationCoverage - name: Extract PIT survivors if: always() run: | @@ -471,7 +471,7 @@ jobs: needs: build-macos-arm64-metal runs-on: macos-14 steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: bash run: | @@ -500,7 +500,7 @@ jobs: run: ls -l models/ - name: Validate model files run: bash .github/validate-models.sh - - uses: actions/setup-java@v5.2.0 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} @@ -535,7 +535,7 @@ jobs: needs: build-macos-arm64-no-metal runs-on: macos-15 steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: bash run: | @@ -564,7 +564,7 @@ jobs: run: ls -l models/ - name: Validate model files run: bash .github/validate-models.sh - - uses: actions/setup-java@v5.2.0 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} @@ -599,7 +599,7 @@ jobs: needs: build-macos-arm64-metal-15 runs-on: macos-15 steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: bash run: | @@ -628,7 +628,7 @@ jobs: run: ls -l models/ - name: Validate model files run: bash .github/validate-models.sh - - uses: actions/setup-java@v5.2.0 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} @@ -663,7 +663,7 @@ jobs: needs: build-windows-x86_64 runs-on: windows-2025-vs2026 steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - name: Display CPU Info shell: pwsh run: | @@ -695,7 +695,7 @@ jobs: run: ls -l models/ - name: Validate model files run: .github\validate-models.bat - - uses: actions/setup-java@v5.2.0 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} @@ -765,7 +765,7 @@ jobs: - test-java-windows-x86_64 runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - uses: actions/download-artifact@v8 with: pattern: "*-libraries" @@ -779,7 +779,7 @@ jobs: with: name: android-libraries-opencl path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/ - - uses: actions/setup-java@v5.2.0 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: ${{ env.JAVA_VERSION }} @@ -798,8 +798,8 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v6.0.3 - - uses: actions/setup-java@v5.2.0 + - uses: actions/checkout@v6 + - uses: actions/setup-java@v5 with: { java-version: '${{ env.JAVA_VERSION }}', distribution: temurin } - uses: actions/download-artifact@v8 with: { name: jacoco-report, path: target/site/jacoco/ } @@ -848,7 +848,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - uses: actions/download-artifact@v8 with: pattern: "*-libraries" @@ -863,7 +863,7 @@ jobs: name: android-libraries-opencl path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/ - name: Set up Maven Central Repository - uses: actions/setup-java@v5.2.0 + uses: actions/setup-java@v5 with: java-version: ${{ env.JAVA_VERSION }} distribution: 'temurin' @@ -923,7 +923,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - uses: actions/download-artifact@v8 with: pattern: "*-libraries" @@ -938,7 +938,7 @@ jobs: name: android-libraries-opencl path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/ - name: Set up Maven Central Repository - uses: actions/setup-java@v5.2.0 + uses: actions/setup-java@v5 with: java-version: ${{ env.JAVA_VERSION }} distribution: 'temurin' diff --git a/.github/workflows/reuse.yml b/.github/workflows/reuse.yml index e7ac6620..715c2715 100644 --- a/.github/workflows/reuse.yml +++ b/.github/workflows/reuse.yml @@ -13,5 +13,5 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.3 + - uses: actions/checkout@v6 - uses: fsfe/reuse-action@v6 diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 1a67b9e5..05fd2c29 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -25,7 +25,7 @@ jobs: actions: read steps: - name: Checkout - uses: actions/checkout@v6.0.3 + uses: actions/checkout@v6 with: persist-credentials: false diff --git a/.github/workflows/sonarqube.yml b/.github/workflows/sonarqube.yml index 316149f5..08ce65a3 100644 --- a/.github/workflows/sonarqube.yml +++ b/.github/workflows/sonarqube.yml @@ -14,22 +14,22 @@ jobs: name: Build and analyze runs-on: ubuntu-latest steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + - uses: actions/checkout@v6 with: fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis - name: Set up JDK 21 - uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + uses: actions/setup-java@v5 with: java-version: 21 distribution: 'zulu' - name: Cache SonarQube packages - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + uses: actions/cache@v5 with: path: ~/.sonar/cache key: ${{ runner.os }}-sonar restore-keys: ${{ runner.os }}-sonar - name: Cache Maven packages - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + uses: actions/cache@v5 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} diff --git a/CLAUDE.md b/CLAUDE.md index 1817f11d..c4225013 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co Java bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) via JNI, providing a high-level API for LLM inference in Java. The Java layer communicates with a native C++ library through JNI. -Current llama.cpp pinned version: **b9621** +Current llama.cpp pinned version: **b9637** ## Upgrading CUDA Version diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f812d72..df6f10d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -139,7 +139,7 @@ set(LLAMA_BUILD_APP OFF CACHE BOOL "" FORCE) FetchContent_Declare( llama.cpp GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git - GIT_TAG b9621 + GIT_TAG b9637 ) FetchContent_MakeAvailable(llama.cpp) diff --git a/README.md b/README.md index aa3fd704..caafe9d0 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ **Build:** ![Java 8+](https://img.shields.io/badge/Java-8%2B-informational) ![Platform](https://img.shields.io/badge/Platform-Linux%20%7C%20macOS%20%7C%20Windows%20%7C%20Android-lightgrey) -[![llama.cpp b9621](https://img.shields.io/badge/llama.cpp-%23b9621-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b9621) +[![llama.cpp b9637](https://img.shields.io/badge/llama.cpp-%23b9637-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b9637) [![JPMS](https://img.shields.io/badge/JPMS-modular%20JAR-25A162)](https://openjdk.org/projects/jigsaw/) ![JUnit](https://img.shields.io/badge/tested%20with-JUnit6-25A162) [![JSpecify](https://img.shields.io/badge/JSpecify-1.0.0%20%40NullMarked-25A162)](https://jspecify.dev) diff --git a/docs/history/llama-cpp-breaking-changes.md b/docs/history/llama-cpp-breaking-changes.md index aa8f4f10..ff663035 100644 --- a/docs/history/llama-cpp-breaking-changes.md +++ b/docs/history/llama-cpp-breaking-changes.md @@ -346,3 +346,9 @@ Used during `llama.cpp` version bumps: when upgrading, scan this file from the r | ~b9555–b9621 | `ggml/src/ggml-vulkan/` + Vulkan shaders | New `VK_VALVE_shader_mixed_float_dot_product` extension support for F16→F32 fused dot products (`dot2_f16`) in flash attention and GEMM matmul. Internal Vulkan backend, no project changes required | | ~b9555–b9621 | `ggml/src/ggml-opencl/` + OpenCL kernels | New Q5_0 and Q5_1 GEMM/GEMV noshuffle kernels for Qualcomm Adreno GPUs. Internal OpenCL backend (affects `opencl-android-aarch64` classifier build only); no project source changes required | | ~b9555–b9621 | `ggml/src/ggml-cuda/ssm-scan.cu` | Added `__syncthreads()` before the final reduction stage to prevent shared-memory race conditions on multi-warp SSM scan. Bug fix, internal CUDA backend, no project changes required | +| b9621–b9637 | `common/chat.cpp` | New Cohere2 MoE ("North Code") chat parser `common_chat_params_init_cohere2moe` + auto-detection (template containing `<\|START_TEXT\|>` and `<\|START_ACTION\|>`). Purely additive — compiled in the `chat.cpp` TU and reached through the existing specialized-template path, so the project's `oaicompat_chat_params_parse` picks it up automatically. No project source changes required. **New feature:** Cohere2 MoE reasoning + JSON tool-call chat support | +| b9621–b9637 | `common/jinja/runtime.cpp`, `common/jinja/value.cpp` | Jinja chat-template engine fixes: filter aliases `count`→`length`, `d`→`default`, `e`→`escape`; negative-step slice start/stop defaults; `split` raises on empty separator; `replace('', x)` now expands between every char. Compiled into `common`; improves chat-template compatibility automatically. No project source changes required | +| b9621–b9637 | `src/llama-arch.{h,cpp}`, `src/models/cohere2moe.cpp` (new), `src/models/models.h`, `src/llama-model.cpp`, `src/llama-model-saver.cpp`, `src/llama-vocab.cpp` | New `LLM_ARCH_COHERE2MOE` architecture (MoE + MTP/NextN) with `llama_model_cohere2moe`; `cohere2moe` tokenizer pre-type (maps to `LLAMA_VOCAB_PRE_TYPE_TINY_AYA`); Cohere2 dense path gains `ffn_*_s` NVFP4 scale tensors; tied-NVFP4-`output` assert relaxed to allow sidecar LM-head scales. Additive enum/struct internal to libllama; the project includes `llama.h`, not `llama-arch.h`/`models.h`, and switches on no arch enum. No project source changes required. **New feature:** loads North-Mini-Code GGUFs | +| b9621–b9637 | `ggml/src/ggml-vulkan/` + shaders | Unary shaders consolidated into one templated `unary.comp`; new `EXPM1` Vulkan op; GLU push-constants reworked (per-dim strides + misalign offsets); fastdiv `L` values byte-packed to stay under the 128B push-constant limit. Internal Vulkan backend — the project builds CPU/CUDA/Metal/OpenCL only, never Vulkan. No project changes required | +| b9621–b9637 | `tools/server/server-http.cpp`, `tools/ui/`, `scripts/ui-assets.cmake` | Optional gzip-compressed WebUI asset serving (`LLAMA_UI_GZIP`, `llama_ui_use_gzip()`). The project compiles `server-context/queue/task/models` but not `server-http.cpp` or `tools/ui`, so the HTTP/WebUI layer is absent from `jllama`. No project changes required | +| b9621–b9637 | `tools/cli/cli.cpp`, `.devops/*.Dockerfile`, `.github/`, `conversion/`, `convert_hf_to_gguf_update.py`, `gguf-py/`, `models/templates/Cohere2MoE.jinja`, `docs/`, `tests/` | CLI preserved-token wiring, Docker image `docker.io/` prefixes, CI labeler/release tweaks, Python GGUF converters, the new model template asset, doc typos, and upstream tests. None are compiled into `jllama` or shipped by the project. No project changes required |