Merge pull request #239 from bernardladenthin/claude/intelligent-cray-9tfnxv

bernardladenthin · web-flow · commit 91d9799260ac · 2026-06-17T16:02:44.000+02:00
Upgrade llama.cpp to b9682 and improve CI test diagnostics
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -422,7 +422,7 @@ jobs:
           echo "${{ github.workspace }}/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern
       - name: Run tests
         run: |
-          mvn --no-transfer-progress -P jcstress test \
+          mvn -e --no-transfer-progress -P jcstress test \
             -Dnet.ladenthin.llama.nomic.path=models/${NOMIC_EMBED_MODEL_NAME} \
             -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
             -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
@@ -539,7 +539,7 @@ jobs:
         run: ulimit -c unlimited
       - name: Run tests
         run: |
-          mvn --no-transfer-progress -Dnet.ladenthin.llama.test.ngl=0 test \
+          mvn -e --no-transfer-progress -Dnet.ladenthin.llama.test.ngl=0 test \
             -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
             -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
             -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
@@ -603,7 +603,7 @@ jobs:
         run: ulimit -c unlimited
       - name: Run tests
         run: |
-          mvn --no-transfer-progress test \
+          mvn -e --no-transfer-progress test \
             -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
             -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
             -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
@@ -667,7 +667,7 @@ jobs:
         run: ulimit -c unlimited
       - name: Run tests
         run: |
-          mvn --no-transfer-progress test \
+          mvn -e --no-transfer-progress test \
             -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
             -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
             -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
@@ -750,7 +750,7 @@ jobs:
           Get-ItemProperty -Path $key | Format-List
       - name: Run tests
         run: |
-          mvn --no-transfer-progress test `
+          mvn -e --no-transfer-progress test `
             "-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" `
             "-Dnet.ladenthin.llama.vision.mmproj=models/$env:VISION_MMPROJ_NAME" `
             "-Dnet.ladenthin.llama.vision.image=$env:VISION_IMAGE_PATH"
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 Java bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) via JNI, providing a high-level API for LLM inference in Java. The Java layer communicates with a native C++ library through JNI.
 
-Current llama.cpp pinned version: **b9642**
+Current llama.cpp pinned version: **b9682**
 
 ## Upgrading CUDA Version
 
@@ -590,7 +590,7 @@ ctest --test-dir build --output-on-failure -R "ResultsToJson"
 
 #### Upstream source location (in CMake build tree)
 
-llama.cpp is fetched via CMake FetchContent, pinned to `GIT_TAG b9642`.
+llama.cpp is fetched via CMake FetchContent, pinned to `GIT_TAG b9682`.
 
 ```
 build/_deps/llama.cpp-src/tools/server/   ← server-task.h, server-common.h, etc.
@@ -763,6 +763,10 @@ See [`../workspace/policies/jqwik-prompt-injection.md`](../workspace/policies/jq
 
 See [`../workspace/policies/lombok-config.md`](../workspace/policies/lombok-config.md).
 
+## CI Test Diagnostics
+
+See [`../workspace/policies/ci-test-diagnostics.md`](../workspace/policies/ci-test-diagnostics.md).
+
 ## JPMS Module Descriptor
 
 This repo ships a `module-info.java` compiled in a separate `release 9` execution. Javadoc
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -139,7 +139,7 @@ set(LLAMA_BUILD_APP OFF CACHE BOOL "" FORCE)
 FetchContent_Declare(
 	llama.cpp
 	GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
-	GIT_TAG        b9642
+	GIT_TAG        b9682
 )
 FetchContent_MakeAvailable(llama.cpp)
 
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 **Build:**  
 ![Java 8+](https://img.shields.io/badge/Java-8%2B-informational)  
 ![Platform](https://img.shields.io/badge/Platform-Linux%20%7C%20macOS%20%7C%20Windows%20%7C%20Android-lightgrey)  
-[![llama.cpp b9642](https://img.shields.io/badge/llama.cpp-%23b9642-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b9642)  
+[![llama.cpp b9682](https://img.shields.io/badge/llama.cpp-%23b9682-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b9682)  
 [![JPMS](https://img.shields.io/badge/JPMS-modular%20JAR-25A162)](https://openjdk.org/projects/jigsaw/)  
 ![JUnit](https://img.shields.io/badge/tested%20with-JUnit6-25A162)  
 [![JSpecify](https://img.shields.io/badge/JSpecify-1.0.0%20%40NullMarked-25A162)](https://jspecify.dev)  
diff --git a/docs/history/llama-cpp-breaking-changes.md b/docs/history/llama-cpp-breaking-changes.md
@@ -356,3 +356,8 @@ Used during `llama.cpp` version bumps: when upgrading, scan this file from the r
 | b9637–b9642 | `ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl` | WebGPU matmul shared-memory dequant templates rewritten: legacy/k-quant `#elif` chains converted to independent `#if defined(...)` blocks, and the i-quant (super-block 256) IQ1/IQ2/IQ3/IQ4 paths reworked to process `NQ` quants per thread with vectorized `store_shmem_iquants`/`create_iq_gw4` helpers. Internal WebGPU backend — the project builds CPU/CUDA/Metal/OpenCL only, never WebGPU. No project changes required |
 | b9637–b9642 | `tools/ui/`, `tools/ui/src/lib/utils/heic-to-jpeg.ts` (new) | WebUI gains a "render thinking as Markdown" display setting and client-side HEIC/HEIF image upload support (lazy CDN-loaded `heic-to` decoder → JPEG). The project compiles `server-context/queue/task/models` but not `tools/ui`, so the WebUI is absent from `jllama`. No project changes required |
 | b9637–b9642 | `convert_lora_to_gguf.py`, `tests/test-backend-ops.cpp` | LoRA converter now resolves the base-model architecture via `get_model_architecture(hparams, ModelType.TEXT)` instead of hand-reading `text_config`/`architectures`; a `GGML_TYPE_BF16` `test_repeat` case was added to the backend-ops test. Python tooling and an upstream test — neither is compiled into `jllama`. No project changes required |
+| b9642–b9682 | `tools/mtmd/mtmd-helper.h` + `tools/mtmd/mtmd-helper.cpp` | `mtmd_helper_decode_image_chunk` gained two parameters — a post-decode callback plus its `user_data` — so callers can hook each decoded multimodal chunk; the standalone `process_chunk` helper was removed and folded into `mtmd_helper_eval_chunk_single`. Consumed only inside the upstream-compiled `mtmd-helper.cpp` / `server-context.cpp`; the project's hand-written C++ references no `mtmd_*`/`process_chunk` symbol (zero matches in `src/main/cpp`). No project source changes required. **New feature:** the post-decode callback enables multimodal speculative-draft decoding — exposable later as a vision + draft-model Java path |
+| b9642–b9682 | `common/common.cpp` (`build_lora_mm_id`) | The LoRA multimodal id-embedding builder gained a `w_s` scale-weight argument for per-adapter scaling. Internal to the upstream-compiled `common` library; the project never calls it. No project source changes required |
+| b9642–b9682 | `common/speculative.{h,cpp}` | Speculative decoding now accumulates per-draft-position acceptance statistics and adds an Eagle3 backend-sampling path (the draft model samples on the compute backend). `common_speculative_*` is compiled into `common` and reached only through the upstream server's speculative slot; the project's C++ references no `speculative`/`draft` symbol. No project source changes required. **New feature:** per-position draft-acceptance metrics — could surface as speculative-decoding telemetry in a future Java API |
+| b9642–b9682 | `tools/server/server-context.cpp` | Server slot refactored so an `mtmd` (multimodal) prompt can feed a speculative draft model: image/media chunks are routed through the new `mtmd_helper_decode_image_chunk` callback before drafting. Compiled directly into `jllama` (the project builds `server-context/queue/task/models`), but the change is internal to the slot state machine and binds no new/renamed symbol; verified that `jllama.cpp` and the `*_helpers.hpp` headers call none of the touched functions. No project source changes required |
+| b9642–b9682 | `ggml/src/ggml-*` backends, `tools/` (incl. `llama-bench --offline`), conda-forge packaging, `docs/`, `.github/` | Routine backend kernel updates and tooling/docs/CI tweaks (a new `llama-bench --offline` flag, conda-forge recipe notes). None are compiled into `jllama` beyond the already-built CPU/CUDA/Metal/OpenCL backends, and none change a symbol the project binds. No project changes required |
diff --git a/pom.xml b/pom.xml
@@ -80,7 +80,7 @@ SPDX-License-Identifier: MIT
 		<spotbugs.version>4.10.2.0</spotbugs.version>
 		<fb-contrib.version>7.7.4</fb-contrib.version>
 		<findsecbugs.version>1.14.0</findsecbugs.version>
-		<spotless.version>3.6.0</spotless.version>
+		<spotless.version>3.7.0</spotless.version>
 		<palantir-java-format.version>2.92.0</palantir-java-format.version>
 		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 		<project.build.outputTimestamp>${git.commit.time}</project.build.outputTimestamp>
@@ -330,7 +330,7 @@ SPDX-License-Identifier: MIT
 				<plugin>
 					<groupId>org.sonatype.central</groupId>
 					<artifactId>central-publishing-maven-plugin</artifactId>
-					<version>0.10.0</version>
+					<version>0.11.0</version>
 				</plugin>
 			</plugins>
 		</pluginManagement>
@@ -587,7 +587,7 @@ SPDX-License-Identifier: MIT
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-surefire-plugin</artifactId>
 				<configuration>
-					<argLine>@{argLine} -XX:ErrorFile=hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=.</argLine>
+					<argLine>@{argLine} -Xmx2g -XX:ErrorFile=hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=.</argLine>
 					<!--
 					  Capture each test class's stdout/stderr into
 					  target/surefire-reports/<class>-output.txt. When a native crash

Original file line number	Diff line number	Diff line change
`@@ -139,7 +139,7 @@ set(LLAMA_BUILD_APP OFF CACHE BOOL "" FORCE)`
`139`	`139`	`FetchContent_Declare(`
`140`	`140`	`llama.cpp`
`141`	`141`	`GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git`
`142`		`- GIT_TAG b9642`
	`142`	`+ GIT_TAG b9682`
`143`	`143`	`)`
`144`	`144`	`FetchContent_MakeAvailable(llama.cpp)`
`145`	`145`