From f4584e858088c6cf696738e6fe7ccdbcb706e6d7 Mon Sep 17 00:00:00 2001
From: Claude
Date: Sat, 20 Jun 2026 23:22:52 +0000
Subject: [PATCH 1/4] docs: correct #103/#34 to upstream kherud issue
references
The vision/multimodal feature requests #103 (VLM image input) and #34
(multimodal inputs) are issues in the upstream kherud/java-llama.cpp
project. This fork's numbering diverged, so bare "#103"/"#34" pointed at
unrelated already-merged PRs here. Qualify every in-repo reference as
upstream kherud/java-llama.cpp#103 / #34 (README system-properties table,
CLAUDE.md, publish.yml vision-download steps, MultimodalIntegrationTest +
TestConstants javadoc) and drop the inaccurate "closes" wording, since
these cannot be closed from this fork.
Co-Authored-By: Claude Opus 4.8
Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz
---
.github/workflows/publish.yml | 14 +++++++-------
CLAUDE.md | 2 +-
README.md | 2 +-
.../ladenthin/llama/MultimodalIntegrationTest.java | 4 ++--
.../java/net/ladenthin/llama/TestConstants.java | 2 +-
5 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 36f52e71..75787c21 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -33,7 +33,7 @@ env:
TOOL_MODEL_NAME: "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
NOMIC_EMBED_MODEL_URL: "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf"
NOMIC_EMBED_MODEL_NAME: "nomic-embed-text-v1.5.f16.gguf"
- # Vision model + mmproj for MultimodalIntegrationTest (issues #103 / #34).
+ # Vision model + mmproj for MultimodalIntegrationTest (upstream kherud/java-llama.cpp#103 / #34).
# SmolVLM-500M is the smallest community vision GGUF that loads reliably
# under the upstream mtmd pipeline. Total download ~600 MB across model
# plus mmproj; matches the existing per-test-job download budget.
@@ -786,7 +786,7 @@ jobs:
run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
- name: Download nomic embedding model (issue #98 regression)
run: test -f models/${NOMIC_EMBED_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${NOMIC_EMBED_MODEL_URL} --create-dirs -o models/${NOMIC_EMBED_MODEL_NAME}
- - name: Download vision model (issues #103 / #34)
+ - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
- name: Download vision mmproj
run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -921,7 +921,7 @@ jobs:
run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
- name: Download tool-calling model
run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
- - name: Download vision model (issues #103 / #34)
+ - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
- name: Download vision mmproj
run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -1001,7 +1001,7 @@ jobs:
run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
- name: Download tool-calling model
run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
- - name: Download vision model (issues #103 / #34)
+ - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
- name: Download vision mmproj
run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -1081,7 +1081,7 @@ jobs:
run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
- name: Download tool-calling model
run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
- - name: Download vision model (issues #103 / #34)
+ - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
- name: Download vision mmproj
run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -1151,7 +1151,7 @@ jobs:
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
- name: Download tool-calling model
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
- - name: Download vision model (issues #103 / #34)
+ - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
- name: Download vision mmproj
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
@@ -1244,7 +1244,7 @@ jobs:
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
- name: Download tool-calling model
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
- - name: Download vision model (issues #103 / #34)
+ - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
- name: Download vision mmproj
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
diff --git a/CLAUDE.md b/CLAUDE.md
index 90c08879..d0e68c2b 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -555,7 +555,7 @@ the README. The summary below covers only the optional-model bindings:
| Property | Default test that uses it | Model |
|----------|---------------------------|-------|
| `net.ladenthin.llama.nomic.path` | `LlamaEmbeddingsTest#testNomicEmbedLoads` | `nomic-embed-text-v1.5.f16.gguf` (issue #98 regression) |
-| `net.ladenthin.llama.vision.model` | `MultimodalIntegrationTest` (closes #103 / #34) | `SmolVLM-500M-Instruct-Q8_0.gguf` (any vision-capable GGUF works) |
+| `net.ladenthin.llama.vision.model` | `MultimodalIntegrationTest` (upstream kherud/java-llama.cpp#103 / #34) | `SmolVLM-500M-Instruct-Q8_0.gguf` (any vision-capable GGUF works) |
| `net.ladenthin.llama.vision.mmproj` | `MultimodalIntegrationTest` | matching mmproj for the vision model, e.g. `mmproj-SmolVLM-500M-Instruct-Q8_0.gguf` |
| `net.ladenthin.llama.vision.image` | `MultimodalIntegrationTest` | committed default `src/test/resources/images/test-image.jpg`; override to any png/jpeg/webp/gif on disk |
diff --git a/README.md b/README.md
index 2a14873b..a844027d 100644
--- a/README.md
+++ b/README.md
@@ -276,7 +276,7 @@ Every `net.ladenthin.llama.*` system property recognised by the library, deep-sc
| `net.ladenthin.llama.test.ngl` | `43` for the general suite; `0` for `ToolCallingIntegrationTest` | test | Model-backed integration tests | Number of GPU layers used during testing. Pin to `0` on CPU-only hosts: `mvn test -Dnet.ladenthin.llama.test.ngl=0`. The tool test also selects device `none` at zero layers so Metal/CUDA is not initialized. |
| `net.ladenthin.llama.tool.model` | `models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf` (test self-skips if missing) | test | `ToolCallingIntegrationTest` | Path to a tool-capable GGUF used to verify required blocking and streaming tool calls. The default matches the Qwen2.5 model in upstream llama.cpp's tool-call test matrix. |
| `net.ladenthin.llama.nomic.path` | unset (test self-skips) | test | `LlamaEmbeddingsTest#testNomicEmbedLoads` | Path to a Nomic embedding model (`nomic-embed-text-v1.5.f16.gguf` or a compatible BERT-family encoder). Regression test for upstream issue #98 (BERT-encoder `result_output` assertion). |
-| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (closes #103 / #34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. |
+| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (upstream kherud/java-llama.cpp#103 / #34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. |
| `net.ladenthin.llama.vision.mmproj` | unset (test self-skips) | test | `MultimodalIntegrationTest` | Matching mmproj GGUF for the vision model. |
| `net.ladenthin.llama.vision.image` | `src/test/resources/images/test-image.jpg` (a CC-BY-4.0 / MIT-granted photo committed to the repo) | test | `MultimodalIntegrationTest` | Visual prompt image. Any png/jpeg/webp/gif works; the extension drives MIME detection. |
diff --git a/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java b/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java
index de089088..886ee70d 100644
--- a/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java
+++ b/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java
@@ -59,13 +59,13 @@
* author. Any image the test machine can reach works at runtime — the
* URL is just an env var.
*
- * Closes issues #103 and #34.
+ * Implements the upstream vision feature requests kherud/java-llama.cpp#103 and #34.
*/
@ClaudeGenerated(
purpose = "End-to-end vision regression: real vision GGUF + mmproj + author-licensed (MIT) "
+ "test image fed through the typed ChatMessage(role, List) API; "
+ "asserts non-empty reply to prove the OAI multipart content round-trips through "
- + "the upstream mtmd pipeline. Closes #103 / #34.")
+ + "the upstream mtmd pipeline. Implements upstream kherud/java-llama.cpp#103 / #34.")
public class MultimodalIntegrationTest {
private static final ObjectMapper MAPPER = new ObjectMapper();
diff --git a/src/test/java/net/ladenthin/llama/TestConstants.java b/src/test/java/net/ladenthin/llama/TestConstants.java
index 57aa218a..d15e36b3 100644
--- a/src/test/java/net/ladenthin/llama/TestConstants.java
+++ b/src/test/java/net/ladenthin/llama/TestConstants.java
@@ -46,7 +46,7 @@ public class TestConstants {
/**
* System property holding a path to a vision-capable model GGUF. Consumed by
- * {@code MultimodalIntegrationTest} (closes #103 / #34). The CI default is the
+ * {@code MultimodalIntegrationTest} (upstream kherud/java-llama.cpp#103 / #34). The CI default is the
* SmolVLM-500M Q8_0 GGUF; the test self-skips when the property is unset or
* the file is missing.
*/
From 84170eed307b8afab3ccedb7b095047ec34ca7d1 Mon Sep 17 00:00:00 2001
From: Claude
Date: Sat, 20 Jun 2026 23:29:51 +0000
Subject: [PATCH 2/4] ci(cache): cache vision GGUFs (bump key v2) + add model
cache to Windows jobs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The SmolVLM vision model + mmproj were added to the model set but the
GitHub Actions cache key (gguf-models-v1) was never bumped. Because the
cache only writes on a miss, every run kept restoring the pre-vision
archive and then re-downloaded ~600 MB of vision GGUFs from HuggingFace.
- Bump the shared key gguf-models-v1 -> gguf-models-v2 on the 4 caching
jobs (linux + 3 macOS) so the next run repopulates the cache with the
full current 8-model set including the vision pair.
- Add the actions/cache step (same shared key) to the two Windows Java
jobs (test-java-windows-x86_64 and -ninja), which previously had no
cache at all, and add Test-Path skip guards to their downloads so a
cache hit actually skips the curl (they were unconditional before).
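GGUF files are platform-independent, so all six Java-test jobs use the
same cache key. Note that actions/cache only lets Windows runners share an
entry with Linux/macOS runners when enableCrossOsArchive: true is set on
the step; the steps added here do not set it, so the two Windows jobs
share a separate Windows-only entry under that key.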
Co-Authored-By: Claude Opus 4.8
Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz
---
.github/workflows/publish.yml | 58 ++++++++++++++++++++++-------------
1 file changed, 36 insertions(+), 22 deletions(-)
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 75787c21..5e5e3ac3 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -771,9 +771,9 @@ jobs:
uses: actions/cache@v5
with:
path: models/
- # GGUF is platform-independent, so ubuntu + macOS share one entry;
+ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
- key: gguf-models-v1
+ key: gguf-models-v2
- name: Download text generation model
run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
- name: Download reranking model
@@ -908,9 +908,9 @@ jobs:
uses: actions/cache@v5
with:
path: models/
- # GGUF is platform-independent, so ubuntu + macOS share one entry;
+ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
- key: gguf-models-v1
+ key: gguf-models-v2
- name: Download text generation model
run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
- name: Download reranking model
@@ -988,9 +988,9 @@ jobs:
uses: actions/cache@v5
with:
path: models/
- # GGUF is platform-independent, so ubuntu + macOS share one entry;
+ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
- key: gguf-models-v1
+ key: gguf-models-v2
- name: Download text generation model
run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
- name: Download reranking model
@@ -1068,9 +1068,9 @@ jobs:
uses: actions/cache@v5
with:
path: models/
- # GGUF is platform-independent, so ubuntu + macOS share one entry;
+ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
- key: gguf-models-v1
+ key: gguf-models-v2
- name: Download text generation model
run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
- name: Download reranking model
@@ -1141,20 +1141,27 @@ jobs:
with:
name: Windows-x86_64-libraries
path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
+ - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
+ uses: actions/cache@v5
+ with:
+ path: models/
+ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
+ # bump the suffix when the model set / URLs change.
+ key: gguf-models-v2
- name: Download text generation model
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
+ run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
- name: Download reranking model
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
+ run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
- name: Download draft model
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
+ run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
- name: Download reasoning model
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
+ run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
- name: Download tool-calling model
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
+ run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
+ run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
- name: Download vision mmproj
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
+ run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
- name: List files in models directory
run: ls -l models/
- name: Validate model files
@@ -1234,20 +1241,27 @@ jobs:
with:
name: Windows-x86_64-ninja
path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
+ - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
+ uses: actions/cache@v5
+ with:
+ path: models/
+ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
+ # bump the suffix when the model set / URLs change.
+ key: gguf-models-v2
- name: Download text generation model
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
+ run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
- name: Download reranking model
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
+ run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
- name: Download draft model
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
+ run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
- name: Download reasoning model
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
+ run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
- name: Download tool-calling model
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
+ run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
+ run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
- name: Download vision mmproj
- run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
+ run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
- name: List files in models directory
run: ls -l models/
- name: Validate model files
From 19056b6d396d729452405f62e47aec78f5ff938d Mon Sep 17 00:00:00 2001
From: Claude
Date: Sat, 20 Jun 2026 23:30:34 +0000
Subject: [PATCH 3/4] ci(cache): keep cache key gguf-models-v1 (stale entry
deleted manually)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Revert the v1->v2 key bump: the old gguf-models-v1 cache (pre-vision
contents) is being deleted by hand, so the next run misses on v1 and
repopulates it with the full current model set (including the vision
GGUFs) — no key change needed. The Windows cache steps + Test-Path skip
guards and the "+ Windows" shared-entry comment are retained.
Co-Authored-By: Claude Opus 4.8
Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz
---
.github/workflows/publish.yml | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 5e5e3ac3..f3fb1c5f 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -773,7 +773,7 @@ jobs:
path: models/
# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
- key: gguf-models-v2
+ key: gguf-models-v1
- name: Download text generation model
run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
- name: Download reranking model
@@ -910,7 +910,7 @@ jobs:
path: models/
# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
- key: gguf-models-v2
+ key: gguf-models-v1
- name: Download text generation model
run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
- name: Download reranking model
@@ -990,7 +990,7 @@ jobs:
path: models/
# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
- key: gguf-models-v2
+ key: gguf-models-v1
- name: Download text generation model
run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
- name: Download reranking model
@@ -1070,7 +1070,7 @@ jobs:
path: models/
# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
- key: gguf-models-v2
+ key: gguf-models-v1
- name: Download text generation model
run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
- name: Download reranking model
@@ -1147,7 +1147,7 @@ jobs:
path: models/
# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
- key: gguf-models-v2
+ key: gguf-models-v1
- name: Download text generation model
run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
- name: Download reranking model
@@ -1247,7 +1247,7 @@ jobs:
path: models/
# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
- key: gguf-models-v2
+ key: gguf-models-v1
- name: Download text generation model
run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
- name: Download reranking model
From 7216799f0eb00490f761c9e2f31ea34b897d07ec Mon Sep 17 00:00:00 2001
From: Claude
Date: Sat, 20 Jun 2026 23:39:04 +0000
Subject: [PATCH 4/4] style(test): clang-format (void) cast in
ConfigureMultimodalTask EXPECT_THROW
The two configure_multimodal_task_impl EXPECT_THROW tests merged from
PR #249 used "(void) configure_..." with a space after the cast, which
the pinned clang-format 22.1.5 rejects (wants "(void)configure_..."),
reddening the C++ format CI check. Reformat with the pinned version; no
other files drift.
Co-Authored-By: Claude Opus 4.8
Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz
---
src/test/cpp/test_jni_helpers.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp
index 367c03ca..211323bc 100644
--- a/src/test/cpp/test_jni_helpers.cpp
+++ b/src/test/cpp/test_jni_helpers.cpp
@@ -608,12 +608,12 @@ TEST(ConfigureMultimodalTask, MovesPromptAndMediaToCliTask) {
TEST(ConfigureMultimodalTask, MediaWithoutProjectorThrows) {
server_task task(SERVER_TASK_TYPE_COMPLETION);
- EXPECT_THROW((void) configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}),
+ EXPECT_THROW((void)configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}),
std::invalid_argument);
}
TEST(ConfigureMultimodalTask, NonStringPromptThrows) {
server_task task(SERVER_TASK_TYPE_COMPLETION);
- EXPECT_THROW((void) configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}),
+ EXPECT_THROW((void)configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}),
std::invalid_argument);
}