From f4584e858088c6cf696738e6fe7ccdbcb706e6d7 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 20 Jun 2026 23:22:52 +0000
Subject: [PATCH 1/4] docs: correct #103/#34 to upstream kherud issue
 references

The vision/multimodal feature requests kherud/java-llama.cpp#103 (VLM
image input) and kherud/java-llama.cpp#34 (multimodal inputs) are
upstream issues. This fork's numbering diverged, so bare "#103"/"#34"
pointed at unrelated, already-merged PRs here. Qualify every in-repo
reference as
upstream kherud/java-llama.cpp#103 / #34 (README system-properties table,
CLAUDE.md, publish.yml vision-download steps, MultimodalIntegrationTest +
TestConstants javadoc) and drop the inaccurate "closes" wording, since
these cannot be closed from this fork.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz
---
 .github/workflows/publish.yml                      | 14 +++++++-------
 CLAUDE.md                                          |  2 +-
 README.md                                          |  2 +-
 .../ladenthin/llama/MultimodalIntegrationTest.java |  4 ++--
 .../java/net/ladenthin/llama/TestConstants.java    |  2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 36f52e71..75787c21 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -33,7 +33,7 @@ env:
   TOOL_MODEL_NAME: "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
   NOMIC_EMBED_MODEL_URL: "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf"
   NOMIC_EMBED_MODEL_NAME: "nomic-embed-text-v1.5.f16.gguf"
-  # Vision model + mmproj for MultimodalIntegrationTest (issues #103 / #34).
+  # Vision model + mmproj for MultimodalIntegrationTest (upstream kherud/java-llama.cpp#103 / #34).
   # SmolVLM-500M is the smallest community vision GGUF that loads reliably
   # under the upstream mtmd pipeline. Total download ~600 MB across model
   # plus mmproj; matches the existing per-test-job download budget.
@@ -786,7 +786,7 @@ jobs:
         run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
       - name: Download nomic embedding model (issue #98 regression)
         run: test -f models/${NOMIC_EMBED_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${NOMIC_EMBED_MODEL_URL} --create-dirs -o models/${NOMIC_EMBED_MODEL_NAME}
-      - name: Download vision model (issues #103 / #34)
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
         run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
       - name: Download vision mmproj
         run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -921,7 +921,7 @@ jobs:
         run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
       - name: Download tool-calling model
         run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
-      - name: Download vision model (issues #103 / #34)
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
         run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
       - name: Download vision mmproj
         run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -1001,7 +1001,7 @@ jobs:
         run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
       - name: Download tool-calling model
         run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
-      - name: Download vision model (issues #103 / #34)
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
         run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
       - name: Download vision mmproj
         run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -1081,7 +1081,7 @@ jobs:
         run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
       - name: Download tool-calling model
         run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
-      - name: Download vision model (issues #103 / #34)
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
         run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
       - name: Download vision mmproj
         run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -1151,7 +1151,7 @@ jobs:
         run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
       - name: Download tool-calling model
         run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
-      - name: Download vision model (issues #103 / #34)
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
         run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
       - name: Download vision mmproj
         run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
@@ -1244,7 +1244,7 @@ jobs:
         run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
       - name: Download tool-calling model
         run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
-      - name: Download vision model (issues #103 / #34)
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
         run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
       - name: Download vision mmproj
         run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
diff --git a/CLAUDE.md b/CLAUDE.md
index 90c08879..d0e68c2b 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -555,7 +555,7 @@ the README. The summary below covers only the optional-model bindings:
 | Property | Default test that uses it | Model |
 |----------|---------------------------|-------|
 | `net.ladenthin.llama.nomic.path` | `LlamaEmbeddingsTest#testNomicEmbedLoads` | `nomic-embed-text-v1.5.f16.gguf` (issue #98 regression) |
-| `net.ladenthin.llama.vision.model` | `MultimodalIntegrationTest` (closes #103 / #34) | `SmolVLM-500M-Instruct-Q8_0.gguf` (any vision-capable GGUF works) |
+| `net.ladenthin.llama.vision.model` | `MultimodalIntegrationTest` (upstream kherud/java-llama.cpp#103 / #34) | `SmolVLM-500M-Instruct-Q8_0.gguf` (any vision-capable GGUF works) |
 | `net.ladenthin.llama.vision.mmproj` | `MultimodalIntegrationTest` | matching mmproj for the vision model, e.g. `mmproj-SmolVLM-500M-Instruct-Q8_0.gguf` |
 | `net.ladenthin.llama.vision.image` | `MultimodalIntegrationTest` | committed default `src/test/resources/images/test-image.jpg`; override to any png/jpeg/webp/gif on disk |
 
diff --git a/README.md b/README.md
index 2a14873b..a844027d 100644
--- a/README.md
+++ b/README.md
@@ -276,7 +276,7 @@ Every `net.ladenthin.llama.*` system property recognised by the library, deep-sc
 | `net.ladenthin.llama.test.ngl` | `43` for the general suite; `0` for `ToolCallingIntegrationTest` | test | Model-backed integration tests | Number of GPU layers used during testing. Pin to `0` on CPU-only hosts: `mvn test -Dnet.ladenthin.llama.test.ngl=0`. The tool test also selects device `none` at zero layers so Metal/CUDA is not initialized. |
 | `net.ladenthin.llama.tool.model` | `models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf` (test self-skips if missing) | test | `ToolCallingIntegrationTest` | Path to a tool-capable GGUF used to verify required blocking and streaming tool calls. The default matches the Qwen2.5 model in upstream llama.cpp's tool-call test matrix. |
 | `net.ladenthin.llama.nomic.path` | unset (test self-skips) | test | `LlamaEmbeddingsTest#testNomicEmbedLoads` | Path to a Nomic embedding model (`nomic-embed-text-v1.5.f16.gguf` or a compatible BERT-family encoder). Regression test for upstream issue #98 (BERT-encoder `result_output` assertion). |
-| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (closes #103 / #34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. |
+| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (upstream kherud/java-llama.cpp#103 / #34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. |
 | `net.ladenthin.llama.vision.mmproj` | unset (test self-skips) | test | `MultimodalIntegrationTest` | Matching mmproj GGUF for the vision model. |
 | `net.ladenthin.llama.vision.image` | `src/test/resources/images/test-image.jpg` (a CC-BY-4.0 / MIT-granted photo committed to the repo) | test | `MultimodalIntegrationTest` | Visual prompt image. Any png/jpeg/webp/gif works; the extension drives MIME detection. |
 
diff --git a/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java b/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java
index de089088..886ee70d 100644
--- a/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java
+++ b/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java
@@ -59,13 +59,13 @@
  * author. Any image the test machine can reach works at runtime &#x2014; the
  * URL is just an env var.</p>
  *
- * <p>Closes issues #103 and #34.</p>
+ * <p>Implements the upstream vision feature requests kherud/java-llama.cpp#103 and #34.</p>
  */
 @ClaudeGenerated(
         purpose = "End-to-end vision regression: real vision GGUF + mmproj + author-licensed (MIT) "
                 + "test image fed through the typed ChatMessage(role, List<ContentPart>) API; "
                 + "asserts non-empty reply to prove the OAI multipart content round-trips through "
-                + "the upstream mtmd pipeline. Closes #103 / #34.")
+                + "the upstream mtmd pipeline. Implements upstream kherud/java-llama.cpp#103 / #34.")
 public class MultimodalIntegrationTest {
 
     private static final ObjectMapper MAPPER = new ObjectMapper();
diff --git a/src/test/java/net/ladenthin/llama/TestConstants.java b/src/test/java/net/ladenthin/llama/TestConstants.java
index 57aa218a..d15e36b3 100644
--- a/src/test/java/net/ladenthin/llama/TestConstants.java
+++ b/src/test/java/net/ladenthin/llama/TestConstants.java
@@ -46,7 +46,7 @@ public class TestConstants {
 
     /**
      * System property holding a path to a vision-capable model GGUF. Consumed by
-     * {@code MultimodalIntegrationTest} (closes #103 / #34). The CI default is the
+     * {@code MultimodalIntegrationTest} (upstream kherud/java-llama.cpp#103 / #34). The CI default is the
      * SmolVLM-500M Q8_0 GGUF; the test self-skips when the property is unset or
      * the file is missing.
      */

From 84170eed307b8afab3ccedb7b095047ec34ca7d1 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 20 Jun 2026 23:29:51 +0000
Subject: [PATCH 2/4] ci(cache): cache vision GGUFs (bump key v2) + add model
 cache to Windows jobs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SmolVLM vision model + mmproj were added to the model set but the
GitHub Actions cache key (gguf-models-v1) was never bumped, so the cache —
which only writes on a miss — kept restoring the pre-vision archive and
re-downloaded ~600 MB of vision GGUFs from HuggingFace on every run.

- Bump the shared key gguf-models-v1 -> gguf-models-v2 on the 4 caching
  jobs (linux + 3 macOS) so the next run repopulates the cache with the
  full current 8-model set including the vision pair.
- Add the actions/cache step (same shared key) to the two Windows Java
  jobs (test-java-windows-x86_64 and -ninja), which previously had no
  cache at all, and add Test-Path skip guards to their downloads so a
  cache hit actually skips the curl (they were unconditional before).

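GGUF files are platform-independent, so all six Java-test jobs use the
same cache key. Note that actions/cache keeps Windows entries separate
from Linux/macOS ones unless enableCrossOsArchive: true is set on the
step (it is not set here), so the two Windows jobs share one cache entry
and the four linux/macOS jobs share another.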

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz
---
 .github/workflows/publish.yml | 58 ++++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 75787c21..5e5e3ac3 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -771,9 +771,9 @@ jobs:
         uses: actions/cache@v5
         with:
           path: models/
-          # GGUF is platform-independent, so ubuntu + macOS share one entry;
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
-          key: gguf-models-v1
+          key: gguf-models-v2
       - name: Download text generation model
         run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
       - name: Download reranking model
@@ -908,9 +908,9 @@ jobs:
         uses: actions/cache@v5
         with:
           path: models/
-          # GGUF is platform-independent, so ubuntu + macOS share one entry;
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
-          key: gguf-models-v1
+          key: gguf-models-v2
       - name: Download text generation model
         run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
       - name: Download reranking model
@@ -988,9 +988,9 @@ jobs:
         uses: actions/cache@v5
         with:
           path: models/
-          # GGUF is platform-independent, so ubuntu + macOS share one entry;
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
-          key: gguf-models-v1
+          key: gguf-models-v2
       - name: Download text generation model
         run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
       - name: Download reranking model
@@ -1068,9 +1068,9 @@ jobs:
         uses: actions/cache@v5
         with:
           path: models/
-          # GGUF is platform-independent, so ubuntu + macOS share one entry;
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
-          key: gguf-models-v1
+          key: gguf-models-v2
       - name: Download text generation model
         run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
       - name: Download reranking model
@@ -1141,20 +1141,27 @@ jobs:
         with:
           name: Windows-x86_64-libraries
           path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
+      - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
+        uses: actions/cache@v5
+        with:
+          path: models/
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
+          # bump the suffix when the model set / URLs change.
+          key: gguf-models-v2
       - name: Download text generation model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
+        run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
       - name: Download reranking model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
       - name: Download draft model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
       - name: Download reasoning model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
       - name: Download tool-calling model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
       - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
       - name: Download vision mmproj
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
+        run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
       - name: List files in models directory
         run: ls -l models/
       - name: Validate model files
@@ -1234,20 +1241,27 @@ jobs:
         with:
           name: Windows-x86_64-ninja
           path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
+      - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
+        uses: actions/cache@v5
+        with:
+          path: models/
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
+          # bump the suffix when the model set / URLs change.
+          key: gguf-models-v2
       - name: Download text generation model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
+        run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
       - name: Download reranking model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
       - name: Download draft model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
       - name: Download reasoning model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
       - name: Download tool-calling model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
       - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
       - name: Download vision mmproj
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
+        run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
       - name: List files in models directory
         run: ls -l models/
       - name: Validate model files

From 19056b6d396d729452405f62e47aec78f5ff938d Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 20 Jun 2026 23:30:34 +0000
Subject: [PATCH 3/4] ci(cache): keep cache key gguf-models-v1 (stale entry
 deleted manually)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Revert the v1->v2 key bump: the old gguf-models-v1 cache (pre-vision
contents) is being deleted by hand, so the next run misses on v1 and
repopulates it with the full current model set (including the vision
GGUFs) — no key change needed. The Windows cache steps + Test-Path skip
guards and the "+ Windows" shared-entry comment are retained.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz
---
 .github/workflows/publish.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 5e5e3ac3..f3fb1c5f 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -773,7 +773,7 @@ jobs:
           path: models/
           # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
-          key: gguf-models-v2
+          key: gguf-models-v1
       - name: Download text generation model
         run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
       - name: Download reranking model
@@ -910,7 +910,7 @@ jobs:
           path: models/
           # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
-          key: gguf-models-v2
+          key: gguf-models-v1
       - name: Download text generation model
         run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
       - name: Download reranking model
@@ -990,7 +990,7 @@ jobs:
           path: models/
           # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
-          key: gguf-models-v2
+          key: gguf-models-v1
       - name: Download text generation model
         run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
       - name: Download reranking model
@@ -1070,7 +1070,7 @@ jobs:
           path: models/
           # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
-          key: gguf-models-v2
+          key: gguf-models-v1
       - name: Download text generation model
         run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
       - name: Download reranking model
@@ -1147,7 +1147,7 @@ jobs:
           path: models/
           # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
-          key: gguf-models-v2
+          key: gguf-models-v1
       - name: Download text generation model
         run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
       - name: Download reranking model
@@ -1247,7 +1247,7 @@ jobs:
           path: models/
           # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
-          key: gguf-models-v2
+          key: gguf-models-v1
       - name: Download text generation model
         run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
       - name: Download reranking model

From 7216799f0eb00490f761c9e2f31ea34b897d07ec Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 20 Jun 2026 23:39:04 +0000
Subject: [PATCH 4/4] style(test): clang-format (void) cast in
 ConfigureMultimodalTask EXPECT_THROW

The two configure_multimodal_task_impl EXPECT_THROW tests merged from
PR #249 used "(void) configure_..." with a space after the cast, which
the pinned clang-format 22.1.5 rejects (wants "(void)configure_..."),
reddening the C++ format CI check. Reformat with the pinned version; no
other files drift.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz
---
 src/test/cpp/test_jni_helpers.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp
index 367c03ca..211323bc 100644
--- a/src/test/cpp/test_jni_helpers.cpp
+++ b/src/test/cpp/test_jni_helpers.cpp
@@ -608,12 +608,12 @@ TEST(ConfigureMultimodalTask, MovesPromptAndMediaToCliTask) {
 
 TEST(ConfigureMultimodalTask, MediaWithoutProjectorThrows) {
     server_task task(SERVER_TASK_TYPE_COMPLETION);
-    EXPECT_THROW((void) configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}),
+    EXPECT_THROW((void)configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}),
                  std::invalid_argument);
 }
 
 TEST(ConfigureMultimodalTask, NonStringPromptThrows) {
     server_task task(SERVER_TASK_TYPE_COMPLETION);
-    EXPECT_THROW((void) configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}),
+    EXPECT_THROW((void)configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}),
                  std::invalid_argument);
 }