Merge pull request bernardladenthin#250 from bernardladenthin/claude/dazzling-clarke-9rzf6r

bernardladenthin · web-flow · commit 3f5c1402a653 · 2026-06-21T01:39:37.000+02:00
Add Windows model caching and fix issue reference formatting
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -33,7 +33,7 @@ env:
   TOOL_MODEL_NAME: "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
   NOMIC_EMBED_MODEL_URL: "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf"
   NOMIC_EMBED_MODEL_NAME: "nomic-embed-text-v1.5.f16.gguf"
-  # Vision model + mmproj for MultimodalIntegrationTest (issues #103 / #34).
+  # Vision model + mmproj for MultimodalIntegrationTest (upstream kherud/java-llama.cpp#103 / #34).
   # SmolVLM-500M is the smallest community vision GGUF that loads reliably
   # under the upstream mtmd pipeline. Total download ~600 MB across model
   # plus mmproj; matches the existing per-test-job download budget.
@@ -771,7 +771,7 @@ jobs:
         uses: actions/cache@v5
         with:
           path: models/
-          # GGUF is platform-independent, so ubuntu + macOS share one entry;
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
           key: gguf-models-v1
       - name: Download text generation model
@@ -786,7 +786,7 @@ jobs:
         run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
       - name: Download nomic embedding model (issue #98 regression)
         run: test -f models/${NOMIC_EMBED_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${NOMIC_EMBED_MODEL_URL} --create-dirs -o models/${NOMIC_EMBED_MODEL_NAME}
-      - name: Download vision model (issues #103 / #34)
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
         run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
       - name: Download vision mmproj
         run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -908,7 +908,7 @@ jobs:
         uses: actions/cache@v5
         with:
           path: models/
-          # GGUF is platform-independent, so ubuntu + macOS share one entry;
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
           key: gguf-models-v1
       - name: Download text generation model
@@ -921,7 +921,7 @@ jobs:
         run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
       - name: Download tool-calling model
         run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
-      - name: Download vision model (issues #103 / #34)
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
         run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
       - name: Download vision mmproj
         run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -988,7 +988,7 @@ jobs:
         uses: actions/cache@v5
         with:
           path: models/
-          # GGUF is platform-independent, so ubuntu + macOS share one entry;
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
           key: gguf-models-v1
       - name: Download text generation model
@@ -1001,7 +1001,7 @@ jobs:
         run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
       - name: Download tool-calling model
         run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
-      - name: Download vision model (issues #103 / #34)
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
         run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
       - name: Download vision mmproj
         run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -1068,7 +1068,7 @@ jobs:
         uses: actions/cache@v5
         with:
           path: models/
-          # GGUF is platform-independent, so ubuntu + macOS share one entry;
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
           # bump the suffix when the model set / URLs change.
           key: gguf-models-v1
       - name: Download text generation model
@@ -1081,7 +1081,7 @@ jobs:
         run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
       - name: Download tool-calling model
         run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
-      - name: Download vision model (issues #103 / #34)
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
         run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
       - name: Download vision mmproj
         run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -1141,20 +1141,30 @@ jobs:
         with:
           name: Windows-x86_64-libraries
           path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
+      - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
+        uses: actions/cache@v5
+        with:
+          path: models/
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
+          # bump the suffix when the model set / URLs change.
+          key: gguf-models-v1
+          # Required on Windows runners: without it actions/cache keeps Windows
+          # archives OS-specific and never restores the ubuntu/macOS entry.
+          enableCrossOsArchive: true
       - name: Download text generation model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
+        run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
       - name: Download reranking model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
       - name: Download draft model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
       - name: Download reasoning model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
       - name: Download tool-calling model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
-      - name: Download vision model (issues #103 / #34)
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
+        run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
       - name: Download vision mmproj
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
+        run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
       - name: List files in models directory
         run: ls -l models/
       - name: Validate model files
@@ -1234,20 +1241,30 @@ jobs:
         with:
           name: Windows-x86_64-ninja
           path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
+      - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
+        uses: actions/cache@v5
+        with:
+          path: models/
+          # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
+          # bump the suffix when the model set / URLs change.
+          key: gguf-models-v1
+          # Required on Windows runners: without it actions/cache keeps Windows
+          # archives OS-specific and never restores the ubuntu/macOS entry.
+          enableCrossOsArchive: true
       - name: Download text generation model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
+        run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
       - name: Download reranking model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
       - name: Download draft model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
       - name: Download reasoning model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
       - name: Download tool-calling model
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
-      - name: Download vision model (issues #103 / #34)
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
+        run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
+      - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
+        run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
       - name: Download vision mmproj
-        run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
+        run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
       - name: List files in models directory
         run: ls -l models/
       - name: Validate model files
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -555,7 +555,7 @@ the README. The summary below covers only the optional-model bindings:
 | Property | Default test that uses it | Model |
 |----------|---------------------------|-------|
 | `net.ladenthin.llama.nomic.path` | `LlamaEmbeddingsTest#testNomicEmbedLoads` | `nomic-embed-text-v1.5.f16.gguf` (issue #98 regression) |
-| `net.ladenthin.llama.vision.model` | `MultimodalIntegrationTest` (closes #103 / #34) | `SmolVLM-500M-Instruct-Q8_0.gguf` (any vision-capable GGUF works) |
+| `net.ladenthin.llama.vision.model` | `MultimodalIntegrationTest` (upstream kherud/java-llama.cpp#103 / kherud/java-llama.cpp#34) | `SmolVLM-500M-Instruct-Q8_0.gguf` (any vision-capable GGUF works) |
 | `net.ladenthin.llama.vision.mmproj` | `MultimodalIntegrationTest` | matching mmproj for the vision model, e.g. `mmproj-SmolVLM-500M-Instruct-Q8_0.gguf` |
 | `net.ladenthin.llama.vision.image` | `MultimodalIntegrationTest` | committed default `src/test/resources/images/test-image.jpg`; override to any png/jpeg/webp/gif on disk |
 
diff --git a/README.md b/README.md
@@ -276,7 +276,7 @@ Every `net.ladenthin.llama.*` system property recognised by the library, deep-sc
 | `net.ladenthin.llama.test.ngl` | `43` for the general suite; `0` for `ToolCallingIntegrationTest` | test | Model-backed integration tests | Number of GPU layers used during testing. Pin to `0` on CPU-only hosts: `mvn test -Dnet.ladenthin.llama.test.ngl=0`. The tool test also selects device `none` at zero layers so Metal/CUDA is not initialized. |
 | `net.ladenthin.llama.tool.model` | `models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf` (test self-skips if missing) | test | `ToolCallingIntegrationTest` | Path to a tool-capable GGUF used to verify required blocking and streaming tool calls. The default matches the Qwen2.5 model in upstream llama.cpp's tool-call test matrix. |
 | `net.ladenthin.llama.nomic.path` | unset (test self-skips) | test | `LlamaEmbeddingsTest#testNomicEmbedLoads` | Path to a Nomic embedding model (`nomic-embed-text-v1.5.f16.gguf` or a compatible BERT-family encoder). Regression test for upstream issue #98 (BERT-encoder `result_output` assertion). |
-| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (closes #103 / #34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. |
+| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (upstream kherud/java-llama.cpp#103 / kherud/java-llama.cpp#34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. |
 | `net.ladenthin.llama.vision.mmproj` | unset (test self-skips) | test | `MultimodalIntegrationTest` | Matching mmproj GGUF for the vision model. |
 | `net.ladenthin.llama.vision.image` | `src/test/resources/images/test-image.jpg` (a CC-BY-4.0 / MIT-granted photo committed to the repo) | test | `MultimodalIntegrationTest` | Visual prompt image. Any png/jpeg/webp/gif works; the extension drives MIME detection. |
 
diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp
@@ -608,12 +608,12 @@ TEST(ConfigureMultimodalTask, MovesPromptAndMediaToCliTask) {
 
 TEST(ConfigureMultimodalTask, MediaWithoutProjectorThrows) {
     server_task task(SERVER_TASK_TYPE_COMPLETION);
-    EXPECT_THROW((void) configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}),
+    EXPECT_THROW((void)configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}),
                  std::invalid_argument);
 }
 
 TEST(ConfigureMultimodalTask, NonStringPromptThrows) {
     server_task task(SERVER_TASK_TYPE_COMPLETION);
-    EXPECT_THROW((void) configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}),
+    EXPECT_THROW((void)configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}),
                  std::invalid_argument);
 }
diff --git a/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java b/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java
@@ -59,13 +59,13 @@
  * author. Any image the test machine can reach works at runtime &#x2014; the
  * URL is just an env var.</p>
  *
- * <p>Closes issues #103 and #34.</p>
+ * <p>Implements the upstream vision feature requests kherud/java-llama.cpp#103 and #34.</p>
  */
 @ClaudeGenerated(
         purpose = "End-to-end vision regression: real vision GGUF + mmproj + author-licensed (MIT) "
                 + "test image fed through the typed ChatMessage(role, List<ContentPart>) API; "
                 + "asserts non-empty reply to prove the OAI multipart content round-trips through "
-                + "the upstream mtmd pipeline. Closes #103 / #34.")
+                + "the upstream mtmd pipeline. Implements upstream kherud/java-llama.cpp#103 / #34.")
 public class MultimodalIntegrationTest {
 
     private static final ObjectMapper MAPPER = new ObjectMapper();
diff --git a/src/test/java/net/ladenthin/llama/TestConstants.java b/src/test/java/net/ladenthin/llama/TestConstants.java
@@ -46,7 +46,7 @@ public class TestConstants {
 
     /**
      * System property holding a path to a vision-capable model GGUF. Consumed by
-     * {@code MultimodalIntegrationTest} (closes #103 / #34). The CI default is the
+     * {@code MultimodalIntegrationTest} (upstream kherud/java-llama.cpp#103 / #34). The CI default is the
      * SmolVLM-500M Q8_0 GGUF; the test self-skips when the property is unset or
      * the file is missing.
      */

Original file line number	Diff line number	Diff line change
`@@ -608,12 +608,12 @@ TEST(ConfigureMultimodalTask, MovesPromptAndMediaToCliTask) {`
`608`	`608`
`609`	`609`	`TEST(ConfigureMultimodalTask, MediaWithoutProjectorThrows) {`
`610`	`610`	`server_task task(SERVER_TASK_TYPE_COMPLETION);`
`611`		`- EXPECT_THROW((void) configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}),`
	`611`	`+ EXPECT_THROW((void)configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}),`
`612`	`612`	`std::invalid_argument);`
`613`	`613`	`}`
`614`	`614`
`615`	`615`	`TEST(ConfigureMultimodalTask, NonStringPromptThrows) {`
`616`	`616`	`server_task task(SERVER_TASK_TYPE_COMPLETION);`
`617`		`- EXPECT_THROW((void) configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}),`
	`617`	`+ EXPECT_THROW((void)configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}),`
`618`	`618`	`std::invalid_argument);`
`619`	`619`	`}`