From f4584e858088c6cf696738e6fe7ccdbcb706e6d7 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 23:22:52 +0000 Subject: [PATCH 1/4] docs: correct #103/#34 to upstream kherud issue references The vision/multimodal feature requests #103 (VLM image input) and #34 (multimodal inputs) are issues in the upstream kherud/java-llama.cpp project. This fork's numbering diverged, so bare "#103"/"#34" pointed at unrelated already-merged PRs here. Qualify every in-repo reference as upstream kherud/java-llama.cpp#103 / #34 (README system-properties table, CLAUDE.md, publish.yml vision-download steps, MultimodalIntegrationTest + TestConstants javadoc) and drop the inaccurate "closes" wording, since these cannot be closed from this fork. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz --- .github/workflows/publish.yml | 14 +++++++------- CLAUDE.md | 2 +- README.md | 2 +- .../ladenthin/llama/MultimodalIntegrationTest.java | 4 ++-- .../java/net/ladenthin/llama/TestConstants.java | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 36f52e71..75787c21 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -33,7 +33,7 @@ env: TOOL_MODEL_NAME: "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf" NOMIC_EMBED_MODEL_URL: "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf" NOMIC_EMBED_MODEL_NAME: "nomic-embed-text-v1.5.f16.gguf" - # Vision model + mmproj for MultimodalIntegrationTest (issues #103 / #34). + # Vision model + mmproj for MultimodalIntegrationTest (upstream kherud/java-llama.cpp#103 / #34). # SmolVLM-500M is the smallest community vision GGUF that loads reliably # under the upstream mtmd pipeline. Total download ~600 MB across model # plus mmproj; matches the existing per-test-job download budget. 
@@ -786,7 +786,7 @@ jobs: run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME} - name: Download nomic embedding model (issue #98 regression) run: test -f models/${NOMIC_EMBED_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${NOMIC_EMBED_MODEL_URL} --create-dirs -o models/${NOMIC_EMBED_MODEL_NAME} - - name: Download vision model (issues #103 / #34) + - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34) run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME} - name: Download vision mmproj run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME} @@ -921,7 +921,7 @@ jobs: run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME} - name: Download tool-calling model run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME} - - name: Download vision model (issues #103 / #34) + - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34) run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME} - name: Download vision mmproj run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME} @@ 
-1001,7 +1001,7 @@ jobs: run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME} - name: Download tool-calling model run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME} - - name: Download vision model (issues #103 / #34) + - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34) run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME} - name: Download vision mmproj run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME} @@ -1081,7 +1081,7 @@ jobs: run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME} - name: Download tool-calling model run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME} - - name: Download vision model (issues #103 / #34) + - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34) run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME} - name: Download vision mmproj run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME} @@ -1151,7 +1151,7 @@ jobs: run: 
curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME - name: Download tool-calling model run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME - - name: Download vision model (issues #103 / #34) + - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34) run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME - name: Download vision mmproj run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME @@ -1244,7 +1244,7 @@ jobs: run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME - name: Download tool-calling model run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME - - name: Download vision model (issues #103 / #34) + - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34) run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME - name: Download vision mmproj run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME diff --git a/CLAUDE.md b/CLAUDE.md index 90c08879..d0e68c2b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -555,7 +555,7 @@ the README. 
The summary below covers only the optional-model bindings: | Property | Default test that uses it | Model | |----------|---------------------------|-------| | `net.ladenthin.llama.nomic.path` | `LlamaEmbeddingsTest#testNomicEmbedLoads` | `nomic-embed-text-v1.5.f16.gguf` (issue #98 regression) | -| `net.ladenthin.llama.vision.model` | `MultimodalIntegrationTest` (closes #103 / #34) | `SmolVLM-500M-Instruct-Q8_0.gguf` (any vision-capable GGUF works) | +| `net.ladenthin.llama.vision.model` | `MultimodalIntegrationTest` (upstream kherud/java-llama.cpp#103 / #34) | `SmolVLM-500M-Instruct-Q8_0.gguf` (any vision-capable GGUF works) | | `net.ladenthin.llama.vision.mmproj` | `MultimodalIntegrationTest` | matching mmproj for the vision model, e.g. `mmproj-SmolVLM-500M-Instruct-Q8_0.gguf` | | `net.ladenthin.llama.vision.image` | `MultimodalIntegrationTest` | committed default `src/test/resources/images/test-image.jpg`; override to any png/jpeg/webp/gif on disk | diff --git a/README.md b/README.md index 2a14873b..a844027d 100644 --- a/README.md +++ b/README.md @@ -276,7 +276,7 @@ Every `net.ladenthin.llama.*` system property recognised by the library, deep-sc | `net.ladenthin.llama.test.ngl` | `43` for the general suite; `0` for `ToolCallingIntegrationTest` | test | Model-backed integration tests | Number of GPU layers used during testing. Pin to `0` on CPU-only hosts: `mvn test -Dnet.ladenthin.llama.test.ngl=0`. The tool test also selects device `none` at zero layers so Metal/CUDA is not initialized. | | `net.ladenthin.llama.tool.model` | `models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf` (test self-skips if missing) | test | `ToolCallingIntegrationTest` | Path to a tool-capable GGUF used to verify required blocking and streaming tool calls. The default matches the Qwen2.5 model in upstream llama.cpp's tool-call test matrix. 
| | `net.ladenthin.llama.nomic.path` | unset (test self-skips) | test | `LlamaEmbeddingsTest#testNomicEmbedLoads` | Path to a Nomic embedding model (`nomic-embed-text-v1.5.f16.gguf` or a compatible BERT-family encoder). Regression test for upstream issue #98 (BERT-encoder `result_output` assertion). | -| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (closes #103 / #34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. | +| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (upstream kherud/java-llama.cpp#103 / #34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. | | `net.ladenthin.llama.vision.mmproj` | unset (test self-skips) | test | `MultimodalIntegrationTest` | Matching mmproj GGUF for the vision model. | | `net.ladenthin.llama.vision.image` | `src/test/resources/images/test-image.jpg` (a CC-BY-4.0 / MIT-granted photo committed to the repo) | test | `MultimodalIntegrationTest` | Visual prompt image. Any png/jpeg/webp/gif works; the extension drives MIME detection. | diff --git a/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java b/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java index de089088..886ee70d 100644 --- a/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java +++ b/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java @@ -59,13 +59,13 @@ * author. Any image the test machine can reach works at runtime — the * URL is just an env var.

  *
- * <p>Closes issues #103 and #34.</p>
+ * <p>Implements the upstream vision feature requests kherud/java-llama.cpp#103 and #34.</p>

*/ @ClaudeGenerated( purpose = "End-to-end vision regression: real vision GGUF + mmproj + author-licensed (MIT) " + "test image fed through the typed ChatMessage(role, List) API; " + "asserts non-empty reply to prove the OAI multipart content round-trips through " - + "the upstream mtmd pipeline. Closes #103 / #34.") + + "the upstream mtmd pipeline. Implements upstream kherud/java-llama.cpp#103 / #34.") public class MultimodalIntegrationTest { private static final ObjectMapper MAPPER = new ObjectMapper(); diff --git a/src/test/java/net/ladenthin/llama/TestConstants.java b/src/test/java/net/ladenthin/llama/TestConstants.java index 57aa218a..d15e36b3 100644 --- a/src/test/java/net/ladenthin/llama/TestConstants.java +++ b/src/test/java/net/ladenthin/llama/TestConstants.java @@ -46,7 +46,7 @@ public class TestConstants { /** * System property holding a path to a vision-capable model GGUF. Consumed by - * {@code MultimodalIntegrationTest} (closes #103 / #34). The CI default is the + * {@code MultimodalIntegrationTest} (upstream kherud/java-llama.cpp#103 / #34). The CI default is the * SmolVLM-500M Q8_0 GGUF; the test self-skips when the property is unset or * the file is missing. */ From 84170eed307b8afab3ccedb7b095047ec34ca7d1 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 23:29:51 +0000 Subject: [PATCH 2/4] ci(cache): cache vision GGUFs (bump key v2) + add model cache to Windows jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SmolVLM vision model + mmproj were added to the model set but the GitHub Actions cache key (gguf-models-v1) was never bumped, so the cache — which only writes on a miss — kept restoring the pre-vision archive and re-downloaded ~600 MB of vision GGUFs from HuggingFace on every run. - Bump the shared key gguf-models-v1 -> gguf-models-v2 on the 4 caching jobs (linux + 3 macOS) so the next run repopulates the cache with the full current 8-model set including the vision pair. 
- Add the actions/cache step (same shared key) to the two Windows Java jobs (test-java-windows-x86_64 and -ninja), which previously had no cache at all, and add Test-Path skip guards to their downloads so a cache hit actually skips the curl (they were unconditional before). GGUF files are platform-independent, so all six Java-test jobs share one cache entry. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz --- .github/workflows/publish.yml | 58 ++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 75787c21..5e5e3ac3 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -771,9 +771,9 @@ jobs: uses: actions/cache@v5 with: path: models/ - # GGUF is platform-independent, so ubuntu + macOS share one entry; + # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; # bump the suffix when the model set / URLs change. - key: gguf-models-v1 + key: gguf-models-v2 - name: Download text generation model run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME} - name: Download reranking model @@ -908,9 +908,9 @@ jobs: uses: actions/cache@v5 with: path: models/ - # GGUF is platform-independent, so ubuntu + macOS share one entry; + # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; # bump the suffix when the model set / URLs change. 
- key: gguf-models-v1 + key: gguf-models-v2 - name: Download text generation model run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME} - name: Download reranking model @@ -988,9 +988,9 @@ jobs: uses: actions/cache@v5 with: path: models/ - # GGUF is platform-independent, so ubuntu + macOS share one entry; + # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; # bump the suffix when the model set / URLs change. - key: gguf-models-v1 + key: gguf-models-v2 - name: Download text generation model run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME} - name: Download reranking model @@ -1068,9 +1068,9 @@ jobs: uses: actions/cache@v5 with: path: models/ - # GGUF is platform-independent, so ubuntu + macOS share one entry; + # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; # bump the suffix when the model set / URLs change. - key: gguf-models-v1 + key: gguf-models-v2 - name: Download text generation model run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME} - name: Download reranking model @@ -1141,20 +1141,27 @@ jobs: with: name: Windows-x86_64-libraries path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace) + uses: actions/cache@v5 + with: + path: models/ + # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; + # bump the suffix when the model set / URLs change. 
+ key: gguf-models-v2 - name: Download text generation model - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME + run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME } - name: Download reranking model - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME + run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME } - name: Download draft model - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME + run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME } - name: Download reasoning model - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME + run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME } - name: Download tool-calling model - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME + run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o 
models/$env:TOOL_MODEL_NAME } - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34) - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME + run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME } - name: Download vision mmproj - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME + run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME } - name: List files in models directory run: ls -l models/ - name: Validate model files @@ -1234,20 +1241,27 @@ jobs: with: name: Windows-x86_64-ninja path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ + - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace) + uses: actions/cache@v5 + with: + path: models/ + # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; + # bump the suffix when the model set / URLs change. 
+ key: gguf-models-v2 - name: Download text generation model - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME + run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME } - name: Download reranking model - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME + run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME } - name: Download draft model - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME + run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME } - name: Download reasoning model - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME + run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME } - name: Download tool-calling model - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME + run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o 
models/$env:TOOL_MODEL_NAME } - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34) - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME + run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME } - name: Download vision mmproj - run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME + run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME } - name: List files in models directory run: ls -l models/ - name: Validate model files From 19056b6d396d729452405f62e47aec78f5ff938d Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 23:30:34 +0000 Subject: [PATCH 3/4] ci(cache): keep cache key gguf-models-v1 (stale entry deleted manually) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert the v1->v2 key bump: the old gguf-models-v1 cache (pre-vision contents) is being deleted by hand, so the next run misses on v1 and repopulates it with the full current model set (including the vision GGUFs) — no key change needed. The Windows cache steps + Test-Path skip guards and the "+ Windows" shared-entry comment are retained. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz --- .github/workflows/publish.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 5e5e3ac3..f3fb1c5f 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -773,7 +773,7 @@ jobs: path: models/ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; # bump the suffix when the model set / URLs change. - key: gguf-models-v2 + key: gguf-models-v1 - name: Download text generation model run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME} - name: Download reranking model @@ -910,7 +910,7 @@ jobs: path: models/ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; # bump the suffix when the model set / URLs change. - key: gguf-models-v2 + key: gguf-models-v1 - name: Download text generation model run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME} - name: Download reranking model @@ -990,7 +990,7 @@ jobs: path: models/ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; # bump the suffix when the model set / URLs change. - key: gguf-models-v2 + key: gguf-models-v1 - name: Download text generation model run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME} - name: Download reranking model @@ -1070,7 +1070,7 @@ jobs: path: models/ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; # bump the suffix when the model set / URLs change. 
- key: gguf-models-v2 + key: gguf-models-v1 - name: Download text generation model run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME} - name: Download reranking model @@ -1147,7 +1147,7 @@ jobs: path: models/ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; # bump the suffix when the model set / URLs change. - key: gguf-models-v2 + key: gguf-models-v1 - name: Download text generation model run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME } - name: Download reranking model @@ -1247,7 +1247,7 @@ jobs: path: models/ # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; # bump the suffix when the model set / URLs change. - key: gguf-models-v2 + key: gguf-models-v1 - name: Download text generation model run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME } - name: Download reranking model From 7216799f0eb00490f761c9e2f31ea34b897d07ec Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 23:39:04 +0000 Subject: [PATCH 4/4] style(test): clang-format (void) cast in ConfigureMultimodalTask EXPECT_THROW The two configure_multimodal_task_impl EXPECT_THROW tests merged from PR #249 used "(void) configure_..." with a space after the cast, which the pinned clang-format 22.1.5 rejects (wants "(void)configure_..."), reddening the C++ format CI check. Reformat with the pinned version; no other files drift. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01WfCPxFtiVbUD7Y3C9drQEz --- src/test/cpp/test_jni_helpers.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/cpp/test_jni_helpers.cpp b/src/test/cpp/test_jni_helpers.cpp index 367c03ca..211323bc 100644 --- a/src/test/cpp/test_jni_helpers.cpp +++ b/src/test/cpp/test_jni_helpers.cpp @@ -608,12 +608,12 @@ TEST(ConfigureMultimodalTask, MovesPromptAndMediaToCliTask) { TEST(ConfigureMultimodalTask, MediaWithoutProjectorThrows) { server_task task(SERVER_TASK_TYPE_COMPLETION); - EXPECT_THROW((void) configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}), + EXPECT_THROW((void)configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}), std::invalid_argument); } TEST(ConfigureMultimodalTask, NonStringPromptThrows) { server_task task(SERVER_TASK_TYPE_COMPLETION); - EXPECT_THROW((void) configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}), + EXPECT_THROW((void)configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}), std::invalid_argument); }