Skip to content

Commit 3f5c140

Browse files
Merge pull request bernardladenthin#250 from bernardladenthin/claude/dazzling-clarke-9rzf6r
Add Windows model caching and fix issue reference formatting
2 parents 7a67d95 + 7216799 commit 3f5c140

6 files changed

Lines changed: 46 additions & 32 deletions

File tree

.github/workflows/publish.yml

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ env:
3333
TOOL_MODEL_NAME: "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
3434
NOMIC_EMBED_MODEL_URL: "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf"
3535
NOMIC_EMBED_MODEL_NAME: "nomic-embed-text-v1.5.f16.gguf"
36-
# Vision model + mmproj for MultimodalIntegrationTest (issues #103 / #34).
36+
# Vision model + mmproj for MultimodalIntegrationTest (upstream kherud/java-llama.cpp#103 / #34).
3737
# SmolVLM-500M is the smallest community vision GGUF that loads reliably
3838
# under the upstream mtmd pipeline. Total download ~600 MB across model
3939
# plus mmproj; matches the existing per-test-job download budget.
@@ -771,7 +771,7 @@ jobs:
771771
uses: actions/cache@v5
772772
with:
773773
path: models/
774-
# GGUF is platform-independent, so ubuntu + macOS share one entry;
774+
# GGUF is platform-independent, so ubuntu + macOS share one entry (Windows runners keep their own entry under this key unless enableCrossOsArchive: true is set);
775775
# bump the suffix when the model set / URLs change.
776776
key: gguf-models-v1
777777
- name: Download text generation model
@@ -786,7 +786,7 @@ jobs:
786786
run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
787787
- name: Download nomic embedding model (issue #98 regression)
788788
run: test -f models/${NOMIC_EMBED_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${NOMIC_EMBED_MODEL_URL} --create-dirs -o models/${NOMIC_EMBED_MODEL_NAME}
789-
- name: Download vision model (issues #103 / #34)
789+
- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
790790
run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
791791
- name: Download vision mmproj
792792
run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -908,7 +908,7 @@ jobs:
908908
uses: actions/cache@v5
909909
with:
910910
path: models/
911-
# GGUF is platform-independent, so ubuntu + macOS share one entry;
911+
# GGUF is platform-independent, so ubuntu + macOS share one entry (Windows runners keep their own entry under this key unless enableCrossOsArchive: true is set);
912912
# bump the suffix when the model set / URLs change.
913913
key: gguf-models-v1
914914
- name: Download text generation model
@@ -921,7 +921,7 @@ jobs:
921921
run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
922922
- name: Download tool-calling model
923923
run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
924-
- name: Download vision model (issues #103 / #34)
924+
- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
925925
run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
926926
- name: Download vision mmproj
927927
run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -988,7 +988,7 @@ jobs:
988988
uses: actions/cache@v5
989989
with:
990990
path: models/
991-
# GGUF is platform-independent, so ubuntu + macOS share one entry;
991+
# GGUF is platform-independent, so ubuntu + macOS share one entry (Windows runners keep their own entry under this key unless enableCrossOsArchive: true is set);
992992
# bump the suffix when the model set / URLs change.
993993
key: gguf-models-v1
994994
- name: Download text generation model
@@ -1001,7 +1001,7 @@ jobs:
10011001
run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
10021002
- name: Download tool-calling model
10031003
run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
1004-
- name: Download vision model (issues #103 / #34)
1004+
- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
10051005
run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
10061006
- name: Download vision mmproj
10071007
run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -1068,7 +1068,7 @@ jobs:
10681068
uses: actions/cache@v5
10691069
with:
10701070
path: models/
1071-
# GGUF is platform-independent, so ubuntu + macOS share one entry;
1071+
# GGUF is platform-independent, so ubuntu + macOS share one entry (Windows runners keep their own entry under this key unless enableCrossOsArchive: true is set);
10721072
# bump the suffix when the model set / URLs change.
10731073
key: gguf-models-v1
10741074
- name: Download text generation model
@@ -1081,7 +1081,7 @@ jobs:
10811081
run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
10821082
- name: Download tool-calling model
10831083
run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
1084-
- name: Download vision model (issues #103 / #34)
1084+
- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
10851085
run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
10861086
- name: Download vision mmproj
10871087
run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
@@ -1141,20 +1141,27 @@ jobs:
11411141
with:
11421142
name: Windows-x86_64-libraries
11431143
path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
1144+
- name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
1145+
uses: actions/cache@v5
1146+
with:
1147+
path: models/
1148+
# GGUF is platform-independent, but actions/cache keeps Windows entries separate from ubuntu + macOS unless enableCrossOsArchive: true is set;
1149+
# bump the suffix when the model set / URLs change.
1150+
key: gguf-models-v1
11441151
- name: Download text generation model
1145-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
1152+
run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
11461153
- name: Download reranking model
1147-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
1154+
run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
11481155
- name: Download draft model
1149-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
1156+
run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
11501157
- name: Download reasoning model
1151-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
1158+
run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
11521159
- name: Download tool-calling model
1153-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
1154-
- name: Download vision model (issues #103 / #34)
1155-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
1160+
run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
1161+
- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
1162+
run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
11561163
- name: Download vision mmproj
1157-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
1164+
run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
11581165
- name: List files in models directory
11591166
run: ls -l models/
11601167
- name: Validate model files
@@ -1234,20 +1241,27 @@ jobs:
12341241
with:
12351242
name: Windows-x86_64-ninja
12361243
path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
1244+
- name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
1245+
uses: actions/cache@v5
1246+
with:
1247+
path: models/
1248+
# GGUF is platform-independent, but actions/cache keeps Windows entries separate from ubuntu + macOS unless enableCrossOsArchive: true is set;
1249+
# bump the suffix when the model set / URLs change.
1250+
key: gguf-models-v1
12371251
- name: Download text generation model
1238-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME
1252+
run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
12391253
- name: Download reranking model
1240-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME
1254+
run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
12411255
- name: Download draft model
1242-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME
1256+
run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
12431257
- name: Download reasoning model
1244-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME
1258+
run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
12451259
- name: Download tool-calling model
1246-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME
1247-
- name: Download vision model (issues #103 / #34)
1248-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME
1260+
run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
1261+
- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
1262+
run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
12491263
- name: Download vision mmproj
1250-
run: curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME
1264+
run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
12511265
- name: List files in models directory
12521266
run: ls -l models/
12531267
- name: Validate model files

CLAUDE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ the README. The summary below covers only the optional-model bindings:
555555
| Property | Default test that uses it | Model |
556556
|----------|---------------------------|-------|
557557
| `net.ladenthin.llama.nomic.path` | `LlamaEmbeddingsTest#testNomicEmbedLoads` | `nomic-embed-text-v1.5.f16.gguf` (issue #98 regression) |
558-
| `net.ladenthin.llama.vision.model` | `MultimodalIntegrationTest` (closes #103 / #34) | `SmolVLM-500M-Instruct-Q8_0.gguf` (any vision-capable GGUF works) |
558+
| `net.ladenthin.llama.vision.model` | `MultimodalIntegrationTest` (upstream kherud/java-llama.cpp#103 and kherud/java-llama.cpp#34) | `SmolVLM-500M-Instruct-Q8_0.gguf` (any vision-capable GGUF works) |
559559
| `net.ladenthin.llama.vision.mmproj` | `MultimodalIntegrationTest` | matching mmproj for the vision model, e.g. `mmproj-SmolVLM-500M-Instruct-Q8_0.gguf` |
560560
| `net.ladenthin.llama.vision.image` | `MultimodalIntegrationTest` | committed default `src/test/resources/images/test-image.jpg`; override to any png/jpeg/webp/gif on disk |
561561

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ Every `net.ladenthin.llama.*` system property recognised by the library, deep-sc
276276
| `net.ladenthin.llama.test.ngl` | `43` for the general suite; `0` for `ToolCallingIntegrationTest` | test | Model-backed integration tests | Number of GPU layers used during testing. Pin to `0` on CPU-only hosts: `mvn test -Dnet.ladenthin.llama.test.ngl=0`. The tool test also selects device `none` at zero layers so Metal/CUDA is not initialized. |
277277
| `net.ladenthin.llama.tool.model` | `models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf` (test self-skips if missing) | test | `ToolCallingIntegrationTest` | Path to a tool-capable GGUF used to verify required blocking and streaming tool calls. The default matches the Qwen2.5 model in upstream llama.cpp's tool-call test matrix. |
278278
| `net.ladenthin.llama.nomic.path` | unset (test self-skips) | test | `LlamaEmbeddingsTest#testNomicEmbedLoads` | Path to a Nomic embedding model (`nomic-embed-text-v1.5.f16.gguf` or a compatible BERT-family encoder). Regression test for upstream issue #98 (BERT-encoder `result_output` assertion). |
279-
| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (closes #103 / #34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. |
279+
| `net.ladenthin.llama.vision.model` | unset (test self-skips) | test | `MultimodalIntegrationTest` (upstream kherud/java-llama.cpp#103 and kherud/java-llama.cpp#34) | Path to a vision-capable model GGUF. Any vision-capable GGUF works; CI default is `SmolVLM-500M-Instruct-Q8_0.gguf`. |
280280
| `net.ladenthin.llama.vision.mmproj` | unset (test self-skips) | test | `MultimodalIntegrationTest` | Matching mmproj GGUF for the vision model. |
281281
| `net.ladenthin.llama.vision.image` | `src/test/resources/images/test-image.jpg` (a CC-BY-4.0 / MIT-granted photo committed to the repo) | test | `MultimodalIntegrationTest` | Visual prompt image. Any png/jpeg/webp/gif works; the extension drives MIME detection. |
282282

src/test/cpp/test_jni_helpers.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -608,12 +608,12 @@ TEST(ConfigureMultimodalTask, MovesPromptAndMediaToCliTask) {
608608

609609
TEST(ConfigureMultimodalTask, MediaWithoutProjectorThrows) {
610610
server_task task(SERVER_TASK_TYPE_COMPLETION);
611-
EXPECT_THROW((void) configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}),
611+
EXPECT_THROW((void)configure_multimodal_task_impl(task, false, {{"prompt", "<__media__>"}}, {{0x01}}),
612612
std::invalid_argument);
613613
}
614614

615615
TEST(ConfigureMultimodalTask, NonStringPromptThrows) {
616616
server_task task(SERVER_TASK_TYPE_COMPLETION);
617-
EXPECT_THROW((void) configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}),
617+
EXPECT_THROW((void)configure_multimodal_task_impl(task, true, {{"prompt", json::array({1, 2})}}, {{0x01}}),
618618
std::invalid_argument);
619619
}

src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,13 @@
5959
* author. Any image the test machine can reach works at runtime &#x2014; the
6060
* URL is just an env var.</p>
6161
*
62-
* <p>Closes issues #103 and #34.</p>
62+
* <p>Implements the upstream vision feature requests kherud/java-llama.cpp#103 and #34.</p>
6363
*/
6464
@ClaudeGenerated(
6565
purpose = "End-to-end vision regression: real vision GGUF + mmproj + author-licensed (MIT) "
6666
+ "test image fed through the typed ChatMessage(role, List<ContentPart>) API; "
6767
+ "asserts non-empty reply to prove the OAI multipart content round-trips through "
68-
+ "the upstream mtmd pipeline. Closes #103 / #34.")
68+
+ "the upstream mtmd pipeline. Implements upstream kherud/java-llama.cpp#103 / #34.")
6969
public class MultimodalIntegrationTest {
7070

7171
private static final ObjectMapper MAPPER = new ObjectMapper();

src/test/java/net/ladenthin/llama/TestConstants.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public class TestConstants {
4646

4747
/**
4848
* System property holding a path to a vision-capable model GGUF. Consumed by
49-
* {@code MultimodalIntegrationTest} (closes #103 / #34). The CI default is the
49+
* {@code MultimodalIntegrationTest} (upstream kherud/java-llama.cpp#103 / #34). The CI default is the
5050
* SmolVLM-500M Q8_0 GGUF; the test self-skips when the property is unset or
5151
* the file is missing.
5252
*/

0 commit comments

Comments
 (0)