llama-langchain4j: model-backed integration tests reusing the shared GGUF cache

claude · claude · commit e7cbc5e2487d · 2026-07-01T11:35:24.000Z
Adds end-to-end coverage for the embedding and scoring adapters (previously only chat had an integration test) and wires a CI job that exercises all four adapters against the models the pipeline already caches — no new model, no duplicate download logic. - New JllamaEmbeddingModelIntegrationTest / JllamaScoringModelIntegrationTest, self-skipping via -Dnet.ladenthin.llama.langchain4j.embedding.model and .rerank.model (mirrors the existing chat integration test). Module now: 7 mapping tests + 4 model-backed integration tests (self-skip without a GGUF). - New test-java-llama-langchain4j-integration job reuses the existing shared cache (gguf-models-v1, restore-only) and the Linux-x86_64-libraries native artifact. It runs after test-java-linux-x86_64 (which populates the cache), installs the core jar with the bundled native lib, and points the adapters at the already-cached chat (Qwen3-0.6B), nomic-embedding and jina-reranker models. Validation-only (not a release gate); a cold cache degrades to a self-skip. - README + CLAUDE.md document the per-adapter model properties and the cache reuse. Verified locally: module builds green, 7 mapping tests pass, 4 integration tests self-skip without models. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01Rt1paYztGJ2AKUuBuAGDXE
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -132,6 +132,49 @@ jobs:
       - name: Build and test llama-langchain4j
         run: mvn -B --no-transfer-progress -f llama-langchain4j/pom.xml verify
 
+  # ---------------------------------------------------------------------------
+  # Model-backed integration for the langchain4j adapters. Reuses the SAME shared GGUF
+  # cache and the SAME Linux-x86_64 native artifact the core Java jobs already use — no
+  # extra model download and no duplicated download logic (actions/cache/restore never
+  # saves; no curl steps). Runs after test-java-linux-x86_64, which saves that cache, then
+  # exercises the chat / embedding / scoring adapters against the already-cached chat
+  # (Qwen3-0.6B), nomic-embedding and jina-reranker models. Model paths are absolute:
+  # Surefire's forked test JVM runs in the module directory by default, so relative paths
+  # would miss the cache and silently skip. An absent model self-skips; a cold cache never fails.
+  # ---------------------------------------------------------------------------
+
+  test-java-llama-langchain4j-integration:
+    name: Integration Test llama-langchain4j (model-backed)
+    needs: [crosscompile-linux-x86_64, test-java-linux-x86_64]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v7
+      - name: Download Linux x86_64 native library (reused, not rebuilt)
+        uses: actions/download-artifact@v8
+        with:
+          name: Linux-x86_64-libraries
+          path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
+      - name: Restore shared GGUF model cache (restore-only; populated by the Java test jobs)
+        uses: actions/cache/restore@v6
+        with:
+          path: models/
+          key: gguf-models-v1
+      - uses: actions/setup-java@v5
+        with:
+          distribution: 'temurin'
+          java-version: ${{ env.JAVA_VERSION }}
+      - name: Install core net.ladenthin:llama jar (bundles the downloaded native library)
+        run: >
+          mvn -B --no-transfer-progress -DskipTests -Denforcer.skip=true
+          -Dspotless.check.skip=true -Dspotbugs.skip=true
+          -Dmaven.javadoc.skip=true -Dmaven.source.skip=true -Dgpg.skip=true install
+      - name: Run llama-langchain4j model-backed integration tests (reused cached models)
+        run: >
+          mvn -B --no-transfer-progress -f llama-langchain4j/pom.xml test
+          "-Dnet.ladenthin.llama.model.path=${GITHUB_WORKSPACE}/models/${REASONING_MODEL_NAME}"
+          "-Dnet.ladenthin.llama.langchain4j.embedding.model=${GITHUB_WORKSPACE}/models/${NOMIC_EMBED_MODEL_NAME}"
+          "-Dnet.ladenthin.llama.langchain4j.rerank.model=${GITHUB_WORKSPACE}/models/${RERANKING_MODEL_NAME}"
+
   # ---------------------------------------------------------------------------
   # Build the llama.cpp WebUI ONCE, from the same pinned tag CMakeLists.txt fetches,
   # and share it to every native build as the generated, platform-independent
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -1247,12 +1247,19 @@ Wiring:
 2. **`.github/workflows/publish.yml`** — the `test-java-llama-langchain4j` job installs the
    core Java jar, runs a **version-lockstep guard** (module version must equal core version,
    else the build fails — the standalone module can't inherit `${project.version}` from a
-   reactor), then `mvn -f llama-langchain4j/pom.xml verify` (mapping unit tests run; the
-   model-backed `JllamaChatModelIntegrationTest` self-skips without a GGUF; `verify` also
-   builds the javadoc jar so a release-time javadoc break is caught in PR CI). The
+   reactor), then `mvn -f llama-langchain4j/pom.xml verify` (7 model-free mapping unit tests
+   run; the 4 model-backed integration tests self-skip without a GGUF; `verify` also builds the
+   javadoc jar so a release-time javadoc break is caught in PR CI). The
    `publish-snapshot`/`publish-release` jobs `needs:` this job and, after the core `deploy`
    (which installs the core jar locally), run a second `deploy` of the module at the same
-   version.
+   version. A separate **`test-java-llama-langchain4j-integration`** job runs the model-backed
+   tests (chat/streaming/embedding/scoring adapters) by **reusing** the shared GGUF cache
+   (`gguf-models-v1`, restore-only — no extra download) and the `Linux-x86_64-libraries` native
+   artifact: it runs after `test-java-linux-x86_64` (which populates the cache), installs the
+   core jar with the downloaded native lib bundled, and passes the already-cached chat
+   (`REASONING_MODEL_NAME`), nomic-embedding and jina-reranker model paths via the module's
+   `-Dnet.ladenthin.llama.langchain4j.{embedding,rerank}.model` / `net.ladenthin.llama.model.path`
+   properties. It is validation-only (not a release gate); a cold cache degrades to a self-skip.
 3. **Version bumps** — when the root `pom.xml` `<version>` changes, bump
    `llama-langchain4j/pom.xml` `<version>` to match in the same commit, or the lockstep guard
    reds CI.
diff --git a/llama-langchain4j/README.md b/llama-langchain4j/README.md
@@ -83,12 +83,24 @@ cd llama-langchain4j
 mvn test
 ```
 
-The end-to-end test (`JllamaChatModelIntegrationTest`) self-skips unless you pass a model:
+The model-backed integration tests self-skip unless you point them at a GGUF. Each adapter has
+its own property so you can run them independently (a chat/instruct model, an embedding-mode model,
+and a reranking-mode model respectively):
 
 ```bash
-mvn test -Dnet.ladenthin.llama.model.path=/abs/path/to/model.gguf
+# chat + streaming (JllamaChatModelIntegrationTest)
+mvn test -Dnet.ladenthin.llama.model.path=/abs/path/to/chat.gguf
+# embeddings (JllamaEmbeddingModelIntegrationTest)
+mvn test -Dnet.ladenthin.llama.langchain4j.embedding.model=/abs/path/to/embedding.gguf
+# re-ranking / scoring (JllamaScoringModelIntegrationTest)
+mvn test -Dnet.ladenthin.llama.langchain4j.rerank.model=/abs/path/to/reranker.gguf
 ```
 
+In CI these reuse the project's existing shared GGUF cache (the chat, nomic-embedding and
+jina-reranker models the core test jobs already download) — the
+`test-java-llama-langchain4j-integration` job restores that cache and the
+`Linux-x86_64` native library artifact, so no extra model is downloaded.
+
 ## Not mapped yet
 
 - **Tool calling.** `ChatRequest.toolSpecifications()` are not forwarded, so the chat adapters return
diff --git a/llama-langchain4j/src/test/java/net/ladenthin/llama/langchain4j/JllamaEmbeddingModelIntegrationTest.java b/llama-langchain4j/src/test/java/net/ladenthin/llama/langchain4j/JllamaEmbeddingModelIntegrationTest.java
@@ -0,0 +1,58 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama.langchain4j;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.notNullValue;
+
+import dev.langchain4j.data.embedding.Embedding;
+import dev.langchain4j.data.segment.TextSegment;
+import dev.langchain4j.model.output.Response;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+import net.ladenthin.llama.LlamaModel;
+import net.ladenthin.llama.parameters.ModelParameters;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * End-to-end smoke test for {@link JllamaEmbeddingModel} over a real embedding model. Self-skips
+ * unless {@code -Dnet.ladenthin.llama.langchain4j.embedding.model=/abs/path/to/embedding.gguf}
+ * names a regular file (a directory or missing path skips instead of failing the model load); the
+ * GGUF must load in embedding mode and the native library must be present. CI reuses the
+ * already-cached nomic embedding model, so no extra download is introduced.
+ */
+class JllamaEmbeddingModelIntegrationTest {
+
+    private static Path modelPath() {
+        String path = System.getProperty("net.ladenthin.llama.langchain4j.embedding.model");
+        Assumptions.assumeTrue(path != null && !path.isEmpty(), "embedding model path property not set");
+        Path resolved = Paths.get(path);
+        Assumptions.assumeTrue(Files.isRegularFile(resolved), "embedding model file not present: " + resolved);
+        return resolved;
+    }
+
+    @Test
+    void embedsAllSegmentsInInputOrder() {
+        Path model = modelPath();
+        try (LlamaModel llama =
+                new LlamaModel(new ModelParameters().setModel(model.toString()).enableEmbedding())) {
+            JllamaEmbeddingModel embeddingModel = new JllamaEmbeddingModel(llama);
+            List<TextSegment> segments =
+                    Arrays.asList(TextSegment.from("hello world"), TextSegment.from("goodbye world"));
+            Response<List<Embedding>> response = embeddingModel.embedAll(segments);
+
+            assertThat(response, is(notNullValue()));
+            assertThat(response.content().size(), is(2));
+            assertThat(response.content().get(0).vector().length, is(greaterThan(0)));
+            assertThat(response.content().get(1).vector().length, is(response.content().get(0).vector().length));
+        }
+    }
+}
diff --git a/llama-langchain4j/src/test/java/net/ladenthin/llama/langchain4j/JllamaScoringModelIntegrationTest.java b/llama-langchain4j/src/test/java/net/ladenthin/llama/langchain4j/JllamaScoringModelIntegrationTest.java
@@ -0,0 +1,57 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama.langchain4j;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.notNullValue;
+
+import dev.langchain4j.data.segment.TextSegment;
+import dev.langchain4j.model.output.Response;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+import net.ladenthin.llama.LlamaModel;
+import net.ladenthin.llama.parameters.ModelParameters;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * End-to-end smoke test for {@link JllamaScoringModel} (re-ranker) over a real reranking model.
+ * Self-skips unless {@code -Dnet.ladenthin.llama.langchain4j.rerank.model=/abs/path/to/reranker.gguf}
+ * names a regular file (a directory or missing path skips instead of failing the model load); the
+ * GGUF must load in reranking mode and the native library must be present. CI reuses the
+ * already-cached jina reranker model, so no extra download is introduced.
+ */
+class JllamaScoringModelIntegrationTest {
+
+    private static Path modelPath() {
+        String path = System.getProperty("net.ladenthin.llama.langchain4j.rerank.model");
+        Assumptions.assumeTrue(path != null && !path.isEmpty(), "rerank model path property not set");
+        Path resolved = Paths.get(path);
+        Assumptions.assumeTrue(Files.isRegularFile(resolved), "rerank model file not present: " + resolved);
+        return resolved;
+    }
+
+    @Test
+    void scoresEverySegmentInInputOrder() {
+        Path model = modelPath();
+        try (LlamaModel llama =
+                new LlamaModel(new ModelParameters().setModel(model.toString()).enableReranking())) {
+            JllamaScoringModel scoringModel = new JllamaScoringModel(llama);
+            List<TextSegment> segments =
+                    Arrays.asList(
+                            TextSegment.from("A cat sat on the mat."),
+                            TextSegment.from("The stock market fell today."));
+            Response<List<Double>> response = scoringModel.scoreAll(segments, "domestic pets");
+
+            assertThat(response, is(notNullValue()));
+            assertThat(response.content().size(), is(2));
+            assertThat("null score", response.content().get(0) != null && response.content().get(1) != null);
+        }
+    }
+}