Add LoadProgressCallback for model-load progress (#113)

claude · claude · commit 70df324f6087 · 2026-05-23T07:16:54.000Z
Exposes llama.cpp's llama_model_params.progress_callback as a Java functional interface. New constructor: new LlamaModel(parameters, progress -> { ... return true; }). The callback receives a float in [0.0, 1.0] on the loader thread (the same thread that called the constructor) and may return false to abort, in which case the constructor throws LlamaException. JNI: extracts the existing loadModel body into load_model_impl and adds a trampoline that forwards the float progress to the Java method LoadProgressCallback.onProgress(float)Z via CallBooleanMethod. Trampoline state lives on the loader stack; its lifetime is bounded by the single load call. Two native entry points share the implementation: loadModel(String[]), whose signature is unchanged, and loadModelWithProgress(String[], LoadProgressCallback). Tests in LoadProgressCallbackTest (model-gated) cover three cases: progress is non-decreasing within [0,1] and reaches ~1.0; returning false aborts with LlamaException; the null-callback overload delegates to plain loadModel. All 435 C++ unit tests still pass. mvn javadoc:jar: BUILD SUCCESS. https://claude.ai/code/session_01R4ZrEy3ptJDLuUgUKuM4Gy
diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp
@@ -598,7 +598,26 @@ JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved) {
     llama_backend_free();
 }
 
-JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModel(JNIEnv *env, jobject obj, jobjectArray jparams) {
+// Trampoline for llama.cpp's load_progress_callback. The native loader runs on the
+// calling JNI thread, so the JNIEnv can be captured directly. The state struct is
+// filled in by load_model_impl and only has to outlive that single load call.
+namespace {
+// Everything the trampoline needs to reach the Java-side callback.
+struct load_progress_ud {
+    JNIEnv   *env;         // JNI environment captured by load_model_impl
+    jobject   callback;    // the Java LoadProgressCallback instance
+    jmethodID on_progress; // method ID of onProgress(float)Z on the callback's class
+};
+
+// Forwards `progress` to the Java callback. Returns true to keep loading and false
+// to abort. A Java exception thrown by the callback also aborts the load: the
+// jboolean returned by CallBooleanMethod is undefined in that case, and issuing
+// another Call*Method on the next progress tick while an exception is pending is
+// not permitted by JNI. The exception is left pending on purpose; how it is
+// surfaced to Java is decided by the code that unwinds the failed load.
+bool jni_load_progress_trampoline(float progress, void *user_data) {
+    auto *ud = static_cast<load_progress_ud *>(user_data);
+    const jboolean keep_going = ud->env->CallBooleanMethod(ud->callback, ud->on_progress, progress);
+    if (ud->env->ExceptionCheck() == JNI_TRUE) {
+        return false;
+    }
+    return keep_going == JNI_TRUE;
+}
+} // namespace
+
+// Shared implementation of loadModel and loadModelWithProgress. When `progress` is
+// non-null, installs a load-progress trampoline; otherwise behaves identically to
+// the no-callback path.
+static void load_model_impl(JNIEnv *env, jobject obj, jobjectArray jparams, jobject progress) {
     common_params params;
 
     const jsize argc = env->GetArrayLength(jparams);
@@ -662,6 +681,21 @@ JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModel(JNIEnv *env
 
     LOG_INF("%s: loading model\n", __func__);
 
+    // Install the load-progress trampoline if the caller supplied a callback.
+    load_progress_ud progress_ud{};
+    if (progress != nullptr) {
+        jclass cb_cls = env->GetObjectClass(progress);
+        progress_ud.env         = env;
+        progress_ud.callback    = progress;
+        progress_ud.on_progress = env->GetMethodID(cb_cls, "onProgress", "(F)Z");
+        if (progress_ud.on_progress == nullptr) {
+            fail_load("LoadProgressCallback.onProgress(float) not found");
+            return;
+        }
+        params.load_progress_callback           = jni_load_progress_trampoline;
+        params.load_progress_callback_user_data = &progress_ud;
+    }
+
     if (!jctx->server.load_model(params)) {
         fail_load("could not load model from given file path");
         return;
@@ -706,6 +740,16 @@ JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModel(JNIEnv *env
     env->SetLongField(obj, f_model_pointer, reinterpret_cast<jlong>(jctx));
 }
 
+// JNI binding for LlamaModel.loadModel(String...): loads without a progress callback.
+JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModel(JNIEnv *env, jobject obj, jobjectArray jparams) {
+    jobject no_progress_sink = nullptr; // null selects the no-callback path in load_model_impl
+    load_model_impl(env, obj, jparams, no_progress_sink);
+}
+
+// JNI binding for LlamaModel.loadModelWithProgress(String[], LoadProgressCallback):
+// hands the Java callback object to the shared loader, which installs the progress
+// trampoline only when that object is non-null.
+JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModelWithProgress(JNIEnv *env, jobject obj,
+                                                                              jobjectArray jparams,
+                                                                              jobject       callback) {
+    jobject progress_sink = callback;
+    load_model_impl(env, obj, jparams, progress_sink);
+}
+
 JNIEXPORT jstring JNICALL Java_net_ladenthin_llama_LlamaModel_getModelMetaJson(JNIEnv *env, jobject obj) {
     REQUIRE_SERVER_CONTEXT(nullptr);
     if (jctx->vocab_only) {
diff --git a/src/main/java/net/ladenthin/llama/LlamaModel.java b/src/main/java/net/ladenthin/llama/LlamaModel.java
@@ -58,6 +58,24 @@ public LlamaModel(ModelParameters parameters) {
 		loadModel(parameters.toArray());
 	}
 
+	/**
+	 * Load the model and forward progress updates to {@code progress}. The callback is
+	 * invoked synchronously on the constructor thread by the native loader and may
+	 * return {@code false} to abort the load (in which case this constructor throws
+	 * {@link LlamaException}).
+	 *
+	 * @param parameters the set of options
+	 * @param progress   load progress sink; {@code null} disables the callback
+	 * @throws LlamaException if loading fails or the callback aborts
+	 */
+	public LlamaModel(ModelParameters parameters, LoadProgressCallback progress) {
+		if (progress == null) {
+			loadModel(parameters.toArray());
+		} else {
+			loadModelWithProgress(parameters.toArray(), progress);
+		}
+	}
+
 	/**
 	 * Generate and return a whole answer with custom parameters. Note, that the prompt isn't preprocessed in any
 	 * way, nothing like "User: ", "###Instruction", etc. is added.
@@ -257,6 +275,8 @@ public void close() {
 
 	private native void loadModel(String... parameters) throws LlamaException;
 
+	/**
+	 * Native loader that also reports load progress; implemented by
+	 * {@code Java_net_ladenthin_llama_LlamaModel_loadModelWithProgress} in jllama.cpp.
+	 *
+	 * @param parameters the model options as an argument array
+	 * @param callback   receives progress through {@code onProgress(float)}
+	 * @throws LlamaException declared for load failures raised by the native side
+	 */
+	private native void loadModelWithProgress(String[] parameters, LoadProgressCallback callback) throws LlamaException;
+
 	private native void delete();
 	
 	native void releaseTask(int taskId);
diff --git a/src/main/java/net/ladenthin/llama/LoadProgressCallback.java b/src/main/java/net/ladenthin/llama/LoadProgressCallback.java
@@ -0,0 +1,30 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+/**
+ * Callback through which the native loader reports how far a model load has progressed.
+ * <p>
+ * Supply an implementation to
+ * {@link LlamaModel#LlamaModel(ModelParameters, LoadProgressCallback)}; it is wired to
+ * llama.cpp's {@code llama_model_params.progress_callback} hook. Invocations happen
+ * synchronously on the loader thread, which is the thread that called the constructor,
+ * and each one carries a value in {@code [0.0, 1.0]}.
+ * </p>
+ * <p>
+ * The return value steers the load: {@code true} lets it continue, while {@code false}
+ * makes the native loader abort and report failure, so the constructor throws
+ * {@link LlamaException}.
+ * </p>
+ */
+@FunctionalInterface
+public interface LoadProgressCallback {
+
+    /**
+     * Called with the current load progress.
+     *
+     * @param progress fraction in {@code [0.0, 1.0]}
+     * @return {@code true} to continue loading, {@code false} to abort
+     */
+    boolean onProgress(float progress);
+}
diff --git a/src/test/java/net/ladenthin/llama/LoadProgressCallbackTest.java b/src/test/java/net/ladenthin/llama/LoadProgressCallbackTest.java
@@ -0,0 +1,93 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import org.junit.Assume;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+@ClaudeGenerated(
+        purpose = "Verify LoadProgressCallback receives non-decreasing progress values in [0,1] "
+                + "during a real model load, and that returning false from the callback aborts the load."
+)
+public class LoadProgressCallbackTest {
+
+    /** Skips the calling test when the model file is not present on disk. */
+    private static void assumeModelPresent() {
+        Assume.assumeTrue("Model file not found", new java.io.File(TestConstants.MODEL_PATH).exists());
+    }
+
+    /** Builds the small-context load parameters shared by every test in this class. */
+    private static ModelParameters smallContextParameters() {
+        int gpuLayers = Integer.getInteger(TestConstants.PROP_TEST_NGL, TestConstants.DEFAULT_TEST_NGL);
+        return new ModelParameters()
+                .setCtxSize(128)
+                .setModel(TestConstants.MODEL_PATH)
+                .setGpuLayers(gpuLayers)
+                .setFit(false);
+    }
+
+    /**
+     * A full load must deliver at least one update, keep every value inside [0, 1],
+     * end at (or near) completion, never go backwards, and actually advance.
+     */
+    @Test
+    public void receivesProgressUpdates() {
+        assumeModelPresent();
+
+        final List<Float> seen = new ArrayList<>();
+        LoadProgressCallback recorder = value -> {
+            seen.add(value);
+            return true;
+        };
+
+        try (LlamaModel model = new LlamaModel(smallContextParameters(), recorder)) {
+            // nothing to do here: reaching this point means the load completed
+        }
+
+        assertFalse("expected at least one progress update", seen.isEmpty());
+        final Float first = seen.get(0);
+        final Float last = seen.get(seen.size() - 1);
+
+        for (Float p : seen) {
+            assertTrue("progress out of range: " + p, p >= 0.0f && p <= 1.0f);
+        }
+        // The final update should reach (or be very close to) 1.0.
+        assertTrue("last progress should reach completion, got " + last, last >= 0.9f);
+        // Each update must be at least as large as the one before it.
+        for (int i = 1; i < seen.size(); i++) {
+            final Float before = seen.get(i - 1);
+            final Float current = seen.get(i);
+            assertTrue("progress decreased at index " + i + ": " + before + " -> " + current,
+                    current >= before);
+        }
+        // Sanity check: the first and last values differ, so progress really moved.
+        assertNotEquals("progress never advanced", first, last);
+    }
+
+    /** A callback that returns false must make the constructor throw LlamaException. */
+    @Test
+    public void returningFalseAbortsLoad() {
+        assumeModelPresent();
+
+        LoadProgressCallback abortImmediately = value -> false;
+        try {
+            new LlamaModel(smallContextParameters(), abortImmediately).close();
+            fail("expected LlamaException when callback aborts load");
+        } catch (LlamaException expected) {
+            // pass
+        }
+    }
+
+    /** Passing null as the callback must still load the model successfully. */
+    @Test
+    public void nullCallbackBehavesAsDefault() {
+        assumeModelPresent();
+
+        try (LlamaModel model = new LlamaModel(smallContextParameters(), null)) {
+            // no callback wired; just verifies the null-overload routes to plain loadModel
+        }
+    }
+}