bernardladenthin · bernardladenthin · May 23, 2026 · May 22, 2026 · May 22, 2026 · May 22, 2026
@@ -514,6 +514,52 @@ into `models/` out-of-band.
 clang-format -i src/main/cpp/*.cpp src/main/cpp/*.hpp   # Format C++ code
 ```
 
+### Javadoc — must build cleanly before `mvn package`
+
+The release packaging job runs `mvn package` with the `release` profile, which attaches
+a javadoc jar via `maven-javadoc-plugin`. The plugin treats Javadoc tool **errors** as
+build failures (warnings are tolerated). After changing any public/protected Java API,
+verify the javadoc build succeeds locally:
+
+```bash
+mvn clean javadoc:jar -DskipTests=true -Dgpg.skip=true
+# expected: BUILD SUCCESS
+```
+
+Common Javadoc errors that fail the build (not warnings):
+
+- **Unbalanced HTML**: `</p>` without a matching `<p>`, mismatched `<ul>`/`<li>`, stray
+  closing tags. Symptom: `error: unexpected end tag: </p>`.
+- **Invalid `{@link …}` targets**: typo'd class, method, or parameter name.
+  Symptom: `error: reference not found`.
+- **Self-closing void HTML elements**: `<br/>` written instead of `<br>`, e.g. inside
+  `<pre>` blocks, in HTML5 mode (rare but seen). Symptom:
+  `error: self-closing element not allowed`.
+
+Common Javadoc *warnings* (do not fail the build, but should be cleaned up on new code):
+
+- `no main description` — a doc comment containing only `@param`/`@return`/`@throws`
+  tags with no leading prose. Fix: add a one-line description before the tags.
+- `no @return` / `no @param` — public method missing the tag. Fix: add it.
+- `no comment` — public method/field/enum constant has no doc comment at all.
+- `use of default constructor, which does not provide a comment` — public class with
+  no explicit constructor (the synthetic default has no Javadoc). Fix: add an explicit
+  no-arg constructor with a Javadoc comment.
+
+Preferred doc-comment shapes for getters and small value types:
+
+```java
+/**
+ * Brief one-line description of the value.
+ *
+ * @return the value
+ */
+public T getThing() { ... }
+```
+
+A bare `/** @return … */` triggers `no main description`; add a leading sentence.
+
+If the local check passes (`BUILD SUCCESS`), the `mvn package` job in
+`.github/workflows/publish.yml` will pass the `attach-javadocs` step.
+
 ## Architecture
 
 ### Two-Layer Design

@@ -598,7 +598,26 @@
     llama_backend_free();
 }
 
-JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModel(JNIEnv *env, jobject obj, jobjectArray jparams) {
+// Trampoline state for llama.cpp's load_progress_callback. The native loader runs
+// on the calling JNI thread so we can capture JNIEnv directly. Lifetime is bounded
+// by the single load_model_impl call.
+namespace {
+// User data handed to llama.cpp as an opaque `void *` and passed back to the trampoline.
+struct load_progress_ud {
+    JNIEnv  *env;          // JNI environment used to invoke the callback
+    jobject  callback;     // Java callback object whose onProgress method is invoked
+    jmethodID on_progress; // method id of `boolean onProgress(float)` on that object
+};
+
+// Forwards one progress notification to the Java callback.
+// Returns true to let loading continue and false to request that the load be aborted.
+bool jni_load_progress_trampoline(float progress, void *user_data) {
+    auto *ud = static_cast<load_progress_ud *>(user_data);
+    const jboolean keep_going = ud->env->CallBooleanMethod(ud->callback, ud->on_progress, progress);
+    // If onProgress() threw, the exception is still pending and the value returned above
+    // is meaningless. Invoking the callback again on the next progress tick with an
+    // exception pending is not permitted by JNI, so stop the load here and leave the
+    // exception pending for the Java caller.
+    // NOTE(review): confirm the load-failure path copes with an already-pending exception.
+    if (ud->env->ExceptionCheck() == JNI_TRUE) {
+        return false;
+    }
+    return keep_going == JNI_TRUE;
+}
+} // namespace
+
+// Shared implementation of loadModel and loadModelWithProgress. When `progress` is
+// non-null, installs a load-progress trampoline; otherwise behaves identically to
+// the no-callback path.
+static void load_model_impl(JNIEnv *env, jobject obj, jobjectArray jparams, jobject progress) {
     common_params params;
 
     const jsize argc = env->GetArrayLength(jparams);
@@ -662,6 +681,21 @@
 
     LOG_INF("%s: loading model\n", __func__);
 
+    // Install the load-progress trampoline if the caller supplied a callback.
+    load_progress_ud progress_ud{};
+    if (progress != nullptr) {
+        jclass cb_cls = env->GetObjectClass(progress);
+        progress_ud.env         = env;
+        progress_ud.callback    = progress;
+        progress_ud.on_progress = env->GetMethodID(cb_cls, "onProgress", "(F)Z");
+        if (progress_ud.on_progress == nullptr) {
+            fail_load("LoadProgressCallback.onProgress(float) not found");
+            return;
+        }
+        params.load_progress_callback           = jni_load_progress_trampoline;
+        params.load_progress_callback_user_data = &progress_ud;
+    }
+
     if (!jctx->server.load_model(params)) {
         fail_load("could not load model from given file path");
         return;
@@ -706,6 +740,16 @@
     env->SetLongField(obj, f_model_pointer, reinterpret_cast<jlong>(jctx));
 }
 
+// JNI entry point for LlamaModel.loadModel: delegates to load_model_impl with no
+// progress callback (nullptr).
+JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModel(JNIEnv *env, jobject obj, jobjectArray jparams) {
+    load_model_impl(env, obj, jparams, nullptr);
+}
+
+// JNI entry point for LlamaModel.loadModelWithProgress: delegates to load_model_impl,
+// forwarding `callback` as the progress listener.
+JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModelWithProgress(JNIEnv *env, jobject obj,
+                                                                              jobjectArray jparams,
+                                                                              jobject       callback) {
+    load_model_impl(env, obj, jparams, callback);
+}
+
 JNIEXPORT jstring JNICALL Java_net_ladenthin_llama_LlamaModel_getModelMetaJson(JNIEnv *env, jobject obj) {
     REQUIRE_SERVER_CONTEXT(nullptr);
     if (jctx->vocab_only) {

@@ -0,0 +1,52 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+/**
+ * Cancellation handle for a blocking {@link LlamaModel} call. Pass an instance to
+ * {@link LlamaModel#complete(InferenceParameters, CancellationToken)} and invoke
+ * {@link #cancel()} from another thread to abort the inference loop.
+ * <p>
+ * Cancellation is cooperative: {@link #cancel()} only sets a flag, and the inference
+ * loop checks that flag between generated tokens. Effective latency is therefore one
+ * token interval (typically tens to a few hundred ms). The native task is <em>not</em>
+ * unblocked mid-token because the underlying JNI reader cannot be safely freed while
+ * another thread is blocked inside it.
+ * </p>
+ * <p>
+ * A token may be reused across calls. {@link #cancel()} and {@link #isCancelled()} are
+ * safe to invoke concurrently with the inference loop.
+ * </p>
+ */
+public final class CancellationToken {
+
+    /** Cancellation flag; {@code volatile} so a write from one thread is visible to others. */
+    private volatile boolean cancelled = false;
+
+    /** Creates a token that starts out in the not-cancelled state. */
+    public CancellationToken() {
+        super();
+    }
+
+    /**
+     * Reports whether cancellation has been requested.
+     *
+     * @return {@code true} after {@link #cancel()} was called and until {@link #reset()} clears the flag
+     */
+    public boolean isCancelled() {
+        return this.cancelled;
+    }
+
+    /**
+     * Requests cancellation by raising the flag that the inference loop observes; the loop
+     * returns at its next token boundary. Repeated calls have no further effect, and the
+     * method may be called from any thread.
+     */
+    public void cancel() {
+        this.cancelled = true;
+    }
+
+    /** Lowers the cancelled flag so this token can be used for another call. Package-private. */
+    void reset() {
+        this.cancelled = false;
+    }
+}
@@ -0,0 +1,53 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+/**
+ * One choice in a chat completion response: the assistant message and the finish reason.
+ * Mirrors the OpenAI {@code choices[i]} object.
+ */
+public final class ChatChoice {
+
+    private final int index;
+    private final ChatMessage message;
+    private final String finishReason;
+
+    /**
+     * Construct a chat choice.
+     *
+     * @param index        the index in the choices array
+     * @param message      the assistant's message for this choice
+     * @param finishReason the finish reason (e.g. {@code "stop"}, {@code "length"}, {@code "tool_calls"});
+     *                     {@code null} is stored as {@code ""}
+     */
+    public ChatChoice(int index, ChatMessage message, String finishReason) {
+        this.index = index;
+        this.message = message;
+        // Normalise an absent reason to "" so getFinishReason() honours its documented contract.
+        this.finishReason = finishReason == null ? "" : finishReason;
+    }
+
+    /**
+     * Choice index.
+     *
+     * @return the integer index in the choices array
+     */
+    public int getIndex() {
+        return index;
+    }
+
+    /**
+     * Assistant message accessor.
+     *
+     * @return the assistant's reply (may include tool_calls)
+     */
+    public ChatMessage getMessage() {
+        return message;
+    }
+
+    /**
+     * Finish reason accessor.
+     *
+     * @return the OAI finish reason string, or {@code ""} if absent; never {@code null}
+     */
+    public String getFinishReason() {
+        return finishReason;
+    }
+}
@@ -0,0 +1,113 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A single message in a chat conversation: a role ({@code "user"}, {@code "assistant"},
+ * {@code "system"}, or {@code "tool"}) and its textual content. Used by {@link Session}
+ * to accumulate conversation turns and by {@link ChatRequest} / {@link ChatResponse}
+ * for the typed chat API.
+ * <p>
+ * Tool-call turns have role {@code "assistant"}, possibly empty content, and a non-empty
+ * {@link #getToolCalls()} list. Tool-result turns have role {@code "tool"}, the tool's
+ * output as content, and {@link #getToolCallId()} pointing back at the originating call.
+ * </p>
+ */
+public final class ChatMessage {
+
+    private final String role;
+    private final String content;
+    private final String toolCallId;
+    /** Unmodifiable snapshot of the tool calls; never {@code null}. */
+    private final List<ToolCall> toolCalls;
+
+    /**
+     * Plain user/assistant/system message.
+     *
+     * @param role    the message role
+     * @param content the message text
+     */
+    public ChatMessage(String role, String content) {
+        this(role, content, null, Collections.<ToolCall>emptyList());
+    }
+
+    /**
+     * Full constructor including tool-related fields.
+     * <p>
+     * The {@code toolCalls} list is copied, so later changes to the caller's list do not
+     * affect this message.
+     * </p>
+     *
+     * @param role       the message role
+     * @param content    the message text (may be empty for assistant tool-call turns)
+     * @param toolCallId for tool-result turns ({@code role="tool"}), the id of the originating call; {@code null} otherwise
+     * @param toolCalls  for assistant tool-call turns, the list of calls; empty or {@code null} otherwise
+     */
+    public ChatMessage(String role, String content, String toolCallId, List<ToolCall> toolCalls) {
+        this.role = role;
+        this.content = content;
+        this.toolCallId = toolCallId;
+        // Defensive, unmodifiable copy: keeps this value object immutable even if the caller
+        // keeps mutating the list it passed in, and stops getToolCalls() leaking a mutable view.
+        if (toolCalls == null || toolCalls.isEmpty()) {
+            this.toolCalls = Collections.<ToolCall>emptyList();
+        } else {
+            this.toolCalls = Collections.unmodifiableList(new ArrayList<ToolCall>(toolCalls));
+        }
+    }
+
+    /**
+     * Factory for a tool-result turn.
+     *
+     * @param toolCallId the id of the originating tool call
+     * @param content    the tool's output as a string
+     * @return a {@link ChatMessage} with role {@code "tool"}
+     */
+    public static ChatMessage toolResult(String toolCallId, String content) {
+        return new ChatMessage("tool", content, toolCallId, Collections.<ToolCall>emptyList());
+    }
+
+    /**
+     * Factory for an assistant turn that issues tool calls.
+     *
+     * @param content   optional reasoning text accompanying the tool calls; {@code null} is stored as {@code ""}
+     * @param toolCalls the tool calls to issue
+     * @return a {@link ChatMessage} with role {@code "assistant"}
+     */
+    public static ChatMessage assistantToolCalls(String content, List<ToolCall> toolCalls) {
+        return new ChatMessage("assistant", content == null ? "" : content, null, toolCalls);
+    }
+
+    /**
+     * Message role accessor.
+     *
+     * @return the message role string
+     */
+    public String getRole() {
+        return role;
+    }
+
+    /**
+     * Message content accessor.
+     *
+     * @return the message text content
+     */
+    public String getContent() {
+        return content;
+    }
+
+    /**
+     * Tool-call id for tool-result turns.
+     *
+     * @return the originating tool call id, or {@code null} for non-tool messages
+     */
+    public String getToolCallId() {
+        return toolCallId;
+    }
+
+    /**
+     * Tool calls issued by an assistant turn.
+     *
+     * @return an unmodifiable list of calls, never {@code null}; empty when the message is not a tool-call turn
+     */
+    public List<ToolCall> getToolCalls() {
+        return toolCalls;
+    }
+
+    /**
+     * Short human-readable form: the role, an optional tool annotation, then the content.
+     *
+     * @return the role and content, annotated with the tool-call count or the tool-call id when present
+     */
+    @Override
+    public String toString() {
+        if (!toolCalls.isEmpty()) {
+            return role + " (tool_calls=" + toolCalls.size() + "): " + content;
+        }
+        if (toolCallId != null) {
+            return role + " (tool_call_id=" + toolCallId + "): " + content;
+        }
+        return role + ": " + content;
+    }
+}