Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,52 @@ into `models/` out-of-band.
clang-format -i src/main/cpp/*.cpp src/main/cpp/*.hpp # Format C++ code
```

### Javadoc — must build cleanly before `mvn package`

The release packaging job runs `mvn package` with the `release` profile, which attaches
a javadoc jar via `maven-javadoc-plugin`. The plugin treats Javadoc tool **errors** as
build failures (warnings are tolerated). After changing any public/protected Java API,
verify the javadoc build succeeds locally:

```bash
mvn clean javadoc:jar -DskipTests=true -Dgpg.skip=true
# expected: BUILD SUCCESS
```

Common Javadoc errors that fail the build (not warnings):

- **Unbalanced HTML**: `</p>` without a matching `<p>`, mismatched `<ul>`/`<li>`, stray
closing tags. Symptom: `error: unexpected end tag: </p>`.
- **Invalid `{@link …}` targets**: typo'd class, method, or parameter name.
- **Self-closing void HTML elements**: `<br/>` written instead of plain `<br>`, e.g. inside
`<pre>` blocks in HTML5 mode (rare but seen). Symptom: `error: self-closing element not allowed`.

Common Javadoc *warnings* (do not fail the build, but should be cleaned up on new code):

- `no main description` — a doc comment containing only `@param`/`@return`/`@throws`
tags with no leading prose. Fix: add a one-line description before the tags.
- `no @return` / `no @param` — public method missing the tag. Fix: add it.
- `no comment` — public method/field/enum constant has no doc comment at all.
- `use of default constructor, which does not provide a comment` — public class with
no explicit constructor (the synthetic default has no Javadoc). Fix: add an explicit
no-arg constructor with a Javadoc comment.

Preferred doc-comment shapes for getters and small value types:

```java
/**
* Brief one-line description of the value.
*
* @return the value
*/
public T getThing() { ... }
```

A bare `/** @return … */` triggers `no main description`; add a leading sentence.

If the local check passes (`BUILD SUCCESS`), the `mvn package` job in
`.github/workflows/publish.yml` will pass the `attach-javadocs` step.

## Architecture

### Two-Layer Design
Expand Down
46 changes: 45 additions & 1 deletion src/main/cpp/jllama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,26 @@
llama_backend_free();
}

JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModel(JNIEnv *env, jobject obj, jobjectArray jparams) {
// Trampoline state for llama.cpp's load_progress_callback. The native loader runs
// on the calling JNI thread so we can capture JNIEnv directly. Lifetime is bounded
// by the single load_model_impl call.
namespace {
struct load_progress_ud {
JNIEnv *env;
jobject callback;
jmethodID on_progress;
};

bool jni_load_progress_trampoline(float progress, void *user_data) {

Check failure on line 611 in src/main/cpp/jllama.cpp

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Replace this use of "void *" with a more meaningful type.

See more on https://sonarcloud.io/project/issues?id=bernardladenthin_java-llama.cpp&issues=AZ5TsjMti-BUK6nuC9zn&open=AZ5TsjMti-BUK6nuC9zn&pullRequest=188
auto *ud = static_cast<load_progress_ud *>(user_data);
return ud->env->CallBooleanMethod(ud->callback, ud->on_progress, progress) == JNI_TRUE;
}
} // namespace

// Shared implementation of loadModel and loadModelWithProgress. When `progress` is
// non-null, installs a load-progress trampoline; otherwise behaves identically to
// the no-callback path.
static void load_model_impl(JNIEnv *env, jobject obj, jobjectArray jparams, jobject progress) {
common_params params;

const jsize argc = env->GetArrayLength(jparams);
Expand Down Expand Up @@ -662,6 +681,21 @@

LOG_INF("%s: loading model\n", __func__);

// Install the load-progress trampoline if the caller supplied a callback.
load_progress_ud progress_ud{};
if (progress != nullptr) {
jclass cb_cls = env->GetObjectClass(progress);
progress_ud.env = env;
progress_ud.callback = progress;
progress_ud.on_progress = env->GetMethodID(cb_cls, "onProgress", "(F)Z");
if (progress_ud.on_progress == nullptr) {
fail_load("LoadProgressCallback.onProgress(float) not found");
return;
}
params.load_progress_callback = jni_load_progress_trampoline;
params.load_progress_callback_user_data = &progress_ud;
}

if (!jctx->server.load_model(params)) {
fail_load("could not load model from given file path");
return;
Expand Down Expand Up @@ -706,6 +740,16 @@
env->SetLongField(obj, f_model_pointer, reinterpret_cast<jlong>(jctx));
}

// JNI entry point for LlamaModel.loadModel. Forwards env, obj and jparams to the shared
// load_model_impl and passes nullptr as the progress callback, so no load-progress
// trampoline is installed.
JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModel(JNIEnv *env, jobject obj, jobjectArray jparams) {
load_model_impl(env, obj, jparams, nullptr);
}

// JNI entry point for LlamaModel.loadModelWithProgress. Forwards all arguments to the shared
// load_model_impl; `callback` is the Java object whose onProgress(float) method is looked up
// there. A null `callback` takes the same path as loadModel (no trampoline installed).
JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModelWithProgress(JNIEnv *env, jobject obj,
jobjectArray jparams,
jobject callback) {
load_model_impl(env, obj, jparams, callback);
}

JNIEXPORT jstring JNICALL Java_net_ladenthin_llama_LlamaModel_getModelMetaJson(JNIEnv *env, jobject obj) {
REQUIRE_SERVER_CONTEXT(nullptr);
if (jctx->vocab_only) {
Expand Down
52 changes: 52 additions & 0 deletions src/main/java/net/ladenthin/llama/CancellationToken.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
//
// SPDX-License-Identifier: MIT

package net.ladenthin.llama;

/**
* Cancellation handle for a blocking {@link LlamaModel} call. Pass an instance to
* {@link LlamaModel#complete(InferenceParameters, CancellationToken)} and invoke
* {@link #cancel()} from another thread to abort the inference loop.
* <p>
* Cancellation is cooperative: {@link #cancel()} only sets a flag, and the inference
* loop checks that flag between generated tokens. Effective latency is therefore one
* token interval (typically tens to a few hundred ms). The native task is <em>not</em>
* unblocked mid-token because the underlying JNI reader cannot be safely freed while
* another thread is blocked inside it.
* </p>
* <p>
* A token may be reused across calls. {@link #cancel()} and {@link #isCancelled()} are
* safe to invoke concurrently with the inference loop.
* </p>
*/
public final class CancellationToken {

    /** Cancellation flag; {@code volatile} so a write from one thread is visible to readers on others. */
    private volatile boolean cancelRequested;

    /** Creates a token whose flag starts out cleared (not cancelled). */
    public CancellationToken() {
        // nothing to initialise: the flag defaults to false
    }

    /**
     * Reports the current state of the cancellation flag.
     *
     * @return {@code true} if {@link #cancel()} has been called since construction or since the
     *         last {@link #reset()}; {@code false} otherwise
     */
    public boolean isCancelled() {
        return this.cancelRequested;
    }

    /**
     * Requests cancellation by raising the flag. Calling it again while the flag is already
     * raised changes nothing.
     */
    public void cancel() {
        this.cancelRequested = true;
    }

    /** Lowers the flag again so this token can be reused. Package-private. */
    void reset() {
        this.cancelRequested = false;
    }
}
53 changes: 53 additions & 0 deletions src/main/java/net/ladenthin/llama/ChatChoice.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
//
// SPDX-License-Identifier: MIT

package net.ladenthin.llama;

/**
* One choice in a chat completion response: the assistant message and the finish reason.
* Mirrors the OpenAI {@code choices[i]} object.
*/
public final class ChatChoice {

    /** Position of this choice within the response's choices array. */
    private final int index;

    /** Assistant message carried by this choice. */
    private final ChatMessage message;

    /** Finish reason string, stored exactly as supplied to the constructor. */
    private final String finishReason;

    /**
     * Creates a choice from its three components. The arguments are stored as given; no
     * validation or normalisation is applied.
     *
     * @param index        the index in the choices array
     * @param message      the assistant's message for this choice
     * @param finishReason the finish reason (e.g. {@code "stop"}, {@code "length"}, {@code "tool_calls"})
     */
    public ChatChoice(int index, ChatMessage message, String finishReason) {
        this.finishReason = finishReason;
        this.message = message;
        this.index = index;
    }

    /**
     * Returns the position of this choice.
     *
     * @return the index in the choices array, as passed to the constructor
     */
    public int getIndex() {
        return this.index;
    }

    /**
     * Returns the assistant message of this choice.
     *
     * @return the message passed to the constructor
     */
    public ChatMessage getMessage() {
        return this.message;
    }

    /**
     * Returns the finish reason of this choice.
     *
     * @return the finish reason string exactly as passed to the constructor
     */
    public String getFinishReason() {
        return this.finishReason;
    }
}
113 changes: 113 additions & 0 deletions src/main/java/net/ladenthin/llama/ChatMessage.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
//
// SPDX-License-Identifier: MIT

package net.ladenthin.llama;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
* A single message in a chat conversation: a role ({@code "user"}, {@code "assistant"},
* {@code "system"}, or {@code "tool"}) and its textual content. Used by {@link Session}
* to accumulate conversation turns and by {@link ChatRequest} / {@link ChatResponse}
* for the typed chat API.
* <p>
* Tool-call turns have role {@code "assistant"}, possibly empty content, and a non-empty
* {@link #getToolCalls()} list. Tool-result turns have role {@code "tool"}, the tool's
* output as content, and {@link #getToolCallId()} pointing back at the originating call.
* </p>
*/
public final class ChatMessage {

    private final String role;
    private final String content;
    private final String toolCallId;
    /** Unmodifiable snapshot of the tool calls; never {@code null}. */
    private final List<ToolCall> toolCalls;

    /**
     * Creates a plain message with no tool-call id and an empty tool-call list.
     *
     * @param role    the message role
     * @param content the message text
     */
    public ChatMessage(String role, String content) {
        this(role, content, null, Collections.<ToolCall>emptyList());
    }

    /**
     * Full constructor including tool-related fields.
     * <p>
     * The {@code toolCalls} list is copied into an unmodifiable snapshot, so later changes to
     * the caller's list do not affect this message.
     * </p>
     *
     * @param role       the message role
     * @param content    the message text (may be empty for assistant tool-call turns)
     * @param toolCallId for tool-result turns ({@code role="tool"}), the id of the originating call; {@code null} otherwise
     * @param toolCalls  for assistant tool-call turns, the list of calls; {@code null} is treated as empty
     */
    public ChatMessage(String role, String content, String toolCallId, List<ToolCall> toolCalls) {
        this.role = role;
        this.content = content;
        this.toolCallId = toolCallId;
        // Defensive copy: without it the caller (or anyone holding the getter's result) could
        // mutate the state of this otherwise immutable message.
        if (toolCalls == null || toolCalls.isEmpty()) {
            this.toolCalls = Collections.<ToolCall>emptyList();
        } else {
            this.toolCalls = Collections.unmodifiableList(new ArrayList<ToolCall>(toolCalls));
        }
    }

    /**
     * Factory for a tool-result turn.
     *
     * @param toolCallId the id of the originating tool call
     * @param content    the tool's output as a string
     * @return a {@link ChatMessage} with role {@code "tool"} and no tool calls
     */
    public static ChatMessage toolResult(String toolCallId, String content) {
        return new ChatMessage("tool", content, toolCallId, Collections.<ToolCall>emptyList());
    }

    /**
     * Factory for an assistant turn that issues tool calls.
     *
     * @param content   optional text accompanying the tool calls; {@code null} is stored as {@code ""}
     * @param toolCalls the tool calls to issue
     * @return a {@link ChatMessage} with role {@code "assistant"} and a {@code null} tool-call id
     */
    public static ChatMessage assistantToolCalls(String content, List<ToolCall> toolCalls) {
        return new ChatMessage("assistant", content == null ? "" : content, null, toolCalls);
    }

    /**
     * Returns the message role.
     *
     * @return the role string passed at construction
     */
    public String getRole() {
        return role;
    }

    /**
     * Returns the message content.
     *
     * @return the message text passed at construction
     */
    public String getContent() {
        return content;
    }

    /**
     * Returns the tool-call id for tool-result turns.
     *
     * @return the originating tool call id, or {@code null} if none was supplied
     */
    public String getToolCallId() {
        return toolCallId;
    }

    /**
     * Returns the tool calls issued by an assistant turn.
     *
     * @return an unmodifiable list, never {@code null}; empty when the message carries no tool calls
     */
    public List<ToolCall> getToolCalls() {
        return toolCalls;
    }

    /**
     * Renders the message as {@code role: content}, with a {@code (tool_calls=N)} or
     * {@code (tool_call_id=ID)} marker after the role when tool data is present.
     *
     * @return a human-readable one-line summary of this message
     */
    @Override
    public String toString() {
        if (!toolCalls.isEmpty()) {
            return role + " (tool_calls=" + toolCalls.size() + "): " + content;
        }
        if (toolCallId != null) {
            return role + " (tool_call_id=" + toolCallId + "): " + content;
        }
        return role + ": " + content;
    }
}
Loading
Loading