Add completeWithStats() for typed Usage/Timings/logprobs on plain completion

claude · claude · commit c52949903284 · 2026-05-23T07:37:51.000Z
complete() returned only the generated text, while chat() already exposed Usage/Timings/TokenLogprob via ChatResponse. This commit parity-fills the plain completion path: - New CompletionResult value type (text + Usage + Timings + List<TokenLogprob> + StopReason + raw JSON). - New LlamaModel.completeWithStats(InferenceParameters) calling the existing non-streaming JNI path and parsing the response via a new CompletionResponseParser.parseCompletionResult. - Maps the non-OAI completion fields: content -> text, tokens_evaluated -> Usage.promptTokens, tokens_predicted -> Usage.completionTokens, timings sub-object -> Timings, completion_probabilities -> List<TokenLogprob>, stop_type -> StopReason. complete() (the String-returning overload) is unchanged for backwards compatibility. 5 unit tests in CompletionResultTest (model-free): full response, missing-fields defaults, stop reason mapping (eos / limit / word), malformed input. mvn javadoc:jar BUILD SUCCESS, no new warnings. https://claude.ai/code/session_01R4ZrEy3ptJDLuUgUKuM4Gy
diff --git a/src/main/java/net/ladenthin/llama/CompletionResult.java b/src/main/java/net/ladenthin/llama/CompletionResult.java
@@ -0,0 +1,100 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Typed result of {@link LlamaModel#completeWithStats(InferenceParameters)}.
+ * <p>
+ * Bundles the generated text with parsed {@link Usage}, {@link Timings},
+ * per-token {@link TokenLogprob} entries (populated only when
+ * {@link InferenceParameters#setNProbs(int)} &gt; 0), and the {@link StopReason}.
+ * The raw native JSON is exposed via {@link #getRawJson()} as an escape hatch.
+ * The logprob list is copied on construction and handed out as a read-only list.
+ */
+public final class CompletionResult {
+
+    private final String text;
+    private final Usage usage;
+    private final Timings timings;
+    private final List<TokenLogprob> logprobs;
+    private final StopReason stopReason;
+    private final String rawJson;
+
+    /**
+     * Construct a completion result; the logprob list is copied, not retained.
+     * @param text       the generated text
+     * @param usage      parsed prompt/completion token counts
+     * @param timings    parsed result timings
+     * @param logprobs   typed per-token logprob entries; {@code null} is treated as empty
+     * @param stopReason the parsed stop reason
+     * @param rawJson    the raw native JSON string
+     */
+    public CompletionResult(String text, Usage usage, Timings timings,
+                            List<TokenLogprob> logprobs, StopReason stopReason, String rawJson) {
+        this.text = text;
+        this.usage = usage;
+        this.timings = timings;
+        this.logprobs = logprobs == null ? Collections.<TokenLogprob>emptyList()
+                : Collections.unmodifiableList(new java.util.ArrayList<TokenLogprob>(logprobs));
+        this.stopReason = stopReason;
+        this.rawJson = rawJson;
+    }
+
+    /**
+     * Generated text accessor.
+     * @return the generated text string, exactly as passed to the constructor
+     */
+    public String getText() {
+        return text;
+    }
+
+    /**
+     * Token-count usage accessor.
+     * @return parsed {@link Usage} (prompt + completion token counts)
+     */
+    public Usage getUsage() {
+        return usage;
+    }
+
+    /**
+     * Timings accessor.
+     * @return parsed {@link Timings} for this completion
+     */
+    public Timings getTimings() {
+        return timings;
+    }
+
+    /**
+     * Per-token logprob entries.
+     * @return unmodifiable list of {@link TokenLogprob}; empty when {@code n_probs} was not requested
+     */
+    public List<TokenLogprob> getLogprobs() {
+        return logprobs;
+    }
+
+    /**
+     * Stop reason accessor.
+     * @return the {@link StopReason} parsed from {@code stop_type}
+     */
+    public StopReason getStopReason() {
+        return stopReason;
+    }
+
+    /**
+     * Raw JSON passthrough.
+     * @return the native response JSON string
+     */
+    public String getRawJson() {
+        return rawJson;
+    }
+
+    @Override
+    public String toString() {
+        return text == null ? "" : text; // toString() should never hand back null
+    }
+}
diff --git a/src/main/java/net/ladenthin/llama/LlamaModel.java b/src/main/java/net/ladenthin/llama/LlamaModel.java
@@ -90,6 +90,23 @@ public String complete(InferenceParameters parameters) {
 		return completionParser.parse(json).text;
 	}
 
+	/**
+	 * Typed counterpart of {@link #complete(InferenceParameters)} for non-streaming completions.
+	 * <p>
+	 * The result carries {@link Usage}, {@link Timings}, the {@link StopReason} and, when
+	 * {@link InferenceParameters#setNProbs(int)} is &gt; 0, {@link TokenLogprob} entries; the
+	 * native JSON stays reachable through {@link CompletionResult#getRawJson()}.
+	 *
+	 * @param parameters the inference configuration; streaming is switched off on this object
+	 * @return a populated {@link CompletionResult}
+	 */
+	public CompletionResult completeWithStats(InferenceParameters parameters) {
+		parameters.setStream(false);
+		final String request = parameters.toString();
+		final String response = receiveCompletionJson(requestCompletion(request));
+		return completionParser.parseCompletionResult(response);
+	}
+
 	/**
 	 * Cancellable variant of {@link #complete(InferenceParameters)}. Runs in streaming mode
 	 * internally so the inference loop can observe a {@link CancellationToken#cancel()} call
diff --git a/src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java b/src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java
@@ -7,10 +7,13 @@
 
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import net.ladenthin.llama.CompletionResult;
 import net.ladenthin.llama.InferenceParameters;
 import net.ladenthin.llama.LlamaOutput;
 import net.ladenthin.llama.StopReason;
+import net.ladenthin.llama.Timings;
 import net.ladenthin.llama.TokenLogprob;
+import net.ladenthin.llama.Usage;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -159,6 +162,35 @@ public List<TokenLogprob> parseLogprobs(JsonNode root) {
         return result;
     }
 
+    /**
+     * Parse a {@link CompletionResult} from the non-streaming, non-OAI completion JSON emitted by
+     * {@code server_task_result_cmpl_final::to_json_non_oaicompat}: {@code content} → text,
+     * {@code tokens_evaluated}/{@code tokens_predicted} → {@link Usage}, {@code timings} →
+     * {@link Timings}, {@code completion_probabilities} → logprobs, {@code stop_type} → stop reason.
+     * @param json raw JSON string from the native completion response; may be {@code null}
+     * @return the parsed result; an empty/zero result if {@code json} is {@code null} or malformed
+     */
+    public CompletionResult parseCompletionResult(String json) {
+        try {
+            // Null input, or a null tree (some Jackson versions on empty content), is a parse failure.
+            JsonNode node = json == null ? null : OBJECT_MAPPER.readTree(json);
+            if (node != null) {
+                String text = extractContent(node);
+                Usage usage = new Usage(
+                        node.path("tokens_evaluated").asLong(0L),
+                        node.path("tokens_predicted").asLong(0L));
+                Timings timings = Timings.fromJson(node.path("timings"));
+                List<TokenLogprob> logprobs = parseLogprobs(node);
+                StopReason stopReason = StopReason.fromStopType(node.path("stop_type").asText(""));
+                return new CompletionResult(text, usage, timings, logprobs, stopReason, json);
+            }
+        } catch (IOException e) {
+            // Malformed JSON is reported through the empty result below, not as an exception.
+        }
+        return new CompletionResult("", new Usage(0L, 0L), Timings.fromJson(null),
+                Collections.<TokenLogprob>emptyList(), StopReason.NONE, json);
+    }
+
     private TokenLogprob parseLogprobEntry(JsonNode entry) {
         String token = entry.path("token").asText("");
         int tokenId = entry.path("id").asInt(-1);
diff --git a/src/test/java/net/ladenthin/llama/CompletionResultTest.java b/src/test/java/net/ladenthin/llama/CompletionResultTest.java
@@ -0,0 +1,88 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import net.ladenthin.llama.json.CompletionResponseParser;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+@ClaudeGenerated(
+        purpose = "Verify CompletionResponseParser.parseCompletionResult maps the non-OAI completion JSON "
+                + "(content + tokens_evaluated/predicted + timings + completion_probabilities + stop_type) "
+                + "into a typed CompletionResult, and handles malformed input gracefully."
+)
+public class CompletionResultTest {
+
+    private final CompletionResponseParser parser = new CompletionResponseParser();
+
+    @Test
+    public void parsesFullResponse() {
+        String json = "{\"content\":\"hello world\",\"tokens_evaluated\":12,\"tokens_predicted\":5,"
+                + "\"stop\":true,\"stop_type\":\"eos\","
+                + "\"timings\":{\"prompt_n\":12,\"prompt_ms\":200.0,\"prompt_per_second\":60.0,"
+                + "\"predicted_n\":5,\"predicted_ms\":50.0,\"predicted_per_second\":100.0,"
+                + "\"cache_n\":3},"
+                + "\"completion_probabilities\":["
+                + "{\"token\":\"hello\",\"id\":15043,\"prob\":0.9,"
+                + "\"top_probs\":[{\"token\":\"hi\",\"id\":9932,\"prob\":0.05}]}]}";
+        CompletionResult r = parser.parseCompletionResult(json);
+        assertEquals("hello world", r.getText());
+        assertEquals(12L, r.getUsage().getPromptTokens());
+        assertEquals(5L,  r.getUsage().getCompletionTokens());
+        assertEquals(17L, r.getUsage().getTotalTokens());
+        assertEquals(12, r.getTimings().getPromptN());
+        assertEquals(3, r.getTimings().getCacheN());
+        assertEquals(100.0, r.getTimings().getPredictedPerSecond(), 1e-9);
+        assertEquals(StopReason.EOS, r.getStopReason());
+        // The single completion_probabilities entry must surface as one TokenLogprob.
+        assertEquals(1, r.getLogprobs().size());
+        TokenLogprob lp = r.getLogprobs().get(0);
+        assertEquals("hello", lp.getToken());
+        assertEquals(15043, lp.getTokenId());
+        assertEquals(0.9f, lp.getLogprob(), 1e-4f);
+        assertEquals(1, lp.getTopLogprobs().size());
+        // The raw payload must be handed through unchanged, not merely be non-null.
+        assertEquals(json, r.getRawJson());
+    }
+
+    @Test
+    public void missingFieldsDefaultToZero() {
+        CompletionResult r = parser.parseCompletionResult("{\"content\":\"hi\",\"stop\":true,\"stop_type\":\"eos\"}");
+        assertEquals("hi", r.getText());
+        assertEquals(0L, r.getUsage().getTotalTokens());
+        assertEquals(0, r.getTimings().getPromptN());
+        assertTrue(r.getLogprobs().isEmpty());
+        assertEquals(StopReason.EOS, r.getStopReason());
+    }
+
+    @Test
+    public void stopReasonLimit() {
+        CompletionResult r = parser.parseCompletionResult("{\"content\":\"\",\"stop\":true,"
+                + "\"stop_type\":\"limit\",\"truncated\":true,\"tokens_evaluated\":1,\"tokens_predicted\":10}");
+        assertEquals(StopReason.MAX_TOKENS, r.getStopReason());
+        assertEquals(10L, r.getUsage().getCompletionTokens());
+    }
+
+    @Test
+    public void stopReasonStopString() {
+        CompletionResult r = parser.parseCompletionResult(
+                "{\"content\":\"abc\",\"stop\":true,\"stop_type\":\"word\",\"stopping_word\":\"END\"}");
+        assertEquals(StopReason.STOP_STRING, r.getStopReason());
+    }
+
+    @Test
+    public void malformedInputYieldsEmptyResult() {
+        CompletionResult r = parser.parseCompletionResult("{not json");
+        assertNotNull(r); // a parse failure still yields a result object, never null
+        assertEquals("", r.getText());
+        assertEquals(0L, r.getUsage().getTotalTokens());
+        assertEquals(StopReason.NONE, r.getStopReason());
+        assertTrue(r.getLogprobs().isEmpty());
+        assertEquals("{not json", r.getRawJson()); // the unparseable input is kept for diagnosis
+    }
+}