Skip to content

Commit c529499

Browse files
committed
Add completeWithStats() for typed Usage/Timings/logprobs on plain completion
complete() returned only the generated text, while chat() already exposed Usage/Timings/TokenLogprob via ChatResponse. This commit brings the plain completion path to parity:

- New CompletionResult value type (text + Usage + Timings + List<TokenLogprob> + StopReason + raw JSON).
- New LlamaModel.completeWithStats(InferenceParameters), which calls the existing non-streaming JNI path and parses the response via a new CompletionResponseParser.parseCompletionResult.
- Maps the non-OAI completion fields: content -> text, tokens_evaluated -> Usage.promptTokens, tokens_predicted -> Usage.completionTokens, timings sub-object -> Timings, completion_probabilities -> List<TokenLogprob>, stop_type -> StopReason.

complete() (the String-returning overload) is unchanged for backwards compatibility.

5 unit tests in CompletionResultTest (model-free): full response, missing-fields defaults, stop reason mapping (eos / limit / word), malformed input.

mvn javadoc:jar: BUILD SUCCESS, no new warnings.

https://claude.ai/code/session_01R4ZrEy3ptJDLuUgUKuM4Gy
1 parent f2c7ed1 commit c529499

4 files changed

Lines changed: 237 additions & 0 deletions

File tree

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
5+
package net.ladenthin.llama;
6+
7+
import java.util.Collections;
8+
import java.util.List;
9+
10+
/**
11+
* Typed result of {@link LlamaModel#completeWithStats(InferenceParameters)}.
12+
* <p>
13+
* Bundles the generated text with parsed {@link Usage}, {@link Timings},
14+
* per-token {@link TokenLogprob} entries (populated only when
15+
* {@link InferenceParameters#setNProbs(int)} &gt; 0), and the {@link StopReason}.
16+
* The raw native JSON is exposed via {@link #getRawJson()} as an escape hatch.
17+
* </p>
18+
*/
19+
public final class CompletionResult {
20+
21+
private final String text;
22+
private final Usage usage;
23+
private final Timings timings;
24+
private final List<TokenLogprob> logprobs;
25+
private final StopReason stopReason;
26+
private final String rawJson;
27+
28+
/**
29+
* Construct a completion result.
30+
*
31+
* @param text the generated text
32+
* @param usage parsed prompt/completion token counts
33+
* @param timings parsed result timings
34+
* @param logprobs typed per-token logprob entries (empty when {@code n_probs} was not requested)
35+
* @param stopReason the parsed stop reason
36+
* @param rawJson the raw native JSON string
37+
*/
38+
public CompletionResult(String text, Usage usage, Timings timings,
39+
List<TokenLogprob> logprobs, StopReason stopReason, String rawJson) {
40+
this.text = text;
41+
this.usage = usage;
42+
this.timings = timings;
43+
this.logprobs = logprobs == null ? Collections.<TokenLogprob>emptyList() : logprobs;
44+
this.stopReason = stopReason;
45+
this.rawJson = rawJson;
46+
}
47+
48+
/**
49+
* Generated text accessor.
50+
* @return the generated text string
51+
*/
52+
public String getText() {
53+
return text;
54+
}
55+
56+
/**
57+
* Token-count usage accessor.
58+
* @return parsed {@link Usage} (prompt + completion token counts)
59+
*/
60+
public Usage getUsage() {
61+
return usage;
62+
}
63+
64+
/**
65+
* Timings accessor.
66+
* @return parsed {@link Timings} for this completion
67+
*/
68+
public Timings getTimings() {
69+
return timings;
70+
}
71+
72+
/**
73+
* Per-token logprob entries.
74+
* @return list of {@link TokenLogprob}; empty when {@code n_probs} was not requested
75+
*/
76+
public List<TokenLogprob> getLogprobs() {
77+
return logprobs;
78+
}
79+
80+
/**
81+
* Stop reason accessor.
82+
* @return the {@link StopReason} parsed from {@code stop_type}
83+
*/
84+
public StopReason getStopReason() {
85+
return stopReason;
86+
}
87+
88+
/**
89+
* Raw JSON passthrough.
90+
* @return the native response JSON string
91+
*/
92+
public String getRawJson() {
93+
return rawJson;
94+
}
95+
96+
@Override
97+
public String toString() {
98+
return text;
99+
}
100+
}

src/main/java/net/ladenthin/llama/LlamaModel.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,23 @@ public String complete(InferenceParameters parameters) {
9090
return completionParser.parse(json).text;
9191
}
9292

93+
/**
94+
* Typed variant of {@link #complete(InferenceParameters)} that surfaces per-completion
95+
* {@link Usage}, {@link Timings}, {@link TokenLogprob} entries, and {@link StopReason}.
96+
* <p>
97+
* Logprobs are populated only when {@link InferenceParameters#setNProbs(int)} is &gt; 0.
98+
* The raw native JSON is preserved on {@link CompletionResult#getRawJson()}.
99+
*
100+
* @param parameters the inference configuration
101+
* @return a populated {@link CompletionResult}
102+
*/
103+
public CompletionResult completeWithStats(InferenceParameters parameters) {
104+
parameters.setStream(false);
105+
int taskId = requestCompletion(parameters.toString());
106+
String json = receiveCompletionJson(taskId);
107+
return completionParser.parseCompletionResult(json);
108+
}
109+
93110
/**
94111
* Cancellable variant of {@link #complete(InferenceParameters)}. Runs in streaming mode
95112
* internally so the inference loop can observe a {@link CancellationToken#cancel()} call

src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@
77

88
import com.fasterxml.jackson.databind.JsonNode;
99
import com.fasterxml.jackson.databind.ObjectMapper;
10+
import net.ladenthin.llama.CompletionResult;
1011
import net.ladenthin.llama.InferenceParameters;
1112
import net.ladenthin.llama.LlamaOutput;
1213
import net.ladenthin.llama.StopReason;
14+
import net.ladenthin.llama.Timings;
1315
import net.ladenthin.llama.TokenLogprob;
16+
import net.ladenthin.llama.Usage;
1417

1518
import java.io.IOException;
1619
import java.util.ArrayList;
@@ -159,6 +162,35 @@ public List<TokenLogprob> parseLogprobs(JsonNode root) {
159162
return result;
160163
}
161164

165+
/**
166+
* Parse a {@link CompletionResult} from the non-streaming, non-OAI completion JSON
167+
* emitted by {@code server_task_result_cmpl_final::to_json_non_oaicompat}.
168+
* <p>
169+
* Maps {@code content} → text, {@code tokens_evaluated}/{@code tokens_predicted} →
170+
* {@link Usage}, the {@code timings} sub-object → {@link Timings},
171+
* {@code completion_probabilities} → {@link TokenLogprob} list, and
172+
* {@code stop_type} → {@link StopReason}.
173+
*
174+
* @param json raw JSON string from the native completion response
175+
* @return a populated {@link CompletionResult}; fields default to empty/zero on parse failure
176+
*/
177+
public CompletionResult parseCompletionResult(String json) {
178+
try {
179+
JsonNode node = OBJECT_MAPPER.readTree(json);
180+
String text = extractContent(node);
181+
Usage usage = new Usage(
182+
node.path("tokens_evaluated").asLong(0L),
183+
node.path("tokens_predicted").asLong(0L));
184+
Timings timings = Timings.fromJson(node.path("timings"));
185+
List<TokenLogprob> logprobs = parseLogprobs(node);
186+
StopReason stopReason = StopReason.fromStopType(node.path("stop_type").asText(""));
187+
return new CompletionResult(text, usage, timings, logprobs, stopReason, json);
188+
} catch (IOException e) {
189+
return new CompletionResult("", new Usage(0L, 0L), Timings.fromJson(null),
190+
Collections.<TokenLogprob>emptyList(), StopReason.NONE, json);
191+
}
192+
}
193+
162194
private TokenLogprob parseLogprobEntry(JsonNode entry) {
163195
String token = entry.path("token").asText("");
164196
int tokenId = entry.path("id").asInt(-1);
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
5+
package net.ladenthin.llama;
6+
7+
import net.ladenthin.llama.json.CompletionResponseParser;
8+
import org.junit.Test;
9+
10+
import static org.junit.Assert.assertEquals;
11+
import static org.junit.Assert.assertNotNull;
12+
import static org.junit.Assert.assertTrue;
13+
14+
@ClaudeGenerated(
15+
purpose = "Verify CompletionResponseParser.parseCompletionResult maps the non-OAI completion JSON "
16+
+ "(content + tokens_evaluated/predicted + timings + completion_probabilities + stop_type) "
17+
+ "into a typed CompletionResult, and handles malformed input gracefully."
18+
)
19+
public class CompletionResultTest {
20+
21+
private final CompletionResponseParser parser = new CompletionResponseParser();
22+
23+
@Test
24+
public void parsesFullResponse() {
25+
String json = "{\"content\":\"hello world\",\"tokens_evaluated\":12,\"tokens_predicted\":5,"
26+
+ "\"stop\":true,\"stop_type\":\"eos\","
27+
+ "\"timings\":{\"prompt_n\":12,\"prompt_ms\":200.0,\"prompt_per_second\":60.0,"
28+
+ "\"predicted_n\":5,\"predicted_ms\":50.0,\"predicted_per_second\":100.0,"
29+
+ "\"cache_n\":3},"
30+
+ "\"completion_probabilities\":["
31+
+ "{\"token\":\"hello\",\"id\":15043,\"prob\":0.9,"
32+
+ "\"top_probs\":[{\"token\":\"hi\",\"id\":9932,\"prob\":0.05}]}]}";
33+
34+
CompletionResult r = parser.parseCompletionResult(json);
35+
assertEquals("hello world", r.getText());
36+
assertEquals(12L, r.getUsage().getPromptTokens());
37+
assertEquals(5L, r.getUsage().getCompletionTokens());
38+
assertEquals(17L, r.getUsage().getTotalTokens());
39+
assertEquals(12, r.getTimings().getPromptN());
40+
assertEquals(3, r.getTimings().getCacheN());
41+
assertEquals(100.0, r.getTimings().getPredictedPerSecond(), 1e-9);
42+
assertEquals(StopReason.EOS, r.getStopReason());
43+
44+
assertEquals(1, r.getLogprobs().size());
45+
TokenLogprob lp = r.getLogprobs().get(0);
46+
assertEquals("hello", lp.getToken());
47+
assertEquals(15043, lp.getTokenId());
48+
assertEquals(0.9f, lp.getLogprob(), 1e-4f);
49+
assertEquals(1, lp.getTopLogprobs().size());
50+
51+
assertNotNull(r.getRawJson());
52+
}
53+
54+
@Test
55+
public void missingFieldsDefaultToZero() {
56+
CompletionResult r = parser.parseCompletionResult("{\"content\":\"hi\",\"stop\":true,\"stop_type\":\"eos\"}");
57+
assertEquals("hi", r.getText());
58+
assertEquals(0L, r.getUsage().getTotalTokens());
59+
assertEquals(0, r.getTimings().getPromptN());
60+
assertTrue(r.getLogprobs().isEmpty());
61+
assertEquals(StopReason.EOS, r.getStopReason());
62+
}
63+
64+
@Test
65+
public void stopReasonLimit() {
66+
CompletionResult r = parser.parseCompletionResult(
67+
"{\"content\":\"\",\"stop\":true,\"stop_type\":\"limit\",\"truncated\":true,"
68+
+ "\"tokens_evaluated\":1,\"tokens_predicted\":10}");
69+
assertEquals(StopReason.MAX_TOKENS, r.getStopReason());
70+
assertEquals(10L, r.getUsage().getCompletionTokens());
71+
}
72+
73+
@Test
74+
public void stopReasonStopString() {
75+
CompletionResult r = parser.parseCompletionResult(
76+
"{\"content\":\"abc\",\"stop\":true,\"stop_type\":\"word\",\"stopping_word\":\"END\"}");
77+
assertEquals(StopReason.STOP_STRING, r.getStopReason());
78+
}
79+
80+
@Test
81+
public void malformedInputYieldsEmptyResult() {
82+
CompletionResult r = parser.parseCompletionResult("{not json");
83+
assertEquals("", r.getText());
84+
assertEquals(0L, r.getUsage().getTotalTokens());
85+
assertEquals(StopReason.NONE, r.getStopReason());
86+
assertTrue(r.getLogprobs().isEmpty());
87+
}
88+
}

0 commit comments

Comments
 (0)