Skip to content

Commit af5a7a9

Browse files
committed
fix(slot-metrics): unwrap next_token array + guard Session slot id
Follow-up to PR #251 (session slot pinning + KV-cache observability).

- SlotMetrics.getDecodedTokens()/getRemainingTokens() read next_token.n_decoded / n_remain directly, but llama.cpp's server_slot::to_json (b9739) serializes next_token as a JSON array containing a single object, so both accessors silently returned 0 against a live server. Unwrap next_token[0] (tolerating a bare object defensively). The ServerMetricsTest fixture used the wrong (object) shape and masked this; it now uses the real array shape.
- Session rejected a negative slot id only indirectly: every send()/stream() pins the slot via InferenceParameters.withSlotId, which throws on a negative id, so a Session built with a negative slot failed deep inside the first call. Validate slotId >= 0 at construction instead (the slot also backs save/restore/close), and cover it with a model-free SessionTest.
- Strengthen the typed-slot test to assert id/is_processing on the second (idle) slot so constant-return mutants can no longer survive; PIT mutation coverage for SlotMetrics is back to 100% (228/228).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_012HTtw6W4dEptrENvFihDHW
1 parent e2c548b commit af5a7a9

4 files changed

Lines changed: 71 additions & 4 deletions

File tree

src/main/java/net/ladenthin/llama/Session.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,24 @@ public Session(LlamaModel model, int slotId, @Nullable String systemMessage) {
7979
* the transformed instance — it cannot mutate the input.
8080
*
8181
* @param model the underlying model
82-
* @param slotId the slot id
82+
* @param slotId the slot id; must be non-negative (a session is pinned to one concrete slot
83+
* for both inference and {@link #save(String)} / {@link #restore(String)} / {@link #close()})
8384
* @param systemMessage optional system prompt
8485
* @param paramsCustomizer applied to each request's parameters; may be {@code null}
86+
* @throws IllegalArgumentException if {@code slotId} is negative
8587
*/
8688
public Session(
8789
LlamaModel model,
8890
int slotId,
8991
@Nullable String systemMessage,
9092
@Nullable UnaryOperator<InferenceParameters> paramsCustomizer) {
93+
// Validate here, not per request: every send()/stream() pins this slot id (see
94+
// buildParams), and the slot also backs save()/restore()/close(). A negative id is
95+
// meaningless for those, so reject it up front with a clear message rather than letting
96+
// InferenceParameters.withSlotId throw on the first inference call.
97+
if (slotId < 0) {
98+
throw new IllegalArgumentException("slotId must be non-negative, was " + slotId);
99+
}
91100
this.model = model;
92101
this.slotId = slotId;
93102
this.state = new SessionState(slotId, systemMessage);

src/main/java/net/ladenthin/llama/value/SlotMetrics.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,15 +75,29 @@ public long getCachedPromptTokens() {
7575
* @return decoded-token count
7676
*/
7777
public long getDecodedTokens() {
78-
return node.path("next_token").path("n_decoded").asLong(0L);
78+
return nextToken().path("n_decoded").asLong(0L);
7979
}
8080

8181
/**
8282
* Returns tokens remaining under the current generation limit.
8383
* @return remaining-token count
8484
*/
8585
public long getRemainingTokens() {
86-
return node.path("next_token").path("n_remain").asLong(0L);
86+
return nextToken().path("n_remain").asLong(0L);
87+
}
88+
89+
/**
90+
* Resolves the {@code next_token} payload node. llama.cpp's {@code server_slot::to_json}
91+
* (b9739) serializes {@code next_token} as a JSON <em>array containing a single object</em>,
92+
* so the counters live at {@code next_token[0]}. This unwraps that array; if a bare object
93+
* is encountered instead it is used directly, and anything else yields a missing node whose
94+
* accessors fall back to their defaults.
95+
*
96+
* @return the object node carrying {@code n_decoded} / {@code n_remain}, or a missing node
97+
*/
98+
private JsonNode nextToken() {
99+
JsonNode next = node.path("next_token");
100+
return next.isArray() ? next.path(0) : next;
87101
}
88102

89103
/**
src/test/java/net/ladenthin/llama/SessionTest.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
package net.ladenthin.llama;
5+
6+
import static org.hamcrest.MatcherAssert.assertThat;
7+
import static org.hamcrest.Matchers.containsString;
8+
import static org.junit.jupiter.api.Assertions.assertThrows;
9+
10+
import org.junit.jupiter.api.Test;
11+
12+
/**
13+
* Model-free unit tests for {@link Session} construction invariants. A session is pinned to one
14+
* concrete slot for both inference and {@link Session#save(String)} / {@link Session#restore(String)} /
15+
* {@link Session#close()}, so a negative slot id is rejected at construction rather than surfacing
16+
* deep inside the first {@code send()} call (where {@code InferenceParameters.withSlotId} would throw).
17+
*/
18+
public class SessionTest {
19+
20+
@Test
21+
public void negativeSlotIdRejectedAtConstruction() {
22+
// The slot-id guard runs before the model is dereferenced, so a null model still exercises it.
23+
IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> new Session(null, -1, null));
24+
assertThat(ex.getMessage(), containsString("slotId must be non-negative"));
25+
}
26+
27+
@Test
28+
public void negativeSlotIdRejectedOnCustomizerConstructor() {
29+
assertThrows(IllegalArgumentException.class, () -> new Session(null, -7, null, p -> p));
30+
}
31+
}

src/test/java/net/ladenthin/llama/value/ServerMetricsTest.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package net.ladenthin.llama.value;
66

77
import static org.junit.jupiter.api.Assertions.assertEquals;
8+
import static org.junit.jupiter.api.Assertions.assertFalse;
89
import static org.junit.jupiter.api.Assertions.assertTrue;
910

1011
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -28,9 +29,11 @@ private ServerMetrics parse(String json) throws Exception {
2829
+ "\"n_prompt_tokens_processed\":10,\"t_prompt_processing\":5,"
2930
+ "\"n_tokens_predicted\":20,\"t_tokens_generation\":8,"
3031
+ "\"n_decode_total\":300,\"n_busy_slots_total\":4,\"n_tokens_max\":4096,"
32+
// next_token is an ARRAY of one object — this mirrors llama.cpp's server_slot::to_json
33+
// at b9739, not a bare object; SlotMetrics must unwrap next_token[0].
3134
+ "\"slots\":[{\"id\":0,\"n_ctx\":4096,\"is_processing\":true,"
3235
+ "\"n_prompt_tokens\":100,\"n_prompt_tokens_processed\":20,"
33-
+ "\"n_prompt_tokens_cache\":80,\"next_token\":{\"n_decoded\":7,\"n_remain\":9}},"
36+
+ "\"n_prompt_tokens_cache\":80,\"next_token\":[{\"n_decoded\":7,\"n_remain\":9}]},"
3437
+ "{\"id\":1}]}";
3538

3639
@Test
@@ -110,6 +113,16 @@ public void typedSlotMetricsExposeCacheCounts() throws Exception {
110113
assertEquals(9L, slot.getRemainingTokens());
111114
assertEquals(0, slot.asJson().path("id").asInt());
112115
assertTrue(slot.toString().contains("n_prompt_tokens_cache"));
116+
117+
// Assert against the SECOND slot (id=1, no is_processing) so the id and is_processing
118+
// accessors are pinned to non-default values a constant-return mutant cannot satisfy:
119+
// slot 0's id==0 and is_processing==true coincide with the mutated constants.
120+
SlotMetrics idle = m.getSlotMetrics().get(1);
121+
assertEquals(1, idle.getId());
122+
assertFalse(idle.isProcessing());
123+
// next_token absent on the idle slot — accessors fall back to zero, not throw.
124+
assertEquals(0L, idle.getDecodedTokens());
125+
assertEquals(0L, idle.getRemainingTokens());
113126
}
114127

115128
@Test

0 commit comments

Comments
 (0)