forked from kherud/java-llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathServerMetricsTest.java
More file actions
158 lines (138 loc) · 6.7 KB
/
Copy pathServerMetricsTest.java
File metadata and controls
158 lines (138 loc) · 6.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
//
// SPDX-License-Identifier: MIT
package net.ladenthin.llama.value;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.databind.ObjectMapper;
import net.ladenthin.llama.ClaudeGenerated;
import org.junit.jupiter.api.Test;
@ClaudeGenerated(
purpose = "Verify ServerMetrics typed getters map all fields emitted by server_task_result_metrics::to_json, "
+ "including cumulative Usage and derived cumulative Timings.")
public class ServerMetricsTest {
private static final ObjectMapper MAPPER = new ObjectMapper();
private ServerMetrics parse(String json) throws Exception {
return new ServerMetrics(MAPPER.readTree(json));
}
private static final String SAMPLE = "{\"idle\":2,\"processing\":1,\"deferred\":3,\"t_start\":1234567890,"
+ "\"n_prompt_tokens_processed_total\":100,\"t_prompt_processing_total\":50,"
+ "\"n_tokens_predicted_total\":200,\"t_tokens_generation_total\":80,"
+ "\"n_prompt_tokens_processed\":10,\"t_prompt_processing\":5,"
+ "\"n_tokens_predicted\":20,\"t_tokens_generation\":8,"
+ "\"n_decode_total\":300,\"n_busy_slots_total\":4,\"n_tokens_max\":4096,"
// next_token is an ARRAY of one object — this mirrors llama.cpp's server_slot::to_json
// at b9739, not a bare object; SlotMetrics must unwrap next_token[0].
+ "\"slots\":[{\"id\":0,\"n_ctx\":4096,\"is_processing\":true,"
+ "\"n_prompt_tokens\":100,\"n_prompt_tokens_processed\":20,"
+ "\"n_prompt_tokens_cache\":80,\"next_token\":[{\"n_decoded\":7,\"n_remain\":9}]},"
+ "{\"id\":1}]}";
@Test
public void slotCountsAndTimestamp() throws Exception {
ServerMetrics m = parse(SAMPLE);
assertEquals(2, m.getIdleSlots());
assertEquals(1, m.getProcessingSlots());
assertEquals(3, m.getDeferredTasks());
assertEquals(1234567890L, m.getStartTimestamp());
}
@Test
public void totalsAndMaxTokens() throws Exception {
ServerMetrics m = parse(SAMPLE);
assertEquals(300L, m.getDecodeTotal());
assertEquals(4L, m.getBusySlotsTotal());
assertEquals(4096, m.getTokensMax());
}
@Test
public void cumulativeUsage() throws Exception {
ServerMetrics m = parse(SAMPLE);
Usage u = m.getCumulativeUsage();
assertEquals(100L, u.getPromptTokens());
assertEquals(200L, u.getCompletionTokens());
assertEquals(300L, u.getTotalTokens());
assertEquals(100L, m.getCumulativeProcessedPromptTokens());
assertEquals(200L, m.getCumulativeGeneratedTokens());
}
@Test
public void windowUsage() throws Exception {
ServerMetrics m = parse(SAMPLE);
Usage u = m.getWindowUsage();
assertEquals(10L, u.getPromptTokens());
assertEquals(20L, u.getCompletionTokens());
assertEquals(10L, m.getWindowProcessedPromptTokens());
}
@Test
public void cumulativeTimingsDerivesRates() throws Exception {
ServerMetrics m = parse(SAMPLE);
Timings t = m.getCumulativeTimings();
assertEquals(100, t.getPromptN());
assertEquals(50.0, t.getPromptMs(), 1e-9);
// 100 tokens / 50ms = 2000/s
assertEquals(2000.0, t.getPromptPerSecond(), 1e-9);
// 200 / 80ms = 2500/s
assertEquals(2500.0, t.getPredictedPerSecond(), 1e-9);
}
@Test
public void cumulativeTimingsZeroMsYieldsZeroRate() throws Exception {
ServerMetrics m = parse("{\"n_prompt_tokens_processed_total\":5,\"t_prompt_processing_total\":0}");
assertEquals(0.0, m.getCumulativeTimings().getPromptPerSecond(), 1e-9);
}
@Test
public void slotsArrayExposed() throws Exception {
ServerMetrics m = parse(SAMPLE);
assertTrue(m.getSlots().isArray());
assertEquals(2, m.getSlots().size());
}
@Test
public void typedSlotMetricsExposeCacheCounts() throws Exception {
ServerMetrics m = parse(SAMPLE);
assertEquals(2, m.getSlotMetrics().size());
SlotMetrics slot = m.getSlotMetrics().get(0);
assertEquals(0, slot.getId());
assertEquals(4096, slot.getContextSize());
assertTrue(slot.isProcessing());
assertEquals(100L, slot.getPromptTokens());
assertEquals(20L, slot.getProcessedPromptTokens());
assertEquals(80L, slot.getCachedPromptTokens());
assertEquals(7L, slot.getDecodedTokens());
assertEquals(9L, slot.getRemainingTokens());
assertEquals(0, slot.asJson().path("id").asInt());
assertTrue(slot.toString().contains("n_prompt_tokens_cache"));
// Assert against the SECOND slot (id=1, no is_processing) so the id and is_processing
// accessors are pinned to non-default values a constant-return mutant cannot satisfy:
// slot 0's id==0 and is_processing==true coincide with the mutated constants.
SlotMetrics idle = m.getSlotMetrics().get(1);
assertEquals(1, idle.getId());
assertFalse(idle.isProcessing());
// next_token absent on the idle slot — accessors fall back to zero, not throw.
assertEquals(0L, idle.getDecodedTokens());
assertEquals(0L, idle.getRemainingTokens());
}
@Test
public void missingFieldsDefaultToZero() throws Exception {
ServerMetrics m = parse("{}");
assertEquals(0, m.getIdleSlots());
assertEquals(0L, m.getDecodeTotal());
assertEquals(0, m.getTokensMax());
assertEquals(0L, m.getCumulativeUsage().getTotalTokens());
}
@Test
public void cumulativeTimingsZeroPredictedMsYieldsZeroRate() throws Exception {
// Pins the predictedMs > 0.0 boundary: with predictedN>0 but predictedMs=0 the rate must be 0.0
// (a >= boundary mutant would divide by zero and produce a non-zero / NaN rate).
ServerMetrics m = parse("{\"n_tokens_predicted_total\":5,\"t_tokens_generation_total\":0}");
assertEquals(0.0, m.getCumulativeTimings().getPredictedPerSecond(), 1e-9);
}
@Test
public void asJsonExposesBackingNode() throws Exception {
ServerMetrics m = parse(SAMPLE);
// Dereferencing the returned node kills the "return null" mutant on asJson().
assertEquals(2, m.asJson().get("idle").asInt());
}
@Test
public void toStringSerializesNode() throws Exception {
ServerMetrics m = parse(SAMPLE);
// Assert content (not just non-null) so the empty-string return mutant on toString is killed.
assertTrue(m.toString().contains("idle"));
}
}