java-llama.cpp/src/main/java/net/ladenthin/llama/value/SlotMetrics.java at af5a7a96fb3cf971dc3e1b0de0c3c7c5f36ce785 · bernardladenthin/java-llama.cpp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
//
// SPDX-License-Identifier: MIT

package net.ladenthin.llama.value;

import com.fasterxml.jackson.databind.JsonNode;
import java.util.Objects;
import lombok.EqualsAndHashCode;

/**
 * Typed view of one entry in llama.cpp's server-metrics {@code slots} array.
 *
 * <p>Each accessor reads its field from the wrapped {@link JsonNode} on demand using
 * {@link JsonNode#path(String)}, so an absent field produces the accessor's documented
 * default value instead of an exception.
 */
@EqualsAndHashCode
public final class SlotMetrics {

    /** Raw slot JSON backing every accessor; never {@code null}. */
    private final JsonNode node;

    /**
     * Wrap a raw slot metrics object.
     *
     * @param node slot JSON emitted by llama.cpp; must not be {@code null}
     * @throws NullPointerException if {@code node} is {@code null}
     */
    public SlotMetrics(JsonNode node) {
        // Fail fast: every accessor dereferences the node, so accepting null here would only
        // defer the NullPointerException to a call site far away from the actual mistake.
        this.node = Objects.requireNonNull(node, "node");
    }

    /**
     * Returns the zero-based server slot identifier.
     *
     * @return slot identifier, or {@code -1} when {@code id} is absent or not convertible
     */
    public int getId() {
        return node.path("id").asInt(-1);
    }

    /**
     * Returns the context capacity assigned to this slot.
     *
     * @return context capacity, or {@code 0} when {@code n_ctx} is absent or not convertible
     */
    public int getContextSize() {
        return node.path("n_ctx").asInt(0);
    }

    /**
     * Reports whether this slot is currently processing a task.
     *
     * @return {@code true} while processing; {@code false} when {@code is_processing} is
     *     absent or not convertible
     */
    public boolean isProcessing() {
        return node.path("is_processing").asBoolean(false);
    }

    /**
     * Returns the logical prompt-token count for the current or most recent task.
     *
     * @return logical prompt-token count, or {@code 0} when {@code n_prompt_tokens} is absent
     *     or not convertible
     */
    public long getPromptTokens() {
        return node.path("n_prompt_tokens").asLong(0L);
    }

    /**
     * Returns prompt tokens evaluated by the model for the current or most recent task.
     *
     * @return evaluated prompt-token count, or {@code 0} when
     *     {@code n_prompt_tokens_processed} is absent or not convertible
     */
    public long getProcessedPromptTokens() {
        return node.path("n_prompt_tokens_processed").asLong(0L);
    }

    /**
     * Returns prompt tokens reused from KV cache for the current or most recent task.
     *
     * @return cached prompt-token count, or {@code 0} when {@code n_prompt_tokens_cache} is
     *     absent or not convertible
     */
    public long getCachedPromptTokens() {
        return node.path("n_prompt_tokens_cache").asLong(0L);
    }

    /**
     * Returns tokens decoded for the current or most recent task.
     *
     * @return decoded-token count, or {@code 0} when the {@code next_token} payload or its
     *     {@code n_decoded} field is absent or not convertible
     */
    public long getDecodedTokens() {
        return nextToken().path("n_decoded").asLong(0L);
    }

    /**
     * Returns tokens remaining under the current generation limit.
     *
     * @return remaining-token count, or {@code 0} when the {@code next_token} payload or its
     *     {@code n_remain} field is absent or not convertible
     */
    public long getRemainingTokens() {
        return nextToken().path("n_remain").asLong(0L);
    }

    /**
     * Resolves the {@code next_token} payload node. llama.cpp's {@code server_slot::to_json}
     * (b9739) serializes {@code next_token} as a JSON <em>array containing a single object</em>,
     * so the counters live at {@code next_token[0]}. This unwraps that array; if a bare object
     * is encountered instead it is used directly, and anything else yields a missing node whose
     * accessors fall back to their defaults.
     *
     * @return the object node carrying {@code n_decoded} / {@code n_remain}, or a missing node
     */
    private JsonNode nextToken() {
        JsonNode next = node.path("next_token");
        // path(0) on an empty array yields a missing node, so callers still get their defaults.
        return next.isArray() ? next.path(0) : next;
    }

    /**
     * Returns raw slot JSON for fields not represented by typed accessors.
     *
     * <p>The returned node is the same instance that was passed to the constructor; no copy
     * is made.
     *
     * @return raw slot JSON, never {@code null}
     */
    public JsonNode asJson() {
        return node;
    }

    /**
     * Returns the string form of the wrapped slot JSON.
     *
     * @return result of {@link JsonNode#toString()} on the wrapped node
     */
    @Override
    public String toString() {
        return node.toString();
    }
}