|
| 1 | +package agents_engine.model |
| 2 | + |
import agents_engine.core.agent
import org.junit.jupiter.api.assertThrows
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertNotNull
import kotlin.test.assertNull
import kotlin.test.assertTrue
| 9 | + |
/**
 * Tests for #963 — token-based budget control.
 *
 * Plumbing under test: Ollama reports `prompt_eval_count` + `eval_count` → ModelClient
 * surfaces [TokenUsage] on [LlmResponse] → AgenticLoop accumulates → throws
 * [BudgetExceededException] with [BudgetReason.TOKENS] when over cap.
 */
class MaxTokensBudgetTest {

    // ---- TokenUsage / LlmResponse value types ------------------------------

    @Test
    fun `TokenUsage total is the sum of prompt and completion`() {
        val usage = TokenUsage(promptTokens = 30, completionTokens = 12)
        assertEquals(42, usage.total)
    }

    @Test
    fun `LlmResponse Text exposes tokenUsage when constructed with one`() {
        val response = LlmResponse.Text("hello", TokenUsage(10, 5))
        val usage = assertNotNull(response.tokenUsage, "Text response should carry the usage it was built with")
        assertEquals(15, usage.total)
    }

    @Test
    fun `LlmResponse ToolCalls exposes tokenUsage when constructed with one`() {
        val response = LlmResponse.ToolCalls(emptyList(), TokenUsage(20, 7))
        val usage = assertNotNull(response.tokenUsage, "ToolCalls response should carry the usage it was built with")
        assertEquals(27, usage.total)
    }

    @Test
    fun `LlmResponse default tokenUsage is null (back-compat)`() {
        // Existing call sites (FakeModelClient { LlmResponse.Text("x") })
        // must continue to work without specifying token usage.
        assertNull(LlmResponse.Text("hi").tokenUsage)
        assertNull(LlmResponse.ToolCalls(emptyList()).tokenUsage)
    }

    // ---- Budget configuration ----------------------------------------------

    @Test
    fun `BudgetConfig maxTokens default is null (no cap)`() {
        assertNull(BudgetConfig().maxTokens)
    }

    @Test
    fun `BudgetBuilder exposes maxTokens via DSL`() {
        val config = BudgetBuilder().apply { maxTokens = 1000 }.build()
        assertEquals(1000, config.maxTokens)
    }

    // ---- OllamaClient response parsing -------------------------------------

    @Test
    fun `OllamaClient parseResponse extracts both prompt and completion counts`() {
        // Realistic Ollama response shape — token counts at the root, not on `message`.
        val body = """
            {
              "model": "llama3",
              "message": {"role": "assistant", "content": "hello"},
              "done": true,
              "prompt_eval_count": 25,
              "eval_count": 8
            }
        """.trimIndent()

        val response = OllamaClient(model = "llama3").parseResponse(body)

        val usage = assertNotNull(response.tokenUsage, "both counts present, usage should be reported")
        assertEquals(25, usage.promptTokens)
        assertEquals(8, usage.completionTokens)
        assertEquals(33, usage.total)
    }

    @Test
    fun `OllamaClient parseResponse drops partial token reports`() {
        // If only one of prompt_eval_count / eval_count is present, the count
        // is untrustworthy — surface it as null rather than half-attributing.
        val body = """
            {
              "model": "llama3",
              "message": {"role": "assistant", "content": "hi"},
              "done": true,
              "prompt_eval_count": 10
            }
        """.trimIndent()

        val response = OllamaClient(model = "llama3").parseResponse(body)

        assertNull(response.tokenUsage)
    }

    @Test
    fun `OllamaClient parseResponse handles missing token counts`() {
        // Provider didn't report anything — null, not zero.
        val body = """
            {
              "model": "llama3",
              "message": {"role": "assistant", "content": "hi"},
              "done": true
            }
        """.trimIndent()

        val response = OllamaClient(model = "llama3").parseResponse(body)

        assertNull(response.tokenUsage)
    }

    // ---- Agentic loop enforcement ------------------------------------------

    @Test
    fun `agentic loop accumulates tokens across turns`() {
        // Two turns: a tool call followed by a final text. Cap is generous
        // so the loop succeeds; the tripping behaviour under a tighter cap
        // is covered by the cumulative-overrun test.
        val responses = scriptOf(
            LlmResponse.ToolCalls(
                listOf(ToolCall(name = "noop", arguments = emptyMap())),
                TokenUsage(promptTokens = 10, completionTokens = 5),
            ),
            LlmResponse.Text(
                "done",
                TokenUsage(promptTokens = 15, completionTokens = 7),
            ),
        )
        val mock = ModelClient { _ -> responses.removeFirst() }

        val a = agent<String, String>("a") {
            model { ollama("llama3"); client = mock }
            budget { maxTokens = 100 }
            tools { tool("noop", "") { _ -> "ok" } }
            skills { skill<String, String>("s", "s") { tools("noop") } }
        }

        assertEquals("done", a("input"))
        // Both scripted turns must have been requested for the run to count as multi-turn.
        assertTrue(responses.isEmpty(), "both scripted turns should have been consumed")
    }

    @Test
    fun `agentic loop throws BudgetExceededException(TOKENS) when sum exceeds maxTokens`() {
        // First turn alone (10 + 5 = 15) is over the cap of 10.
        val responses = scriptOf(
            LlmResponse.Text(
                "done",
                TokenUsage(promptTokens = 10, completionTokens = 5),
            ),
        )
        val mock = ModelClient { _ -> responses.removeFirst() }

        val a = agent<String, String>("a") {
            model { ollama("llama3"); client = mock }
            budget { maxTokens = 10 }
            skills { skill<String, String>("s", "s") { tools() } }
        }

        val ex = assertThrows<BudgetExceededException> { a("input") }
        assertEquals(BudgetReason.TOKENS, ex.reason)
        // Message should mention both the cap and the actual usage so users
        // can see how badly they overshot.
        val msg = ex.message.orEmpty()
        assertTrue(msg.contains("10"), "message should mention cap: $msg")
        assertTrue(msg.contains("15"), "message should mention used: $msg")
    }

    @Test
    fun `agentic loop overrun triggers across cumulative turns, not per-turn`() {
        // Each turn is 5 + 5 = 10 tokens. Cap is 15. Turn 1 lands at 10
        // (under cap). Turn 2 brings cumulative to 20 (over) — that's where
        // the throw must happen.
        val responses = scriptOf(
            LlmResponse.ToolCalls(
                listOf(ToolCall(name = "noop", arguments = emptyMap())),
                TokenUsage(5, 5),
            ),
            LlmResponse.Text("late", TokenUsage(5, 5)),
        )
        val mock = ModelClient { _ -> responses.removeFirst() }

        val a = agent<String, String>("a") {
            model { ollama("llama3"); client = mock }
            budget { maxTokens = 15 }
            tools { tool("noop", "") { _ -> "ok" } }
            skills { skill<String, String>("s", "s") { tools("noop") } }
        }

        val ex = assertThrows<BudgetExceededException> { a("input") }
        assertEquals(BudgetReason.TOKENS, ex.reason)
        // A drained script proves the loop got past turn 1 and only tripped
        // once turn 2's usage was added; a turn-1 throw would leave one entry.
        assertTrue(responses.isEmpty(), "throw should happen on turn 2, after both responses were consumed")
    }

    @Test
    fun `loop with null tokenUsage on responses ignores the token cap entirely`() {
        // Provider doesn't report token usage. A null is not a count, so the
        // cap has nothing to compare against — matching the "best-effort"
        // contract documented on BudgetConfig. The key assertion is that the
        // loop completes normally rather than tripping a phantom budget.
        val mock = ModelClient { _ -> LlmResponse.Text("done") } // no usage

        val a = agent<String, String>("a") {
            model { ollama("llama3"); client = mock }
            budget { maxTokens = 1 } // hyper-tight cap; null usage means it must not fire
            skills { skill<String, String>("s", "s") { tools() } }
        }

        assertEquals("done", a("input"))
    }

    /**
     * Builds a FIFO script of canned model responses.
     *
     * Tests pop the head with `removeFirst()` on each model call and can
     * inspect what is left afterwards to see how many turns actually ran.
     *
     * @param turns responses in the order the fake model should return them.
     * @return a deque holding [turns] in the given order.
     */
    private fun scriptOf(vararg turns: LlmResponse): ArrayDeque<LlmResponse> =
        ArrayDeque(turns.asList())
}
0 commit comments