|
| 1 | +package agents_engine.model |
| 2 | + |
| 3 | +import kotlinx.coroutines.flow.toList |
| 4 | +import kotlinx.coroutines.runBlocking |
| 5 | +import org.junit.jupiter.api.Assumptions.assumeTrue |
| 6 | +import org.junit.jupiter.api.Tag |
| 7 | +import java.net.URI |
| 8 | +import java.net.http.HttpClient |
| 9 | +import java.net.http.HttpRequest |
| 10 | +import java.net.http.HttpResponse |
| 11 | +import java.nio.file.Files |
| 12 | +import java.nio.file.Paths |
| 13 | +import java.time.Duration |
| 14 | +import kotlin.test.Test |
| 15 | +import kotlin.test.assertIs |
| 16 | +import kotlin.test.assertTrue |
| 17 | + |
/**
 * Live `chatStream(...)` integration tests across the three shipped providers
 * (Ollama / Anthropic / OpenAI), driven through the v0.4.6 streaming
 * foundation (#1722).
 *
 * As of v0.4.6 no adapter overrides `chatStream` — they inherit the default
 * `chat()`-wrapping implementation on [ModelClient]. So this file pins the
 * contract that downstream native-streaming overrides must continue to
 * satisfy:
 *
 *  - terminal chunk is [LlmChunk.End]
 *  - everything before End is one or more [LlmChunk.TextDelta] for a plain
 *    text answer (no tool calls)
 *  - concatenating the TextDeltas reproduces the model's answer
 *
 * When per-adapter native streaming lands (Anthropic SSE / OpenAI SSE / Ollama
 * `stream: true`), the current "exactly one TextDelta" behaviour relaxes —
 * providers will emit many partials — but the ordered-sequence +
 * assembled-text + terminal-End shape stays.
 *
 * Gating: tagged `live-llm` (excluded from default `./gradlew test`); each
 * test skips via JUnit `Assumptions` when its provider isn't reachable
 * (no key file / no env var / no local Ollama).
 */
class FibStreamingLiveIntegrationTest {

    // Resolved lazily: JUnit builds a fresh instance per test method, and a test
    // for one provider should never read (or fail on) another provider's key file.
    private val anthropicKey: String? by lazy { loadKey("anthropic-key", "ANTHROPIC_API_KEY") }
    private val openaiKey: String? by lazy { loadKey("openai-key", "OPENAI_API_KEY") }

    private val claudeModel: String = System.getenv("CLAUDE_TEST_MODEL") ?: "claude-haiku-4-5-20251001"
    private val openaiModel: String = System.getenv("OPENAI_TEST_MODEL") ?: "gpt-4o-mini"
    private val ollamaModel: String = System.getenv("OLLAMA_TEST_MODEL") ?: "gpt-oss:20b-cloud"

    // fib(10) = 55. A bare-integer system prompt keeps the assertion robust
    // across providers — the model has nowhere to hide a sentence around it.
    private val systemPrompt =
        "You are a number generator. Reply with ONLY the decimal integer answer — " +
            "no words, no punctuation, no explanation."
    private val userPrompt = "What is the 10th Fibonacci number? (fib(0)=0, fib(1)=1, ..., fib(10)=?)"
    private val expectedAnswer = "55"

    // Matches the expected integer only when it is not part of a longer digit run,
    // so "55", "55." and "fib(10)=55" pass while "155" or "550" do not.
    private val expectedAnswerPattern = Regex("(?<!\\d)${Regex.escape(expectedAnswer)}(?!\\d)")

    @Tag("live-llm")
    @Test
    fun `Claude — chatStream emits ordered TextDelta(s) + End containing fib(10)=55`() = runBlocking {
        val apiKey = keyOrSkip(
            anthropicKey,
            "skipping: no Anthropic key at .secrets/anthropic-key or ANTHROPIC_API_KEY",
        )
        val client = ClaudeClient(
            apiKey = apiKey,
            model = claudeModel,
            temperature = 0.0,
            maxTokens = 32,
        )
        val chunks = client.chatStream(fibMessages()).toList()
        assertStreamingShapeAndAnswer("Claude", chunks)
    }

    @Tag("live-llm")
    @Test
    fun `OpenAI — chatStream emits ordered TextDelta(s) + End containing fib(10)=55`() = runBlocking {
        val apiKey = keyOrSkip(
            openaiKey,
            "skipping: no OpenAI key at .secrets/openai-key or OPENAI_API_KEY",
        )
        val client = OpenAiClient(
            apiKey = apiKey,
            model = openaiModel,
            temperature = 0.0,
            maxTokens = 32,
        )
        val chunks = client.chatStream(fibMessages()).toList()
        assertStreamingShapeAndAnswer("OpenAI", chunks)
    }

    @Tag("live-llm")
    @Test
    fun `Ollama — chatStream emits ordered TextDelta(s) + End containing fib(10)=55`() = runBlocking {
        assumeTrue(isOllamaReachable(), "skipping: no Ollama at $OLLAMA_HOST:$OLLAMA_PORT")
        val client = OllamaClient(
            host = OLLAMA_HOST,
            port = OLLAMA_PORT,
            model = ollamaModel,
            temperature = 0.0,
        )
        val chunks = client.chatStream(fibMessages()).toList()
        assertStreamingShapeAndAnswer("Ollama", chunks)
    }

    /** Builds the system + user message pair sent to every provider. */
    private fun fibMessages(): List<LlmMessage> = listOf(
        LlmMessage(role = "system", content = systemPrompt),
        LlmMessage(role = "user", content = userPrompt),
    )

    /**
     * Returns [key] when present; otherwise aborts the current test as skipped
     * with [skipMessage] via a JUnit assumption.
     */
    private fun keyOrSkip(key: String?, skipMessage: String): String {
        assumeTrue(key != null, skipMessage)
        // Unreachable when key is null: assumeTrue has already aborted the test.
        return checkNotNull(key) { skipMessage }
    }

    /**
     * Asserts the streaming contract on a fully collected chunk list:
     * non-empty, terminated by [LlmChunk.End], preceded only by one or more
     * [LlmChunk.TextDelta], and the concatenated delta text contains the
     * expected answer as a standalone number.
     *
     * @param provider label used to prefix assertion messages
     * @param chunks every chunk emitted by `chatStream`, in emission order
     */
    private fun assertStreamingShapeAndAnswer(provider: String, chunks: List<LlmChunk>) {
        assertTrue(chunks.isNotEmpty(), "[$provider] no chunks received")
        val last = chunks.last()
        assertIs<LlmChunk.End>(last, "[$provider] terminal chunk must be End; got $last")
        val preTerminal = chunks.dropLast(1)
        assertTrue(
            preTerminal.isNotEmpty() && preTerminal.all { it is LlmChunk.TextDelta },
            "[$provider] expected one-or-more TextDeltas before End for a plain-text answer; got: $chunks",
        )
        val assembled = preTerminal
            .filterIsInstance<LlmChunk.TextDelta>()
            .joinToString("") { it.text }
        // A plain substring test would also accept e.g. "155"; require digit boundaries.
        assertTrue(
            expectedAnswerPattern.containsMatchIn(assembled),
            "[$provider] expected standalone '$expectedAnswer' in assembled output, got: '$assembled'",
        )
    }

    /**
     * Resolves an API key, preferring the file `.secrets/<fileName>` (trimmed)
     * and falling back to the environment variable [envVar].
     *
     * @return the key, or `null` when neither source yields a non-blank value
     */
    private fun loadKey(fileName: String, envVar: String): String? {
        val path = Paths.get(".secrets", fileName)
        val fromFile = try {
            if (Files.isReadable(path)) Files.readString(path).trim() else ""
        } catch (_: java.io.IOException) {
            // Unreadable content (directory, bad encoding, race with deletion):
            // treat it as "no key file" and fall through to the environment.
            ""
        }
        if (fromFile.isNotEmpty()) return fromFile
        return System.getenv(envVar)?.takeIf { it.isNotBlank() }
    }

    /**
     * Probes the local Ollama HTTP endpoint (`/api/tags`) with short timeouts.
     *
     * @return `true` only when the probe answers with a 2xx status; any failure
     *   to connect or respond is reported as `false` so the test is skipped
     */
    private fun isOllamaReachable(): Boolean = try {
        val client = HttpClient.newBuilder().connectTimeout(Duration.ofMillis(500)).build()
        val request = HttpRequest.newBuilder()
            .uri(URI.create("http://$OLLAMA_HOST:$OLLAMA_PORT/api/tags"))
            .timeout(Duration.ofMillis(1500))
            .GET()
            .build()
        client.send(request, HttpResponse.BodyHandlers.discarding()).statusCode() in 200..299
    } catch (_: InterruptedException) {
        // Preserve the interrupt for whoever owns this thread instead of swallowing it.
        Thread.currentThread().interrupt()
        false
    } catch (_: Exception) {
        // Best-effort probe: connection refused, timeout, etc. all mean "not reachable".
        // JVM Errors are deliberately not caught here.
        false
    }

    private companion object {
        /** Host of the local Ollama instance used by the probe and the client. */
        const val OLLAMA_HOST = "localhost"

        /** Port of the local Ollama instance used by the probe and the client. */
        const val OLLAMA_PORT = 11434
    }
}
0 commit comments