Skip to content

Commit b9903dc

Browse files
Skobeltsyn and claude committed
test(#1733): live chatStream fib(10) coverage across Ollama / OpenAI / Claude
Three @Tag("live-llm") tests in FibStreamingLiveIntegrationTest exercise the v0.4.6 streaming foundation (#1722) against the three shipped providers. Each test instantiates the provider client directly, asks for fib(10) with a bare-integer system prompt, collects chatStream(...) into a list, and asserts that:
- the list is non-empty,
- the terminal chunk is LlmChunk.End,
- everything before End is one or more TextDelta chunks (no tool calls),
- the concatenated TextDeltas contain "55".

Today all three adapters inherit the default chat()-wrapping chatStream, so the current shape is exactly one TextDelta + End. The assertion also accepts many TextDeltas — that is what per-adapter native SSE overrides (Anthropic / OpenAI / Ollama stream:true) will produce, so this file becomes the regression bound for the per-adapter streaming follow-up work.

Verified live: Claude 3.7s, OpenAI 8.4s, Ollama 4.2s (all pass).

UUID: 81D1EF86-FAA5-4CB2-B499-8E36DB1BC838
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 748ccc6 commit b9903dc

1 file changed

Lines changed: 144 additions & 0 deletions

File tree

Lines changed: 144 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,144 @@
1+
package agents_engine.model
2+
3+
import kotlinx.coroutines.flow.toList
4+
import kotlinx.coroutines.runBlocking
5+
import org.junit.jupiter.api.Assumptions.assumeTrue
6+
import org.junit.jupiter.api.Tag
7+
import java.net.URI
8+
import java.net.http.HttpClient
9+
import java.net.http.HttpRequest
10+
import java.net.http.HttpResponse
11+
import java.nio.file.Files
12+
import java.nio.file.Paths
13+
import java.time.Duration
14+
import kotlin.test.Test
15+
import kotlin.test.assertIs
16+
import kotlin.test.assertTrue
17+
18+
/**
 * Live `chatStream(...)` integration tests across the three shipped providers
 * (Ollama / Anthropic / OpenAI), driven through the v0.4.6 streaming
 * foundation (#1722).
 *
 * As of v0.4.6 no adapter overrides `chatStream` — they inherit the default
 * `chat()`-wrapping implementation on [ModelClient]. So this file pins the
 * contract that downstream native-streaming overrides must continue to
 * satisfy:
 *
 * - the terminal chunk is [LlmChunk.End]
 * - everything before End is one or more [LlmChunk.TextDelta] for a plain
 *   text answer (no tool calls)
 * - concatenating the TextDeltas reproduces the model's answer
 *
 * When per-adapter native streaming lands (Anthropic SSE / OpenAI SSE / Ollama
 * `stream: true`), the "exactly one TextDelta" behaviour relaxes — providers
 * will emit many partials — but the ordered-sequence + assembled-text +
 * terminal-End shape stays.
 *
 * Gating: tagged `live-llm` (excluded from default `./gradlew test`); each
 * test skips via JUnit `Assumptions` when its provider isn't reachable
 * (no key file / no env var / no local Ollama).
 */
class FibStreamingLiveIntegrationTest {

    // Keys are resolved lazily so a test that needs no key (Ollama) never
    // touches the key files, and a broken key file cannot fail construction.
    private val anthropicKey: String? by lazy { loadKey("anthropic-key", "ANTHROPIC_API_KEY") }
    private val openaiKey: String? by lazy { loadKey("openai-key", "OPENAI_API_KEY") }

    private val claudeModel: String = System.getenv("CLAUDE_TEST_MODEL") ?: "claude-haiku-4-5-20251001"
    private val openaiModel: String = System.getenv("OPENAI_TEST_MODEL") ?: "gpt-4o-mini"
    private val ollamaModel: String = System.getenv("OLLAMA_TEST_MODEL") ?: "gpt-oss:20b-cloud"

    // fib(10) = 55. A bare-integer system prompt keeps the assertion robust
    // across providers — the model has nowhere to hide a sentence around it.
    private val systemPrompt =
        "You are a number generator. Reply with ONLY the decimal integer answer — " +
            "no words, no punctuation, no explanation."
    private val userPrompt = "What is the 10th Fibonacci number? (fib(0)=0, fib(1)=1, ..., fib(10)=?)"
    private val expectedAnswer = "55"

    // Matches the expected answer only when it is not embedded in a longer
    // digit run, so outputs such as "155" or "550" do not count as fib(10).
    private val standaloneAnswer = Regex("(?<!\\d)" + Regex.escape(expectedAnswer) + "(?!\\d)")

    // The explicit `: Unit` return type on each test keeps the method `void`
    // for JUnit even if the last expression inside `runBlocking` changes.

    @Tag("live-llm")
    @Test
    fun `Claude — chatStream emits ordered TextDelta(s) + End containing fib(10)=55`(): Unit = runBlocking {
        val key = keyOrSkip(
            anthropicKey,
            "skipping: no Anthropic key at .secrets/anthropic-key or ANTHROPIC_API_KEY",
        )
        val client = ClaudeClient(
            apiKey = key,
            model = claudeModel,
            temperature = 0.0,
            maxTokens = 32,
        )
        val chunks = client.chatStream(fibMessages()).toList()
        assertStreamingShapeAndAnswer("Claude", chunks)
    }

    @Tag("live-llm")
    @Test
    fun `OpenAI — chatStream emits ordered TextDelta(s) + End containing fib(10)=55`(): Unit = runBlocking {
        val key = keyOrSkip(
            openaiKey,
            "skipping: no OpenAI key at .secrets/openai-key or OPENAI_API_KEY",
        )
        val client = OpenAiClient(
            apiKey = key,
            model = openaiModel,
            temperature = 0.0,
            maxTokens = 32,
        )
        val chunks = client.chatStream(fibMessages()).toList()
        assertStreamingShapeAndAnswer("OpenAI", chunks)
    }

    @Tag("live-llm")
    @Test
    fun `Ollama — chatStream emits ordered TextDelta(s) + End containing fib(10)=55`(): Unit = runBlocking {
        assumeTrue(isOllamaReachable(), "skipping: no Ollama at $OLLAMA_HOST:$OLLAMA_PORT")
        val client = OllamaClient(
            host = OLLAMA_HOST,
            port = OLLAMA_PORT,
            model = ollamaModel,
            temperature = 0.0,
        )
        val chunks = client.chatStream(fibMessages()).toList()
        assertStreamingShapeAndAnswer("Ollama", chunks)
    }

    /** Builds the system + user message pair that asks for fib(10). */
    private fun fibMessages(): List<LlmMessage> = listOf(
        LlmMessage(role = "system", content = systemPrompt),
        LlmMessage(role = "user", content = userPrompt),
    )

    /**
     * Skips the current test (via a failed JUnit assumption) when [key] is
     * absent; otherwise returns it as a non-null value.
     *
     * @param key the credential resolved by [loadKey], or `null` when none was found
     * @param skipMessage the message reported for the skipped test
     */
    private fun keyOrSkip(key: String?, skipMessage: String): String {
        assumeTrue(key != null, skipMessage)
        // Unreachable with a null key: the assumption above has already aborted.
        return checkNotNull(key) { skipMessage }
    }

    /**
     * Asserts the streaming contract on a fully collected stream:
     * non-empty, terminated by [LlmChunk.End], preceded only by one or more
     * [LlmChunk.TextDelta] chunks, whose concatenated text contains the
     * expected answer as a standalone number.
     *
     * @param provider label used to prefix assertion messages
     * @param chunks every chunk emitted by `chatStream`, in emission order
     */
    private fun assertStreamingShapeAndAnswer(provider: String, chunks: List<LlmChunk>) {
        assertTrue(chunks.isNotEmpty(), "[$provider] no chunks received")
        val last = chunks.last()
        assertIs<LlmChunk.End>(last, "[$provider] terminal chunk must be End; got $last")
        val preTerminal = chunks.dropLast(1)
        assertTrue(
            preTerminal.isNotEmpty() && preTerminal.all { it is LlmChunk.TextDelta },
            "[$provider] expected one-or-more TextDeltas before End for a plain-text answer; got: $chunks",
        )
        val assembled = preTerminal
            .filterIsInstance<LlmChunk.TextDelta>()
            .joinToString("") { it.text }
        assertTrue(
            standaloneAnswer.containsMatchIn(assembled),
            "[$provider] expected standalone '$expectedAnswer' in assembled output, got: '$assembled'",
        )
    }

    /**
     * Resolves a provider credential: the trimmed contents of
     * `.secrets/<fileName>` when that is a readable, non-empty regular file,
     * otherwise the non-blank value of the [envVar] environment variable.
     *
     * @return the credential, or `null` when neither source provides one
     */
    private fun loadKey(fileName: String, envVar: String): String? {
        val path = Paths.get(".secrets", fileName)
        // isReadable alone is also true for directories, which readString cannot read.
        if (Files.isRegularFile(path) && Files.isReadable(path)) {
            val raw = try {
                Files.readString(path).trim()
            } catch (_: java.io.IOException) {
                // An unreadable or malformed key file falls back to the environment variable.
                ""
            }
            if (raw.isNotEmpty()) return raw
        }
        return System.getenv(envVar)?.takeIf { it.isNotBlank() }
    }

    /**
     * Probes the local Ollama HTTP API with a short-timeout GET on `/api/tags`.
     *
     * @return `true` only when the request completes with a 2xx status;
     *   any failure to connect or respond in time yields `false`
     */
    private fun isOllamaReachable(): Boolean = try {
        val client = HttpClient.newBuilder().connectTimeout(Duration.ofMillis(500)).build()
        val request = HttpRequest.newBuilder()
            .uri(URI.create("http://$OLLAMA_HOST:$OLLAMA_PORT/api/tags"))
            .timeout(Duration.ofMillis(1500))
            .GET()
            .build()
        client.send(request, HttpResponse.BodyHandlers.discarding()).statusCode() in 200..299
    } catch (_: InterruptedException) {
        // Preserve the interrupt for whoever owns this thread.
        Thread.currentThread().interrupt()
        false
    } catch (_: Exception) {
        // Best-effort probe: unreachable means "skip". JVM Errors are not swallowed.
        false
    }

    private companion object {
        // Single source of truth for the Ollama endpoint used by both the
        // reachability probe and the client under test.
        const val OLLAMA_HOST = "localhost"
        const val OLLAMA_PORT = 11434
    }
}

0 commit comments

Comments
 (0)