Skip to content

Commit 89d892e

Browse files
Skobeltsynclaude
andcommitted
test(#1737): integration coverage for agent.session — failure, concurrency, agentic-stub, live π
Four scenarios on top of #1736's basic-events test. AgentSessionIntegrationTest.kt (no live LLM): - Failure path — implementedBy throws, terminal event is Failed carrying the original exception (identity-equal), session.await() rethrows with the same type + message (Kotlin coroutines' CompletableDeferred copies the stack-trace-recovered cause, so identity equality only holds on AgentEvent.Failed.cause). - Concurrent sessions — two parallel session() calls on the same agent, both produce uncorrupted SkillStarted/SkillCompleted/Completed sequences with the right typed outputs. Pins the closure-captured skill-name holder design. - Agentic-stub bracketing — agent with stub ModelClient running one agentic turn. Asserts SkillStarted/SkillCompleted/Completed bracket the loop and that NO Token/ToolCall* events appear yet (step-2 intentional gap; step 3 will rewire and this assertion relaxes). AgentSessionLiveTest.kt (tagged live-llm): - π to 20 decimal places against Ollama (gpt-oss:120b-cloud default). Robust pass condition: output contains the first 15 significant digits 3.14159265358979 (14 decimal places). Diagnostic stdout reports whether the full 20-digit canonical 3.14159265358979323846 landed (it does on gpt-oss:120b-cloud, full20=true). Event ordering asserted: SkillStarted first, SkillCompleted somewhere in the middle, Completed last — flexible to the step-3 rewire that'll add intermediate Token / ToolCall* events. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 8173f24 commit 89d892e

2 files changed

Lines changed: 239 additions & 0 deletions

File tree

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
package agents_engine.runtime.events
2+
3+
import agents_engine.core.agent
4+
import agents_engine.model.LlmResponse
5+
import agents_engine.model.ModelClient
6+
import agents_engine.model.TokenUsage
7+
import kotlinx.coroutines.async
8+
import kotlinx.coroutines.coroutineScope
9+
import kotlinx.coroutines.flow.toList
10+
import kotlinx.coroutines.test.runTest
11+
import kotlin.test.Test
12+
import kotlin.test.assertEquals
13+
import kotlin.test.assertFailsWith
14+
import kotlin.test.assertIs
15+
import kotlin.test.assertSame
16+
import kotlin.test.assertTrue
17+
18+
/**
 * #1737 — integration coverage for the v0.5.0 session surface beyond the
 * happy implementedBy path. These pin contracts that step 3 will need to
 * preserve when the agentic loop is rewired onto a FlowCollector.
 *
 * Scenarios covered:
 *  - failure path: a throwing `implementedBy` ends the event stream with `Failed`;
 *  - concurrent sessions: two sessions on one agent keep their events separate;
 *  - agentic-stub bracketing: a stubbed model turn yields exactly three events.
 */
class AgentSessionIntegrationTest {

    /**
     * A skill whose implementation throws must end the event stream with
     * [AgentEvent.Failed] carrying the very same exception instance, must not
     * emit [AgentEvent.Completed], and must make `session.await()` rethrow an
     * exception of the same type and message.
     */
    @Test
    fun `failure path — Failed terminates events and the same exception rethrows from await`() = runTest {
        val boom = IllegalStateException("boom")
        val failingAgent = agent<String, String>("fails") {
            skills {
                skill<String, String>("explode", "Throws unconditionally") {
                    implementedBy { throw boom }
                }
            }
        }

        val session = failingAgent.session("anything")
        val events = session.events.toList()

        // Terminal event must be Failed — carries the original exception, not a wrapped one.
        assertTrue(events.isNotEmpty(), "expected at least one event before terminal Failed")
        val terminal = events.last()
        assertIs<AgentEvent.Failed>(terminal, "last event must be Failed; got: $terminal")
        assertEquals("fails", terminal.agentId)
        assertSame(boom, terminal.cause, "Failed.cause must be the original exception, not a wrapper")

        // No Completed event must appear — Failed and Completed are mutually exclusive per the premortem.
        assertTrue(events.none { it is AgentEvent.Completed<*> }, "Completed must NOT appear on the failure path")

        // session.await() rethrows an IllegalStateException with the same message.
        // Kotlin coroutines' CompletableDeferred copies the cause with a recovered
        // stack trace before rethrowing, so identity equality doesn't hold here —
        // AgentEvent.Failed.cause carries the original instance (identity-checked
        // above), and await() preserves type + message.
        val thrown = assertFailsWith<IllegalStateException> { session.await() }
        assertEquals(boom.message, thrown.message, "await() must rethrow with the original message")
    }

    /**
     * Two sessions started on the same agent must each produce their own
     * SkillStarted / SkillCompleted / Completed sequence, with the right skill
     * name and — on the terminal event — the output belonging to that session.
     */
    @Test
    fun `concurrent sessions — two parallel invocations on the same agent don't share skill-name state`() = runTest {
        val echoAgent = agent<String, String>("echo") {
            skills {
                skill<String, String>("uppercase", "Uppercases the input") {
                    implementedBy { it.uppercase() }
                }
            }
        }

        // Launch two sessions in parallel. The closure-captured skill-name
        // holder is allocated per session.launch{}; if it were shared
        // (e.g., a global var), one session's events could carry the
        // other's skill name (still "uppercase" here — but the test would
        // catch any data-race-induced corruption like a null skill name).
        val (eventsA, outputA, eventsB, outputB) = coroutineScope {
            val sessionA = echoAgent.session("alpha")
            val sessionB = echoAgent.session("bravo")
            val a = async { sessionA.events.toList() }
            val b = async { sessionB.events.toList() }
            val outA = sessionA.await()
            val outB = sessionB.await()
            Quad(a.await(), outA, b.await(), outB)
        }

        assertEquals("ALPHA", outputA)
        assertEquals("BRAVO", outputB)

        // Each entry: session label, its collected events, and the output that
        // session's terminal Completed event is expected to carry.
        val expectations = listOf(
            Triple("A", eventsA, "ALPHA"),
            Triple("B", eventsB, "BRAVO"),
        )
        for ((label, events, expectedOutput) in expectations) {
            assertEquals(3, events.size, "session $label: expected 3 events; got: $events")

            val started = events[0]
            assertIs<AgentEvent.SkillStarted>(started, "session $label: first event must be SkillStarted; got: $started")
            assertEquals("uppercase", started.skillName, "session $label: skill name must not be corrupted by the other session")

            val completed = events[1]
            assertIs<AgentEvent.SkillCompleted>(completed, "session $label: second event must be SkillCompleted; got: $completed")
            assertEquals("uppercase", completed.skillName, "session $label: skill name on SkillCompleted")

            val terminal = events[2]
            assertIs<AgentEvent.Completed<String>>(terminal, "session $label: last event must be Completed; got: $terminal")
            // The terminal event must carry this session's own result, not the sibling's.
            assertEquals(expectedOutput, terminal.output, "session $label: Completed.output must be this session's own result")
        }
    }

    /**
     * An agentic skill backed by a stub [ModelClient] that answers in one turn
     * must surface exactly SkillStarted, SkillCompleted and Completed — and no
     * Token / ToolCall* events.
     */
    @Test
    fun `agentic-stub bracketing — SkillStarted SkillCompleted Completed wrap the loop, no Token or ToolCall events yet`() = runTest {
        // Stub model: completes the agentic loop in one turn.
        val usage = TokenUsage(promptTokens = 7, completionTokens = 4)
        val stub = ModelClient { _ -> LlmResponse.Text("done", usage) }

        val agenticAgent = agent<String, String>("agentic") {
            prompt("Test stub agent.")
            model { ollama("llama3"); client = stub }
            skills {
                skill<String, String>("respond", "Echoes back via the model") { tools() }
            }
        }

        val session = agenticAgent.session("kick")
        val events = session.events.toList()
        val output = session.await()

        assertEquals("done", output, "agentic skill output must equal the stub text")
        // Step 2 contract: only SkillStarted / SkillCompleted / Completed surface for agentic skills.
        // When step 3 rewires executeAgentic onto a FlowCollector, this assertion will need to
        // relax — at that point this test pins the new contract instead.
        assertTrue(
            events.none {
                it is AgentEvent.Token || it is AgentEvent.ToolCallStarted ||
                    it is AgentEvent.ToolCallArgumentsDelta || it is AgentEvent.ToolCallFinished
            },
            "step 2 must not yet emit Token / ToolCall* events for agentic skills; got: $events",
        )
        assertEquals(3, events.size, "expected exactly [SkillStarted, SkillCompleted, Completed]; got: $events")

        val started = events[0]
        assertIs<AgentEvent.SkillStarted>(started)
        assertEquals("respond", started.skillName)

        val completed = events[1]
        assertIs<AgentEvent.SkillCompleted>(completed)
        assertEquals("respond", completed.skillName)

        val terminal = events[2]
        assertIs<AgentEvent.Completed<String>>(terminal)
        assertEquals("done", terminal.output)
    }

    // Tiny generic 4-tuple — assertable via destructuring in the concurrent test.
    private data class Quad<A, B, C, D>(val a: A, val b: B, val c: C, val d: D)
}
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
package agents_engine.runtime.events
2+
3+
import agents_engine.core.agent
4+
import kotlinx.coroutines.flow.toList
5+
import kotlinx.coroutines.runBlocking
6+
import org.junit.jupiter.api.Assumptions.assumeTrue
7+
import org.junit.jupiter.api.Tag
8+
import org.junit.jupiter.api.Test
9+
import java.net.URI
10+
import java.net.http.HttpClient
11+
import java.net.http.HttpRequest
12+
import java.net.http.HttpResponse
13+
import java.time.Duration
14+
import kotlin.test.assertEquals
15+
import kotlin.test.assertIs
16+
import kotlin.test.assertTrue
17+
18+
/**
 * #1737 — live-LLM end-to-end exercise of `agent.session(input)` against a
 * real Ollama. Tagged `live-llm` so the default suite skips it; runs via
 * `./gradlew integrationTest`. Skips cleanly when Ollama is not reachable
 * at `localhost:11434`.
 *
 * Verifiable assertion target: π. The agent is asked to recite π to 20
 * decimal places — the canonical sequence is `3.14159265358979323846`. We
 * check the output contains the leading 15 significant digits
 * (`3.14159265358979`, i.e. 14 decimal places) as a robust pass condition
 * (every reasonable LLM hits that; only very small models miss it), and
 * additionally log whether the full 20-decimal sequence landed for
 * diagnostic purposes. This keeps the test stable across model choices while
 * still proving the streaming session round-tripped a useful answer through
 * the agentic loop.
 */
class AgentSessionLiveTest {

    // Model under test; override with the AGENTSKT_TEST_OLLAMA_MODEL environment variable.
    private val ollamaModel: String = System.getenv("AGENTSKT_TEST_OLLAMA_MODEL") ?: "gpt-oss:120b-cloud"

    /**
     * Runs one session against the local Ollama and checks both the shape of
     * the event stream (SkillStarted first, a SkillCompleted present, Completed
     * last) and that the output contains the leading digits of π.
     */
    @Tag("live-llm")
    @Test
    fun `session against Ollama — π to 20 decimal places, events ordered, output contains canonical digits`() = runBlocking {
        assumeTrue(isOllamaReachable(), "skipping: no Ollama at localhost:11434")

        val piAgent = agent<String, String>("pi-reciter") {
            prompt(
                "You are a numeric assistant. When the user asks for π (pi), respond with the value to " +
                    "EXACTLY 20 decimal places. Output ONLY the number — no words, no equals sign, no units, " +
                    "no commentary. Example format: 3.14159265358979323846"
            )
            model {
                ollama(ollamaModel)
                host = "localhost"
                port = 11434
                temperature = 0.0 // Determinism matters here.
            }
            skills {
                skill<String, String>("recite", "Returns π to the requested precision") { tools() }
            }
        }

        val session = piAgent.session("Give me π to 20 decimal places.")
        val events = session.events.toList()
        val output = session.await()

        // ── Event-flow shape (step 2 contract) ─────────────────────────────
        // SkillStarted at index 0, SkillCompleted somewhere before the terminal,
        // and Completed as the last event. We don't pin exact size because step 3
        // will add Token / ToolCall* events; this test should stay green through
        // that rewire.
        assertTrue(events.isNotEmpty(), "session must emit at least one event")
        val started = events.first()
        assertIs<AgentEvent.SkillStarted>(started, "first event must be SkillStarted; got: $started")
        assertEquals("pi-reciter", started.agentId)
        assertEquals("recite", started.skillName)

        val terminal = events.last()
        assertIs<AgentEvent.Completed<String>>(terminal, "last event must be Completed<String>; got: $terminal")
        assertEquals("pi-reciter", terminal.agentId)
        assertEquals(output, terminal.output, "Completed.output must match session.await()")
        // First and last are already pinned to other event types, so any
        // SkillCompleted found here necessarily sits strictly between them.
        assertTrue(
            events.any { it is AgentEvent.SkillCompleted },
            "SkillCompleted must appear between SkillStarted and Completed; got: $events",
        )

        // ── Output content ─────────────────────────────────────────────────
        // Robust pass: 15 significant digits (14 decimal places). The full
        // 20-decimal sequence is the ambitious target; we report on it but
        // don't fail when a model is a touch loose on the tail.
        val canonical20 = "3.14159265358979323846"
        val robust15 = "3.14159265358979"
        assertTrue(
            output.contains(robust15),
            "expected output to contain π's first 15 significant digits ($robust15); got: \"$output\"",
        )
        val hitFull20 = output.contains(canonical20)
        println("AgentSessionLiveTest: π model=$ollamaModel; full20=$hitFull20; output=\"$output\"")
    }

    /**
     * Best-effort probe: returns `true` only when `GET /api/tags` on
     * `localhost:11434` answers with a 2xx status within the timeouts below.
     * Any exception raised by the probe counts as "not reachable".
     */
    private fun isOllamaReachable(): Boolean = try {
        val client = HttpClient.newBuilder().connectTimeout(Duration.ofMillis(500)).build()
        val request = HttpRequest.newBuilder()
            .uri(URI.create("http://localhost:11434/api/tags"))
            .timeout(Duration.ofMillis(1500))
            .GET()
            .build()
        val response = client.send(request, HttpResponse.BodyHandlers.discarding())
        response.statusCode() in 200..299
    } catch (_: InterruptedException) {
        // Preserve the interrupt for whoever owns this thread instead of silently eating it.
        Thread.currentThread().interrupt()
        false
    } catch (_: Exception) {
        // Connection refused, timeout, etc. — treat as "no Ollama". JVM Errors are left to propagate.
        false
    }
}

0 commit comments

Comments
 (0)