test(#1737): integration coverage for agent.session — failure, concurrency, agentic-stub, live π

Skobeltsyn · claude · Skobeltsyn · commit 89d892e8f3fc · 2026-05-15T23:54:20.000+03:00
Four scenarios on top of #1736's basic-events test.

AgentSessionIntegrationTest.kt (no live LLM):
- Failure path — implementedBy throws, terminal event is Failed
  carrying the original exception (identity-equal), session.await()
  rethrows with the same type + message (Kotlin coroutines'
  CompletableDeferred copies the stack-trace-recovered cause, so
  identity equality only holds on AgentEvent.Failed.cause).
- Concurrent sessions — two parallel session() calls on the same
  agent, both produce uncorrupted SkillStarted/SkillCompleted/Completed
  sequences with the right typed outputs. Pins the closure-captured
  skill-name holder design.
- Agentic-stub bracketing — agent with stub ModelClient running one
  agentic turn. Asserts SkillStarted/SkillCompleted/Completed bracket
  the loop and that NO Token/ToolCall* events appear yet (step-2
  intentional gap; step 3 will rewire and this assertion relaxes).

AgentSessionLiveTest.kt (tagged live-llm):
- π to 20 decimal places against Ollama (gpt-oss:120b-cloud default).
  Robust pass condition: output contains the first 15 significant digits
  3.14159265358979 (14 decimal places). Diagnostic stdout reports whether the full
  20-digit canonical 3.14159265358979323846 landed (it does on
  gpt-oss:120b-cloud, full20=true). Event ordering asserted:
  SkillStarted first, SkillCompleted somewhere middle, Completed
  last — flexible to the step-3 rewire that'll add intermediate
  Token / ToolCall* events.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/src/test/kotlin/agents_engine/runtime/events/AgentSessionIntegrationTest.kt b/src/test/kotlin/agents_engine/runtime/events/AgentSessionIntegrationTest.kt
@@ -0,0 +1,130 @@
+package agents_engine.runtime.events
+
+import agents_engine.core.agent
+import agents_engine.model.LlmResponse
+import agents_engine.model.ModelClient
+import agents_engine.model.TokenUsage
+import kotlinx.coroutines.async
+import kotlinx.coroutines.coroutineScope
+import kotlinx.coroutines.flow.toList
+import kotlinx.coroutines.test.runTest
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertFailsWith
+import kotlin.test.assertIs
+import kotlin.test.assertSame
+import kotlin.test.assertTrue
+
+// #1737 — integration coverage for the v0.5.0 session surface beyond the
+// happy implementedBy path. These pin contracts that step 3 will need to
+// preserve when the agentic loop is rewired onto a FlowCollector.
+
+class AgentSessionIntegrationTest {
+
+    // Generic four-slot holder: lets the concurrent test return both sessions' results and destructure them.
+    private data class Quad<A, B, C, D>(val a: A, val b: B, val c: C, val d: D)
+
+    @Test
+    fun `failure path — Failed terminates events and the same exception rethrows from await`() = runTest {
+        val original = IllegalStateException("boom")
+        val explodingAgent = agent<String, String>("fails") {
+            skills {
+                skill<String, String>("explode", "Throws unconditionally") {
+                    implementedBy { throw original }
+                }
+            }
+        }
+
+        val session = explodingAgent.session("anything")
+        val events = session.events.toList()
+
+        // The stream has to end in Failed, and Failed has to hand back the very instance that was thrown.
+        assertTrue(events.isNotEmpty(), "expected at least one event before terminal Failed")
+        val terminal = events.last()
+        val failed = assertIs<AgentEvent.Failed>(terminal, "last event must be Failed; got: $terminal")
+        assertEquals("fails", failed.agentId)
+        assertSame(original, failed.cause, "Failed.cause must be the original exception, not a wrapper")
+
+        // Failed and Completed are alternative terminals: a failing session must never report Completed too.
+        val completions = events.filterIsInstance<AgentEvent.Completed<*>>()
+        assertTrue(completions.isEmpty(), "Completed must NOT appear on the failure path")
+
+        // await() is held to type + message only: the rethrown exception is expected to be a copy with a
+        // recovered stack trace, so the identity check lives on AgentEvent.Failed.cause above instead.
+        val rethrown = assertFailsWith<IllegalStateException> { session.await() }
+        assertEquals(original.message, rethrown.message, "await() must rethrow with the original message")
+    }
+
+    @Test
+    fun `concurrent sessions — two parallel invocations on the same agent don't share skill-name state`() = runTest {
+        val upperAgent = agent<String, String>("echo") {
+            skills {
+                skill<String, String>("uppercase", "Uppercases the input") {
+                    implementedBy { text -> text.uppercase() }
+                }
+            }
+        }
+
+        // Both sessions are opened first, then drained by sibling coroutines. The design pinned here keeps the
+        // skill name in a holder captured per session; a shared holder could leak state across sessions. Both run
+        // the same "uppercase" skill, so what this can actually catch is corruption such as a null skill name.
+        val (eventsA, outputA, eventsB, outputB) = coroutineScope {
+            val first = upperAgent.session("alpha")
+            val second = upperAgent.session("bravo")
+            val drainFirst = async { first.events.toList() }
+            val drainSecond = async { second.events.toList() }
+            val resultFirst = first.await()
+            val resultSecond = second.await()
+            Quad(drainFirst.await(), resultFirst, drainSecond.await(), resultSecond)
+        }
+
+        assertEquals("ALPHA", outputA)
+        assertEquals("BRAVO", outputB)
+
+        listOf("A" to eventsA, "B" to eventsB).forEach { (label, events) ->
+            assertEquals(3, events.size, "session $label: expected 3 events; got: $events")
+            val started = assertIs<AgentEvent.SkillStarted>(events[0])
+            assertEquals("uppercase", started.skillName, "session $label: skill name must not be corrupted by the other session")
+            val completed = assertIs<AgentEvent.SkillCompleted>(events[1])
+            assertEquals("uppercase", completed.skillName, "session $label: skill name on SkillCompleted")
+            assertIs<AgentEvent.Completed<String>>(events[2])
+        }
+    }
+
+    @Test
+    fun `agentic-stub bracketing — SkillStarted SkillCompleted Completed wrap the loop, no Token or ToolCall events yet`() = runTest {
+        // The stub answers with plain text right away, so the agentic loop completes in one turn.
+        val stubUsage = TokenUsage(promptTokens = 7, completionTokens = 4)
+        val oneTurnModel = ModelClient { _ -> LlmResponse.Text("done", stubUsage) }
+
+        val stubbedAgent = agent<String, String>("agentic") {
+            prompt("Test stub agent.")
+            model { ollama("llama3"); client = oneTurnModel }
+            skills {
+                skill<String, String>("respond", "Echoes back via the model") { tools() }
+            }
+        }
+
+        val session = stubbedAgent.session("kick")
+        val events = session.events.toList()
+        val output = session.await()
+
+        assertEquals("done", output, "agentic skill output must equal the stub text")
+        // Step-2 contract: agentic skills surface only SkillStarted / SkillCompleted / Completed. When step 3
+        // rewires executeAgentic onto a FlowCollector this check must relax and the test re-pins the new contract.
+        val noStreamingEvents = events.all { event ->
+            event !is AgentEvent.Token &&
+                event !is AgentEvent.ToolCallStarted &&
+                event !is AgentEvent.ToolCallArgumentsDelta &&
+                event !is AgentEvent.ToolCallFinished
+        }
+        assertTrue(noStreamingEvents, "step 2 must not yet emit Token / ToolCall* events for agentic skills; got: $events")
+        assertEquals(3, events.size, "expected exactly [SkillStarted, SkillCompleted, Completed]; got: $events")
+        val started = assertIs<AgentEvent.SkillStarted>(events[0])
+        assertEquals("respond", started.skillName)
+        val completed = assertIs<AgentEvent.SkillCompleted>(events[1])
+        assertEquals("respond", completed.skillName)
+        val terminal = assertIs<AgentEvent.Completed<String>>(events[2])
+        assertEquals("done", terminal.output)
+    }
+}
diff --git a/src/test/kotlin/agents_engine/runtime/events/AgentSessionLiveTest.kt b/src/test/kotlin/agents_engine/runtime/events/AgentSessionLiveTest.kt
@@ -0,0 +1,109 @@
+package agents_engine.runtime.events
+
+import agents_engine.core.agent
+import kotlinx.coroutines.flow.toList
+import kotlinx.coroutines.runBlocking
+import org.junit.jupiter.api.Assumptions.assumeTrue
+import org.junit.jupiter.api.Tag
+import org.junit.jupiter.api.Test
+import java.net.URI
+import java.net.http.HttpClient
+import java.net.http.HttpRequest
+import java.net.http.HttpResponse
+import java.time.Duration
+import kotlin.test.assertEquals
+import kotlin.test.assertIs
+import kotlin.test.assertTrue
+
+/**
+ * #1737 — live-LLM end-to-end exercise of `agent.session(input)` against a
+ * real Ollama. Tagged `live-llm` so the default suite skips it; runs via
+ * `./gradlew integrationTest`. Skips cleanly when Ollama is not reachable
+ * at `localhost:11434`.
+ *
+ * Verifiable assertion target: π. The agent is asked to recite π to 20
+ * decimal places — the canonical sequence is `3.14159265358979323846`. We
+ * check the output contains the leading 15 significant digits — 14 decimal
+ * places, `3.14159265358979` — as a robust pass condition (every reasonable
+ * LLM hits that; only very small models miss it), and additionally log whether
+ * the full 20-decimal sequence landed for diagnostic purposes. This keeps the
+ * test stable across model choices while still proving the streaming session
+ * round-tripped a useful answer through the agentic loop.
+ */
+class AgentSessionLiveTest {
+
+    private val ollamaModel: String = System.getenv("AGENTSKT_TEST_OLLAMA_MODEL") ?: "gpt-oss:120b-cloud"
+
+    @Tag("live-llm")
+    @Test
+    fun `session against Ollama — π to 20 decimal places, events ordered, output contains canonical digits`() = runBlocking {
+        assumeTrue(isOllamaReachable(), "skipping: no Ollama at localhost:11434")
+
+        val piAgent = agent<String, String>("pi-reciter") {
+            prompt(
+                "You are a numeric assistant. When the user asks for π (pi), respond with the value to " +
+                    "EXACTLY 20 decimal places. Output ONLY the number — no words, no equals sign, no units, " +
+                    "no commentary. Example format: 3.14159265358979323846"
+            )
+            model {
+                ollama(ollamaModel)
+                host = "localhost"
+                port = 11434
+                temperature = 0.0  // Determinism matters here.
+            }
+            skills {
+                skill<String, String>("recite", "Returns π to the requested precision") { tools() }
+            }
+        }
+
+        val session = piAgent.session("Give me π to 20 decimal places.")
+        val events = session.events.toList()
+        val output = session.await()
+
+        // ── Event-flow shape (step 2 contract) ─────────────────────────────
+        // SkillStarted first, Completed last, SkillCompleted in between. Size is not pinned: step 3 will
+        // add Token / ToolCall* events and this test should stay green through that rewire.
+        assertTrue(events.isNotEmpty(), "session must emit at least one event")
+        val started = events.first()
+        assertIs<AgentEvent.SkillStarted>(started, "first event must be SkillStarted; got: $started")
+        assertEquals("pi-reciter", started.agentId)
+        assertEquals("recite", started.skillName)
+
+        val terminal = events.last()
+        assertIs<AgentEvent.Completed<String>>(terminal, "last event must be Completed<String>; got: $terminal")
+        assertEquals("pi-reciter", terminal.agentId)
+        assertEquals(output, terminal.output, "Completed.output must match session.await()")
+        assertTrue(
+            events.any { it is AgentEvent.SkillCompleted },
+            "SkillCompleted must appear between SkillStarted and Completed; got: $events",
+        )
+
+        // ── Output content ─────────────────────────────────────────────────
+        // Robust pass: 15 significant digits (14 decimal places). The full 20-decimal sequence is the
+        // ambitious target; it is reported, but a model that is a touch loose on the tail does not fail.
+        val canonical20 = "3.14159265358979323846"
+        val robust15    = "3.14159265358979"
+        assertTrue(
+            output.contains(robust15),
+            "expected output to contain π's first 15 significant digits ($robust15); got: \"$output\"",
+        )
+        val hitFull20 = output.contains(canonical20)
+        println("AgentSessionLiveTest: π model=$ollamaModel; full20=$hitFull20; output=\"$output\"")
+    }
+
+    // Best-effort probe of Ollama's /api/tags; a non-2xx status or any exception means "not reachable".
+    private fun isOllamaReachable(): Boolean = try {
+        val client = HttpClient.newBuilder().connectTimeout(Duration.ofMillis(500)).build()
+        val request = HttpRequest.newBuilder()
+            .uri(URI.create("http://localhost:11434/api/tags"))
+            .timeout(Duration.ofMillis(1500))
+            .GET()
+            .build()
+        client.send(request, HttpResponse.BodyHandlers.discarding()).statusCode() in 200..299
+    } catch (_: InterruptedException) {
+        Thread.currentThread().interrupt()  // keep the interrupt visible to whoever requested it
+        false
+    } catch (_: Exception) {
+        false
+    }
+}