diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index f06b9e75..e5d6e3e2 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -466,6 +466,35 @@ jobs:
             ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
           if-no-files-found: warn
 
+  # ---------------------------------------------------------------------------
+  # vmlens interleaving analysis — pure-Java, needs no native library or models.
+  # Staged to a single smoke test for now (see the `vmlens` profile in pom.xml).
+  # ---------------------------------------------------------------------------
+  vmlens:
+    name: Test (vmlens interleavings)
+    needs: startgate
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-java@v5
+        with:
+          distribution: 'temurin'
+          java-version: ${{ env.JAVA_VERSION }}
+          cache: maven
+      - name: Test under vmlens (interleaving analysis)
+        # Add each new test in the `vmlens` package to this -Dtest list (surefire
+        # -Dtest matches simple class names, not package globs; the default suite is
+        # excluded from the vmlens package via pom.xml managed surefire <excludes>).
+        run: >-
+          mvn --batch-mode --no-transfer-progress -Pvmlens test
+          -Dtest=VmlensInterleavingSmokeTest,SessionStateInterleavingTest -DfailIfNoTests=false
+      - uses: actions/upload-artifact@v7
+        if: always()
+        with:
+          name: vmlens-report
+          path: target/vmlens-report/
+          if-no-files-found: ignore
+
   test-java-macos-arm64-metal:
     name: Java Tests macOS 14 arm64 (Metal)
     needs: build-macos-arm64-metal
diff --git a/pom.xml b/pom.xml
index 8385d506..afb61c02 100644
--- a/pom.xml
+++ b/pom.xml
@@ -219,6 +219,21 @@ SPDX-License-Identifier: MIT
 			<version>${reactor.version}</version>
 			<scope>test</scope>
 		</dependency>
+		<!--
+			vmlens interleaving-analysis API. Test-scoped and transitive-dependency-free
+			(its own deps are all test-scope, hence not propagated), so it is safe on the
+			default test classpath under dependencyConvergence. Needed so the
+			VmlensInterleavingSmoke* test compiles in every build; it is only *executed*
+			under the `vmlens` profile (see that profile + the vmlens CI job). The default
+			surefire run excludes it (managed surefire <excludes> below) so the vmlens
+			agent's JDK 9 API class is never loaded outside the agent-driven run.
+		-->
+		<dependency>
+			<groupId>com.vmlens</groupId>
+			<artifactId>api</artifactId>
+			<version>${vmlens.version}</version>
+			<scope>test</scope>
+		</dependency>
 	</dependencies>
 
 	<build>
@@ -278,6 +293,19 @@ SPDX-License-Identifier: MIT
 					<groupId>org.apache.maven.plugins</groupId>
 					<artifactId>maven-surefire-plugin</artifactId>
 					<version>3.5.6</version>
+					<configuration>
+						<!--
+							Tests in the `vmlens` package are meaningful only when run
+							under the vmlens agent (the `vmlens` profile / CI job). Without the
+							agent its AllInterleavings loop body never executes (a vacuous pass
+							that also prints an "agent not configured" warning), so exclude it
+							from the ordinary surefire run. The vmlens job re-includes them with
+							`-Dtest=...`, which overrides this exclude.
+						-->
+						<excludes>
+							<exclude>**/vmlens/*.java</exclude>
+						</excludes>
+					</configuration>
 				</plugin>
 				<plugin>
 					<groupId>org.apache.maven.plugins</groupId>
@@ -881,14 +909,6 @@ SPDX-License-Identifier: MIT
 		</profile>
 		<profile>
 			<id>vmlens</id>
-			<dependencies>
-				<dependency>
-					<groupId>com.vmlens</groupId>
-					<artifactId>api</artifactId>
-					<version>${vmlens.version}</version>
-					<scope>test</scope>
-				</dependency>
-			</dependencies>
 			<build>
 				<plugins>
 					<plugin>
@@ -896,16 +916,15 @@ SPDX-License-Identifier: MIT
 						<artifactId>vmlens-maven-plugin</artifactId>
 						<configuration>
 							<!--
-								Lincheck generates its own TestThreadExecution class on the fly.
-								That bytecode clashes with vmlens's load-time instrumentation
-								(java.lang.VerifyError). Skip the Lincheck test under vmlens;
-								the default test job still runs it.
-								**/*$* is the plugin default - kept to preserve inner-class skip.
+								Run vmlens interleaving analysis over the whole `vmlens` test
+								package (smoke test + SessionStateInterleavingTest). The `com.vmlens:api`
+								test dependency lives in the main <dependencies> block.
+								Expand <includes> as more concurrency tests are added (the
+								streambuffer repo runs vmlens over its whole suite).
 							-->
-							<excludes>
-								<exclude>**/*$*</exclude>
-								<exclude>**/CancellationTokenLincheckTest.java</exclude>
-							</excludes>
+							<includes>
+								<include>**/vmlens/*.java</include>
+							</includes>
 						</configuration>
 						<executions>
 							<execution>
diff --git a/src/main/java/net/ladenthin/llama/Session.java b/src/main/java/net/ladenthin/llama/Session.java
index fe8654ac..9efe15e7 100644
--- a/src/main/java/net/ladenthin/llama/Session.java
+++ b/src/main/java/net/ladenthin/llama/Session.java
@@ -9,7 +9,7 @@
 import lombok.ToString;
 import net.ladenthin.llama.parameters.InferenceParameters;
 import net.ladenthin.llama.value.ChatMessage;
-import net.ladenthin.llama.value.ChatTranscript;
+import net.ladenthin.llama.value.Pair;
 import org.jspecify.annotations.Nullable;
 
 /**
@@ -30,11 +30,11 @@
  * {@link #commitStreamedReply(String)}.
  * </p>
  *
- * <p>{@code toString} is generated by Lombok over the slot id, system message, and
- * accumulated turns. The owning {@link LlamaModel} is excluded because its
- * {@code toString} would render native state. The {@code paramsCustomizer}
- * {@link UnaryOperator} is excluded because lambda {@code toString} is the implementation
- * hash, not useful in logs. The intrinsic {@code lock} is excluded as a noise field.
+ * <p>{@code toString} is generated by Lombok over the slot id and the
+ * {@link SessionState} (which renders the transcript and streaming flag). The owning
+ * {@link LlamaModel} is excluded because its {@code toString} would render native
+ * state. The {@code paramsCustomizer} {@link UnaryOperator} is excluded because lambda
+ * {@code toString} is the implementation hash, not useful in logs.
  * {@code equals}/{@code hashCode} are intentionally NOT generated: a session is a
  * mutable lifecycle handle managed by identity.</p>
  */
@@ -48,23 +48,18 @@ public final class Session implements AutoCloseable {
     private final int slotId;
 
     /**
-     * Append-only transcript with two-phase commit semantics. See the
-     * {@link net.ladenthin.llama.value.ChatTranscript} class Javadoc for the full invariant statement
-     * and the {@code ChatTranscriptTest} class for the running-documentation
-     * tests that pin the contract.
+     * The lock-guarded streaming-flag + transcript state machine. Extracted to
+     * {@link SessionState} so its concurrency contract (the two-phase commit and the
+     * streaming guard) is testable without the native model; see that class and the
+     * {@code SessionStateInterleavingTest}. This {@code Session} only adds the model
+     * calls, injected as callbacks that {@link SessionState} runs under its lock.
      */
-    private final ChatTranscript transcript;
+    private final SessionState state;
 
     // Lambda UnaryOperator — toString is the implementation hash, not useful in logs.
     @ToString.Exclude
     private final @Nullable UnaryOperator<InferenceParameters> paramsCustomizer;
 
-    // Intrinsic lock used only for synchronisation; rendering its identity adds noise.
-    @ToString.Exclude
-    private final Object lock = new Object();
-
-    private boolean streamingActive;
-
     /**
      * Create a session bound to a specific slot id, with an optional system prompt
      * applied to every {@link #send(String)} call.
@@ -95,7 +90,7 @@ public Session(
             @Nullable UnaryOperator<InferenceParameters> paramsCustomizer) {
         this.model = model;
         this.slotId = slotId;
-        this.transcript = new ChatTranscript(systemMessage);
+        this.state = new SessionState(slotId, systemMessage);
         this.paramsCustomizer = paramsCustomizer;
     }
 
@@ -106,22 +101,13 @@ public Session(
      * @return the assistant's reply text
      */
     public String send(String userMessage) {
-        synchronized (lock) {
-            if (streamingActive) {
-                throw new IllegalStateException("stream in progress on slot " + slotId
-                        + " (transcript=" + transcript.size() + " turns)"
-                        + "; call commitStreamedReply(...) before send(...)");
-            }
-            // Two-phase commit: build the wire-format with the pending user turn
-            // outside the transcript via messagesWithPendingUserTurn(...). On
-            // model success, commit BOTH turns atomically through appendRound(...).
-            // On model failure, nothing was committed — no rollback logic needed.
-            // Invariant pinned by ChatTranscriptTest.
-            InferenceParameters params = buildParamsWithPendingUserTurn(userMessage);
-            String reply = model.chatCompleteText(params);
-            transcript.appendRound(userMessage, reply);
-            return reply;
-        }
+        // Two-phase commit lives in SessionState.send(...): it guards against an
+        // in-progress stream, builds the wire-format with the pending user turn, runs
+        // the model call below under the lock, and on success commits BOTH turns
+        // atomically. On model failure nothing is committed — no rollback needed.
+        return state.send(
+                userMessage,
+                (systemMessage, wireMessages) -> model.chatCompleteText(buildParams(systemMessage, wireMessages)));
     }
 
     /**
@@ -134,20 +120,13 @@ public String send(String userMessage) {
      * @return a {@link LlamaIterable} that yields assistant reply chunks
      */
     public LlamaIterable stream(String userMessage) {
-        synchronized (lock) {
-            if (streamingActive) {
-                throw new IllegalStateException("stream in progress on slot " + slotId
-                        + " (transcript=" + transcript.size() + " turns)"
-                        + "; call commitStreamedReply(...) before stream(...)");
-            }
-            // Two-phase commit: see send(). The user turn is committed only after
-            // generateChat successfully returns the iterable; the assistant turn is
-            // committed separately by commitStreamedReply(...).
-            LlamaIterable iterable = model.generateChat(buildParamsWithPendingUserTurn(userMessage));
-            transcript.appendUserTurn(userMessage);
-            streamingActive = true;
-            return iterable;
-        }
+        // SessionState.beginStream(...) guards against an in-progress stream, runs the
+        // model call below under the lock, and on success commits the user turn and
+        // marks streaming active; the assistant turn is committed separately by
+        // commitStreamedReply(...).
+        return state.beginStream(
+                userMessage,
+                (systemMessage, wireMessages) -> model.generateChat(buildParams(systemMessage, wireMessages)));
     }
 
     /**
@@ -157,15 +136,7 @@ public LlamaIterable stream(String userMessage) {
      * @param assistantText the assistant text accumulated from a prior {@link #stream(String)} call
      */
     public void commitStreamedReply(String assistantText) {
-        synchronized (lock) {
-            if (!streamingActive) {
-                throw new IllegalStateException("no stream in progress on slot " + slotId
-                        + " (transcript=" + transcript.size() + " turns)"
-                        + "; call stream(...) first");
-            }
-            transcript.appendAssistantTurn(assistantText);
-            streamingActive = false;
-        }
+        state.commitStreamedReply(assistantText);
     }
 
     /**
@@ -175,14 +146,7 @@ public void commitStreamedReply(String assistantText) {
      * @return the JSON response from the native save action
      */
     public String save(String filepath) {
-        synchronized (lock) {
-            if (streamingActive) {
-                throw new IllegalStateException("stream in progress on slot " + slotId
-                        + " (transcript=" + transcript.size() + " turns)"
-                        + "; call commitStreamedReply(...) before save(...)");
-            }
-            return model.saveSlot(slotId, filepath);
-        }
+        return state.runWhenNotStreaming("save", () -> model.saveSlot(slotId, filepath));
     }
 
     /**
@@ -192,14 +156,7 @@ public String save(String filepath) {
      * @return the JSON response from the native restore action
      */
     public String restore(String filepath) {
-        synchronized (lock) {
-            if (streamingActive) {
-                throw new IllegalStateException("stream in progress on slot " + slotId
-                        + " (transcript=" + transcript.size() + " turns)"
-                        + "; call commitStreamedReply(...) before restore(...)");
-            }
-            return model.restoreSlot(slotId, filepath);
-        }
+        return state.runWhenNotStreaming("restore", () -> model.restoreSlot(slotId, filepath));
     }
 
     /**
@@ -207,33 +164,28 @@ public String restore(String filepath) {
      * @return the accumulated transcript so far, in order, including the system message if any
      */
     public List<ChatMessage> getMessages() {
-        synchronized (lock) {
-            return transcript.snapshot();
-        }
+        return state.snapshot();
     }
 
     /** Erase the bound slot's KV cache. Does not modify the in-memory transcript. */
     @Override
     public void close() {
-        synchronized (lock) {
-            model.eraseSlot(slotId);
-        }
+        state.runUnderLock(() -> model.eraseSlot(slotId));
     }
 
     /**
-     * Build inference parameters with a pending user turn appended to the existing
-     * transcript — without mutating the underlying {@link net.ladenthin.llama.value.ChatTranscript}. The
-     * actual transcript mutation happens AFTER the model call returns successfully,
-     * either via {@link net.ladenthin.llama.value.ChatTranscript#appendRound(String, String)} (send path)
-     * or {@link net.ladenthin.llama.value.ChatTranscript#appendUserTurn(String)} (stream path).
+     * Build inference parameters from the system message and the wire-format messages
+     * supplied by {@link SessionState} (the committed turns plus the pending user
+     * turn), applying the optional {@code paramsCustomizer}. The transcript itself is
+     * never mutated here; {@link SessionState} commits turns only after the model call
+     * returns successfully.
      *
-     * @param pendingUserMessage the user turn to include in the wire format
-     * @return inference parameters carrying transcript + pending user turn
+     * @param systemMessage the system prompt, or {@code null} when none was configured
+     * @param wireMessages the committed turns plus the pending user turn
+     * @return inference parameters carrying the system message + wire messages
      */
-    private InferenceParameters buildParamsWithPendingUserTurn(String pendingUserMessage) {
-        InferenceParameters params = InferenceParameters.empty()
-                .withMessages(
-                        transcript.getSystemMessage(), transcript.messagesWithPendingUserTurn(pendingUserMessage));
+    private InferenceParameters buildParams(@Nullable String systemMessage, List<Pair<String, String>> wireMessages) {
+        InferenceParameters params = InferenceParameters.empty().withMessages(systemMessage, wireMessages);
         return paramsCustomizer == null ? params : paramsCustomizer.apply(params);
     }
 }
diff --git a/src/main/java/net/ladenthin/llama/SessionState.java b/src/main/java/net/ladenthin/llama/SessionState.java
new file mode 100644
index 00000000..11713db0
--- /dev/null
+++ b/src/main/java/net/ladenthin/llama/SessionState.java
@@ -0,0 +1,184 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import java.util.List;
+import java.util.function.BiFunction;
+import java.util.function.Supplier;
+import lombok.ToString;
+import net.ladenthin.llama.value.ChatMessage;
+import net.ladenthin.llama.value.ChatTranscript;
+import net.ladenthin.llama.value.Pair;
+import org.jspecify.annotations.Nullable;
+
+/**
+ * The lock-guarded conversation state machine behind {@link Session}: a
+ * {@code streamingActive} flag plus a {@link ChatTranscript}, all serialised on a
+ * single intrinsic lock. Extracted from {@link Session} so the concurrency contract
+ * — the compound atomicity of the streaming guard and the two-phase transcript
+ * commit — is testable independently of {@link LlamaModel} and its native library
+ * (the same testability rationale that produced {@link ChatTranscript}).
+ *
+ * <p>The native model call is injected as a callback that runs <em>under the
+ * lock</em>, between the guard check and the transcript commit, so {@link Session}
+ * keeps exactly its previous serialisation semantics while this class owns no model
+ * state and can be exercised with plain lambdas.</p>
+ *
+ * <h2>Compound-atomicity invariant</h2>
+ *
+ * <p>Under every interleaving of concurrent callers, the {@code streamingActive}
+ * flag and the transcript move together: {@link #send(String, BiFunction)} and
+ * {@link #beginStream(String, BiFunction)} fail-fast with
+ * {@link IllegalStateException} while a stream is in progress, and the transcript
+ * therefore always stays a strictly alternating {@code user, assistant, …}
+ * sequence. This is the property the {@code SessionStateInterleavingTest} pins under
+ * vmlens.</p>
+ *
+ * <h2>{@code toString} contract</h2>
+ *
+ * <p>Lombok-generated over the slot id, transcript, and streaming flag. The
+ * intrinsic {@code lock} is excluded as a noise field. {@code equals}/{@code
+ * hashCode} are intentionally NOT generated: like {@link Session} this is a mutable
+ * lifecycle handle identified by identity.</p>
+ */
+@ToString
+public final class SessionState {
+
+    // Intrinsic lock used only for synchronisation; rendering its identity adds noise.
+    @ToString.Exclude
+    private final Object lock = new Object();
+
+    private final int slotId;
+
+    private final ChatTranscript transcript;
+
+    private boolean streamingActive;
+
+    /**
+     * Create a new state machine for the given slot, with an optional system prompt.
+     *
+     * @param slotId the slot id, used only to render diagnostic messages
+     * @param systemMessage optional system prompt (may be {@code null} or empty)
+     */
+    public SessionState(int slotId, @Nullable String systemMessage) {
+        this.slotId = slotId;
+        this.transcript = new ChatTranscript(systemMessage);
+    }
+
+    /**
+     * Run a synchronous send round atomically: fail-fast if a stream is in
+     * progress, invoke {@code modelCall} (under the lock) with the system message
+     * and the wire-format messages carrying a pending user turn, then commit the
+     * user turn and the returned reply as one round.
+     *
+     * @param userMessage the user turn to commit on success
+     * @param modelCall the model invocation; receives {@code (systemMessage,
+     *     wireMessages)} and returns the assistant reply text
+     * @return the assistant reply produced by {@code modelCall}
+     * @throws IllegalStateException if a stream is currently in progress
+     */
+    public String send(String userMessage, BiFunction<@Nullable String, List<Pair<String, String>>, String> modelCall) {
+        synchronized (lock) {
+            requireNotStreaming("send");
+            String reply =
+                    modelCall.apply(transcript.getSystemMessage(), transcript.messagesWithPendingUserTurn(userMessage));
+            transcript.appendRound(userMessage, reply);
+            return reply;
+        }
+    }
+
+    /**
+     * Begin a streaming round atomically: fail-fast if a stream is in progress,
+     * invoke {@code modelCall} (under the lock) to obtain the stream handle, then
+     * commit the user turn and mark streaming active. The matching assistant turn
+     * is committed later via {@link #commitStreamedReply(String)}.
+     *
+     * @param <R> the stream-handle type returned by {@code modelCall}
+     * @param userMessage the user turn to commit on success
+     * @param modelCall the model invocation; receives {@code (systemMessage,
+     *     wireMessages)} and returns the stream handle
+     * @return the stream handle produced by {@code modelCall}
+     * @throws IllegalStateException if a stream is currently in progress
+     */
+    public <R> R beginStream(
+            String userMessage, BiFunction<@Nullable String, List<Pair<String, String>>, R> modelCall) {
+        synchronized (lock) {
+            requireNotStreaming("stream");
+            R streamHandle =
+                    modelCall.apply(transcript.getSystemMessage(), transcript.messagesWithPendingUserTurn(userMessage));
+            transcript.appendUserTurn(userMessage);
+            streamingActive = true;
+            return streamHandle;
+        }
+    }
+
+    /**
+     * Commit the assistant reply accumulated from a prior
+     * {@link #beginStream(String, BiFunction)} and clear the streaming flag.
+     *
+     * @param assistantText the assistant text to append
+     * @throws IllegalStateException if no stream is currently in progress
+     */
+    public void commitStreamedReply(String assistantText) {
+        synchronized (lock) {
+            if (!streamingActive) {
+                throw new IllegalStateException("no stream in progress on slot " + slotId
+                        + " (transcript=" + transcript.size() + " turns)"
+                        + "; call stream(...) first");
+            }
+            transcript.appendAssistantTurn(assistantText);
+            streamingActive = false;
+        }
+    }
+
+    /**
+     * Run an action under the lock, but only when no stream is in progress
+     * (used for slot save/restore, which must not race a streaming round).
+     *
+     * @param <R> the action result type
+     * @param operation the operation name used in the diagnostic message
+     * @param action the action to run while holding the lock
+     * @return the action result
+     * @throws IllegalStateException if a stream is currently in progress
+     */
+    public <R> R runWhenNotStreaming(String operation, Supplier<R> action) {
+        synchronized (lock) {
+            requireNotStreaming(operation);
+            return action.get();
+        }
+    }
+
+    /**
+     * Run an action under the lock with no streaming guard (used for slot erase on
+     * {@code close()}, which is valid regardless of streaming state).
+     *
+     * @param action the action to run while holding the lock
+     */
+    public void runUnderLock(Runnable action) {
+        synchronized (lock) {
+            action.run();
+        }
+    }
+
+    /**
+     * Return an unmodifiable snapshot of the transcript, including the system
+     * message if one was configured.
+     *
+     * @return the unmodifiable transcript snapshot
+     */
+    public List<ChatMessage> snapshot() {
+        synchronized (lock) {
+            return transcript.snapshot();
+        }
+    }
+
+    private void requireNotStreaming(String operation) {
+        if (streamingActive) {
+            throw new IllegalStateException("stream in progress on slot " + slotId
+                    + " (transcript=" + transcript.size() + " turns)"
+                    + "; call commitStreamedReply(...) before " + operation + "(...)");
+        }
+    }
+}
diff --git a/src/test/java/net/ladenthin/llama/SessionStateTest.java b/src/test/java/net/ladenthin/llama/SessionStateTest.java
new file mode 100644
index 00000000..65dd6147
--- /dev/null
+++ b/src/test/java/net/ladenthin/llama/SessionStateTest.java
@@ -0,0 +1,111 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+package net.ladenthin.llama;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.is;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.List;
+import java.util.stream.Collectors;
+import net.ladenthin.llama.value.ChatMessage;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Model-free unit tests pinning the {@link SessionState} contract extracted from
+ * {@link Session}: the two-phase transcript commit and the streaming guard. These
+ * run in the ordinary suite (no native library), complementing the model-gated
+ * {@code SessionConcurrencyTest} and the agent-driven
+ * {@code SessionStateInterleavingTest}.
+ */
+public class SessionStateTest {
+
+    private static List<String> roles(List<ChatMessage> messages) {
+        return messages.stream().map(ChatMessage::getRole).collect(Collectors.toList());
+    }
+
+    @Test
+    public void send_commitsRound_andReturnsReply() {
+        SessionState state = new SessionState(0, null);
+
+        String reply = state.send("hello", (systemMessage, wireMessages) -> "hi there");
+
+        assertThat(reply, is("hi there"));
+        assertThat(roles(state.snapshot()), contains("user", "assistant"));
+    }
+
+    @Test
+    public void send_passesSystemMessageAndPendingUserTurn_toModelCall() {
+        SessionState state = new SessionState(0, "be terse");
+
+        state.send("first", (systemMessage, wireMessages) -> {
+            assertThat(systemMessage, is("be terse"));
+            // wire format carries the pending user turn before it is committed
+            assertThat(wireMessages.get(wireMessages.size() - 1).getValue(), is("first"));
+            return "ok";
+        });
+
+        assertThat(roles(state.snapshot()), contains("system", "user", "assistant"));
+    }
+
+    @Test
+    public void beginStream_thenCommit_completesRound_andClearsGuard() {
+        SessionState state = new SessionState(0, null);
+
+        String handle = state.beginStream("q", (systemMessage, wireMessages) -> "HANDLE");
+        assertThat(handle, is("HANDLE"));
+        // only the user turn is committed until the reply is committed
+        assertThat(roles(state.snapshot()), contains("user"));
+
+        state.commitStreamedReply("a");
+        assertThat(roles(state.snapshot()), contains("user", "assistant"));
+
+        // guard cleared: a follow-up send succeeds
+        state.send("again", (systemMessage, wireMessages) -> "b");
+        assertThat(roles(state.snapshot()), contains("user", "assistant", "user", "assistant"));
+    }
+
+    @Test
+    public void send_whileStreaming_throwsWithDiagnosticMessage() {
+        SessionState state = new SessionState(7, null);
+        state.beginStream("q", (systemMessage, wireMessages) -> "HANDLE");
+
+        IllegalStateException ex = assertThrows(IllegalStateException.class, () -> state.send("x", (s, w) -> "y"));
+        assertThat(ex.getMessage(), containsString("stream in progress on slot 7"));
+        assertThat(ex.getMessage(), containsString("before send(...)"));
+    }
+
+    @Test
+    public void runWhenNotStreaming_throwsWhileStreaming_andRunsOtherwise() {
+        SessionState state = new SessionState(3, null);
+
+        assertThat(state.runWhenNotStreaming("save", () -> "saved"), is("saved"));
+
+        state.beginStream("q", (systemMessage, wireMessages) -> "HANDLE");
+        IllegalStateException ex =
+                assertThrows(IllegalStateException.class, () -> state.runWhenNotStreaming("restore", () -> "restored"));
+        assertThat(ex.getMessage(), containsString("before restore(...)"));
+    }
+
+    @Test
+    public void commitStreamedReply_withoutStream_throws() {
+        SessionState state = new SessionState(0, null);
+
+        IllegalStateException ex = assertThrows(IllegalStateException.class, () -> state.commitStreamedReply("a"));
+        assertThat(ex.getMessage(), containsString("no stream in progress"));
+    }
+
+    @Test
+    public void runUnderLock_runsActionRegardlessOfStreamingState() {
+        SessionState state = new SessionState(0, null);
+        state.beginStream("q", (systemMessage, wireMessages) -> "HANDLE");
+
+        boolean[] ran = {false};
+        state.runUnderLock(() -> ran[0] = true);
+
+        assertThat(ran[0], is(true));
+    }
+}
diff --git a/src/test/java/net/ladenthin/llama/vmlens/SessionStateInterleavingTest.java b/src/test/java/net/ladenthin/llama/vmlens/SessionStateInterleavingTest.java
new file mode 100644
index 00000000..08a4df45
--- /dev/null
+++ b/src/test/java/net/ladenthin/llama/vmlens/SessionStateInterleavingTest.java
@@ -0,0 +1,97 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+package net.ladenthin.llama.vmlens;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.nullValue;
+
+import com.vmlens.api.AllInterleavings;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicReference;
+import net.ladenthin.llama.SessionState;
+import net.ladenthin.llama.value.ChatMessage;
+import org.junit.jupiter.api.Test;
+
+/**
+ * vmlens interleaving analysis of the {@link SessionState} streaming-guard and
+ * two-phase transcript commit.
+ *
+ * <p>One thread runs a synchronous {@code send} round while another runs a
+ * {@code beginStream} followed by {@code commitStreamedReply}. Both mutate the
+ * shared {@code streamingActive} flag and the transcript under {@code SessionState}'s
+ * single lock. The compound-atomicity invariant that must hold under <em>every</em>
+ * interleaving: the resulting transcript is always a strictly alternating
+ * {@code user, assistant, …} sequence (no torn round, no two same-role turns in a
+ * row), the streaming flag is never left stuck (a follow-up {@code send} succeeds),
+ * and the only acceptable failure is the {@link IllegalStateException} a {@code send}
+ * raises when it loses the race to an in-progress stream.</p>
+ *
+ * <p>This is the multi-variable check-then-act ordering class that the existing
+ * single-{@code volatile} {@code CancellationToken} Lincheck/jcstress tests do not
+ * cover and that the model-gated {@code SessionConcurrencyTest} cannot explore
+ * exhaustively. The model call is injected as a pure-Java lambda, so no native
+ * library is needed. Like the rest of the package it runs only under the vmlens
+ * agent (see the {@code vmlens} profile and the {@code maven-surefire-plugin}
+ * {@code <excludes>} in {@code pom.xml}).</p>
+ *
+ * <p>Raw {@link Thread} usage is intentional: vmlens explores the interleavings of
+ * the threads it directly manages.</p>
+ */
+public class SessionStateInterleavingTest {
+
+    /**
+     * Drives a synchronous send against a stream round through every interleaving and
+     * asserts strict user/assistant alternation plus a non-stuck streaming flag.
+     *
+     * @throws InterruptedException if joining a worker thread is interrupted
+     */
+    @Test
+    public void sendRacingStreamKeepsStrictAlternation() throws InterruptedException {
+        try (AllInterleavings allInterleavings = new AllInterleavings("SessionState.streamGuard")) {
+            while (allInterleavings.hasNext()) {
+                final SessionState state = new SessionState(0, null);
+                state.send("u0", (systemMessage, wireMessages) -> "a0"); // seed one aligned round
+
+                final AtomicReference<Throwable> failure = new AtomicReference<>();
+
+                final Thread sender = new Thread(() -> {
+                    try {
+                        state.send("u1", (systemMessage, wireMessages) -> "a1");
+                    } catch (IllegalStateException lostRaceToStream) {
+                        // Acceptable: a stream was in progress when send checked the guard.
+                    } catch (Throwable t) {
+                        failure.compareAndSet(null, t);
+                    }
+                });
+                final Thread streamer = new Thread(() -> {
+                    try {
+                        state.beginStream("u2", (systemMessage, wireMessages) -> "streamHandle");
+                        state.commitStreamedReply("a2");
+                    } catch (Throwable t) {
+                        failure.compareAndSet(null, t);
+                    }
+                });
+
+                sender.start();
+                streamer.start();
+                sender.join();
+                streamer.join();
+
+                assertThat(failure.get(), is(nullValue()));
+                assertStrictAlternation(state.snapshot());
+                // The streaming flag must be cleared: a follow-up send must not throw.
+                state.send("u3", (systemMessage, wireMessages) -> "a3");
+                assertStrictAlternation(state.snapshot());
+            }
+        }
+    }
+
+    private static void assertStrictAlternation(List<ChatMessage> messages) {
+        for (int i = 0; i < messages.size(); i++) {
+            final String expectedRole = (i % 2 == 0) ? "user" : "assistant";
+            assertThat("role at index " + i, messages.get(i).getRole(), is(expectedRole));
+        }
+    }
+}
diff --git a/src/test/java/net/ladenthin/llama/vmlens/VmlensInterleavingSmokeTest.java b/src/test/java/net/ladenthin/llama/vmlens/VmlensInterleavingSmokeTest.java
new file mode 100644
index 00000000..45f09dd2
--- /dev/null
+++ b/src/test/java/net/ladenthin/llama/vmlens/VmlensInterleavingSmokeTest.java
@@ -0,0 +1,57 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+package net.ladenthin.llama.vmlens;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
+
+import com.vmlens.api.AllInterleavings;
+import java.util.concurrent.atomic.AtomicLong;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Minimal vmlens interleaving-analysis smoke test demonstrating the setup.
+ *
+ * <p>Two threads each increment a shared {@link AtomicLong}; because
+ * {@link AtomicLong#incrementAndGet()} is atomic, the final value must always be
+ * {@code 2} regardless of how the two threads interleave. vmlens drives the body
+ * of the {@link AllInterleavings} loop through every possible interleaving and
+ * fails if any ordering violates the invariant.</p>
+ *
+ * <p>This test is meaningful only when executed under the vmlens java agent,
+ * which is wired up by the {@code vmlens} Maven profile and the dedicated vmlens
+ * CI job ({@code mvn -Pvmlens test}). Without the agent
+ * {@link AllInterleavings#hasNext()} returns {@code false} and the loop body is
+ * skipped, so the ordinary surefire run excludes this class (see the
+ * {@code maven-surefire-plugin} {@code <excludes>} in {@code pom.xml}). It is the
+ * staged "one class for now" baseline; widen the profile's {@code <includes>} as
+ * more concurrency tests are added.</p>
+ */
+public class VmlensInterleavingSmokeTest {
+
+    /**
+     * Verifies that two concurrent atomic increments always sum to two under
+     * every thread interleaving explored by vmlens.
+     *
+     * @throws InterruptedException if joining a worker thread is interrupted
+     */
+    @Test
+    public void atomicIncrementsAreLinearizable() throws InterruptedException {
+        try (AllInterleavings allInterleavings = new AllInterleavings("VmlensInterleavingSmokeTest.atomicIncrements")) {
+            while (allInterleavings.hasNext()) {
+                final AtomicLong counter = new AtomicLong();
+
+                final Thread first = new Thread(counter::incrementAndGet);
+                final Thread second = new Thread(counter::incrementAndGet);
+
+                first.start();
+                second.start();
+                first.join();
+                second.join();
+
+                assertThat(counter.get(), is(2L));
+            }
+        }
+    }
+}