Add Session multi-turn helper + ChatMessage value type (§2.6)

claude · claude · commit e4f531c9520b · 2026-05-22T22:14:06.000Z
Session is a thin wrapper over LlamaModel: it owns a slot id, an accumulating user/assistant transcript, and an optional system message and parameter customizer. send(userMessage) appends both sides of the turn and runs chatCompleteText with the full history. stream(userMessage) returns a LlamaIterable for streamed replies; commitStreamedReply records the assistant turn once the caller has accumulated the text. save/restore delegate to existing LlamaModel.saveSlot/restoreSlot. close() erases the slot's KV cache. Single-threaded use only in this pass — per-session locking is the M-effort follow-up. ChatMessage is the minimal value type for the transcript; will be reused by ChatResponse when §2.2 lands. https://claude.ai/code/session_01R4ZrEy3ptJDLuUgUKuM4Gy
diff --git a/src/main/java/net/ladenthin/llama/ChatMessage.java b/src/main/java/net/ladenthin/llama/ChatMessage.java
@@ -0,0 +1,69 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import java.util.Objects;
+
+/**
+ * A single message in a chat conversation: a role ({@code "user"}, {@code "assistant"},
+ * or {@code "system"}) and its textual content. Used by {@link Session} to accumulate
+ * conversation turns.
+ * <p>
+ * Instances are immutable. Neither field is validated, so both may be {@code null}.
+ * Two messages are equal when their roles and contents are equal, which lets
+ * transcripts be compared element-wise.
+ * </p>
+ */
+public final class ChatMessage {
+
+    private final String role;
+    private final String content;
+
+    /**
+     * Create a message.
+     *
+     * @param role the speaker role, stored as given
+     * @param content the message text, stored as given
+     */
+    public ChatMessage(String role, String content) {
+        this.role = role;
+        this.content = content;
+    }
+
+    /** @return the role passed to the constructor */
+    public String getRole() {
+        return role;
+    }
+
+    /** @return the content passed to the constructor */
+    public String getContent() {
+        return content;
+    }
+
+    /** Value equality on role and content; {@code null} fields compare equal to each other. */
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+        if (!(other instanceof ChatMessage)) {
+            return false;
+        }
+        ChatMessage that = (ChatMessage) other;
+        return Objects.equals(role, that.role) && Objects.equals(content, that.content);
+    }
+
+    /** Hash consistent with {@link #equals(Object)}. */
+    @Override
+    public int hashCode() {
+        return Objects.hash(role, content);
+    }
+
+    /** @return {@code role + ": " + content}, e.g. {@code "assistant: hello"} */
+    @Override
+    public String toString() {
+        return role + ": " + content;
+    }
+}
diff --git a/src/main/java/net/ladenthin/llama/Session.java b/src/main/java/net/ladenthin/llama/Session.java
@@ -0,0 +1,180 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.function.Consumer;
+
+/**
+ * Thin multi-turn conversation wrapper over a {@link LlamaModel} slot. Maintains an
+ * accumulating list of {@link ChatMessage} turns and forwards each {@link #send(String)}
+ * to the underlying chat-completion API with the full transcript so far. KV-cache state
+ * for the bound slot can be persisted via {@link #save(String)} and restored with
+ * {@link #restore(String)}, which delegate to {@link LlamaModel#saveSlot(int, String)}
+ * and {@link LlamaModel#restoreSlot(int, String)}.
+ * <p>
+ * A user turn is recorded only once the model call for it has returned normally, so a
+ * failed {@link #send(String)} or {@link #stream(String)} leaves the transcript unchanged.
+ * </p>
+ * <p>
+ * This wrapper is intentionally not thread-safe; callers must serialize access to a
+ * single {@code Session} instance. Concurrency support is a follow-up (M-effort) item.
+ * </p>
+ */
+public final class Session implements AutoCloseable {
+
+    private static final String ROLE_USER = "user";
+    private static final String ROLE_ASSISTANT = "assistant";
+    private static final String ROLE_SYSTEM = "system";
+
+    private final LlamaModel model;
+    private final int slotId;
+    private final String systemMessage;
+    private final List<Pair<String, String>> turns = new ArrayList<>();
+    private final Consumer<InferenceParameters> paramsCustomizer;
+
+    /**
+     * Create a session bound to a specific slot id, with an optional system prompt
+     * applied to every {@link #send(String)} call.
+     *
+     * @param model the underlying model; must not be {@code null}
+     * @param slotId the slot id used by {@link #save(String)} / {@link #restore(String)}
+     * @param systemMessage optional system prompt (may be {@code null} or empty)
+     * @throws NullPointerException if {@code model} is {@code null}
+     */
+    public Session(LlamaModel model, int slotId, String systemMessage) {
+        this(model, slotId, systemMessage, null);
+    }
+
+    /**
+     * Create a session with a customizer that gets to mutate the
+     * {@link InferenceParameters} for every call (e.g. set temperature, n_predict).
+     *
+     * @param model the underlying model; must not be {@code null}
+     * @param slotId the slot id
+     * @param systemMessage optional system prompt
+     * @param paramsCustomizer applied to each request's parameters; may be {@code null}
+     * @throws NullPointerException if {@code model} is {@code null}
+     */
+    public Session(LlamaModel model, int slotId, String systemMessage,
+                   Consumer<InferenceParameters> paramsCustomizer) {
+        // Fail fast here rather than with an anonymous NPE on the first model call.
+        this.model = Objects.requireNonNull(model, "model");
+        this.slotId = slotId;
+        this.systemMessage = systemMessage;
+        this.paramsCustomizer = paramsCustomizer;
+    }
+
+    /**
+     * Send a user message and return the assistant's text reply, appending both to the
+     * transcript. If building the request or the model call throws, nothing is appended.
+     *
+     * @param userMessage the user's text for this turn
+     * @return the reply returned by {@link LlamaModel#chatCompleteText(InferenceParameters)}
+     */
+    public String send(String userMessage) {
+        Pair<String, String> userTurn = new Pair<>(ROLE_USER, userMessage);
+        String reply = model.chatCompleteText(buildParams(userTurn));
+        // Commit both sides only now, so a failed call cannot leave a dangling user turn
+        // that would be re-sent (unanswered) with every later request.
+        turns.add(userTurn);
+        turns.add(new Pair<>(ROLE_ASSISTANT, reply));
+        return reply;
+    }
+
+    /**
+     * Streaming variant of {@link #send(String)}. The user turn is appended once the
+     * stream has been created; the assistant turn is only appended when the caller
+     * invokes {@link #commitStreamedReply(String)}. Consume the returned iterable fully
+     * (or via try-with-resources) and commit the accumulated text before calling
+     * {@link #send(String)} or {@code stream} again, otherwise the next request carries
+     * an unanswered user turn.
+     *
+     * @param userMessage the user's text for this turn
+     * @return the iterable returned by {@link LlamaModel#generateChat(InferenceParameters)}
+     */
+    public LlamaIterable stream(String userMessage) {
+        Pair<String, String> userTurn = new Pair<>(ROLE_USER, userMessage);
+        LlamaIterable chunks = model.generateChat(buildParams(userTurn));
+        turns.add(userTurn);
+        return chunks;
+    }
+
+    /**
+     * Record an assistant reply that was produced by a previous {@link #stream(String)}
+     * call. Called by the caller after it has accumulated the streamed text. No check is
+     * made that a streamed user turn is actually pending.
+     *
+     * @param assistantText the full assistant reply to append
+     */
+    public void commitStreamedReply(String assistantText) {
+        turns.add(new Pair<>(ROLE_ASSISTANT, assistantText));
+    }
+
+    /**
+     * Save this session's slot KV cache to {@code filepath}.
+     *
+     * @param filepath destination passed through to {@link LlamaModel#saveSlot(int, String)}
+     * @return the value returned by {@link LlamaModel#saveSlot(int, String)}
+     */
+    public String save(String filepath) {
+        return model.saveSlot(slotId, filepath);
+    }
+
+    /**
+     * Restore this session's slot KV cache from {@code filepath}. Only the slot is
+     * touched; the in-memory transcript is left as it is.
+     *
+     * @param filepath source passed through to {@link LlamaModel#restoreSlot(int, String)}
+     * @return the value returned by {@link LlamaModel#restoreSlot(int, String)}
+     */
+    public String restore(String filepath) {
+        return model.restoreSlot(slotId, filepath);
+    }
+
+    /**
+     * The accumulated turns so far, in order, preceded by the system message when one
+     * is set and non-empty.
+     *
+     * @return an unmodifiable snapshot; turns added later do not show up in it
+     */
+    public List<ChatMessage> getMessages() {
+        List<ChatMessage> out = new ArrayList<>(turns.size() + 1);
+        if (systemMessage != null && !systemMessage.isEmpty()) {
+            out.add(new ChatMessage(ROLE_SYSTEM, systemMessage));
+        }
+        for (Pair<String, String> turn : turns) {
+            out.add(new ChatMessage(turn.getKey(), turn.getValue()));
+        }
+        return Collections.unmodifiableList(out);
+    }
+
+    /** Erase the bound slot's KV cache. Does not modify the in-memory transcript. */
+    @Override
+    public void close() {
+        model.eraseSlot(slotId);
+    }
+
+    /**
+     * Build the request for one call: the system message plus a copy of the committed
+     * turns followed by {@code pendingUserTurn}, then run through the customizer if set.
+     */
+    private InferenceParameters buildParams(Pair<String, String> pendingUserTurn) {
+        List<Pair<String, String>> history = new ArrayList<>(turns.size() + 1);
+        history.addAll(turns);
+        history.add(pendingUserTurn);
+        // NOTE(review): slotId is not applied to the request here; confirm that the model
+        // routes these calls to the slot that save/restore/close operate on.
+        InferenceParameters params = new InferenceParameters("")
+                .setMessages(systemMessage, history);
+        if (paramsCustomizer != null) {
+            paramsCustomizer.accept(params);
+        }
+        return params;
+    }
+}
diff --git a/src/test/java/net/ladenthin/llama/ChatMessageTest.java b/src/test/java/net/ladenthin/llama/ChatMessageTest.java
@@ -0,0 +1,36 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/** Unit tests for the {@link ChatMessage} accessors and its {@code toString()} rendering. */
+@ClaudeGenerated(
+        purpose = "Verify ChatMessage value class accessors and toString format used by Session.getMessages()."
+)
+public class ChatMessageTest {
+
+    /** The getters hand back exactly the role and content given to the constructor. */
+    @Test
+    public void accessors() {
+        final String role = "user";
+        final String content = "hi";
+        final ChatMessage message = new ChatMessage(role, content);
+
+        assertEquals(role, message.getRole());
+        assertEquals(content, message.getContent());
+    }
+
+    /** {@code toString()} renders as {@code role + ": " + content}. */
+    @Test
+    public void toStringFormat() {
+        final ChatMessage reply = new ChatMessage("assistant", "hello");
+        final String rendered = reply.toString();
+
+        assertEquals("assistant: hello", rendered);
+    }
+}
diff --git a/src/test/java/net/ladenthin/llama/LlamaModelTest.java b/src/test/java/net/ladenthin/llama/LlamaModelTest.java
@@ -314,6 +314,38 @@ public void testCompleteAsyncCancelPropagates() throws Exception {
 		Assert.assertNotNull(model.complete(new InferenceParameters(prefix).setNPredict(3)));
 	}
 
+	/**
+	 * Regression: {@link Session} must accumulate user/assistant turns across
+	 * multiple {@link Session#send(String)} calls and expose them via
+	 * {@link Session#getMessages()}, with each assistant turn holding exactly the
+	 * reply that {@code send} returned. Save/restore round-trip is exercised
+	 * separately in slot save/restore tests.
+	 */
+	@Test
+	public void testSessionMultiTurn() {
+		try (Session session = new Session(model, 0, "You are a terse assistant.",
+				params -> params.setNPredict(8).setSeed(1))) {
+			String r1 = session.send("Say hi.");
+			Assert.assertNotNull(r1);
+			String r2 = session.send("Say bye.");
+			Assert.assertNotNull(r2);
+
+			java.util.List<ChatMessage> msgs = session.getMessages();
+			// system + user + assistant + user + assistant
+			Assert.assertEquals(5, msgs.size());
+			Assert.assertEquals("system", msgs.get(0).getRole());
+			Assert.assertEquals("You are a terse assistant.", msgs.get(0).getContent());
+			Assert.assertEquals("user", msgs.get(1).getRole());
+			Assert.assertEquals("Say hi.", msgs.get(1).getContent());
+			Assert.assertEquals("assistant", msgs.get(2).getRole());
+			Assert.assertEquals(r1, msgs.get(2).getContent());
+			Assert.assertEquals("user", msgs.get(3).getRole());
+			Assert.assertEquals("Say bye.", msgs.get(3).getContent());
+			Assert.assertEquals("assistant", msgs.get(4).getRole());
+			Assert.assertEquals(r2, msgs.get(4).getContent());
+		}
+	}
+
 	@Test
 	public void testEmbedding() {
 		float[] embedding = model.embed(prefix);