fix(gemini): preserve thought signatures and token usage

Tom-Ryder · Tom-Ryder · commit 90b124a4c697 · 2026-04-02T10:37:54.000-07:00
diff --git a/Sources/AgentRunKit/LLM/GeminiClient.swift b/Sources/AgentRunKit/LLM/GeminiClient.swift
@@ -222,6 +222,12 @@ extension GeminiClient {
                 }()
                 let arguments = try encodeFunctionCallArgs(functionCall.args)
                 toolCalls.append(ToolCall(id: callId, name: functionCall.name, arguments: arguments))
+                if let signature = part.thoughtSignature, !signature.isEmpty {
+                    reasoningDetails.append(GeminiReasoningDetail.functionCallSignature(
+                        toolCallID: callId,
+                        signature: signature
+                    ))
+                }
             } else if let text = part.text {
                 if part.thought == true {
                     reasoningText = reasoningText.map { $0 + "\n" + text } ?? text
diff --git a/Sources/AgentRunKit/LLM/GeminiClientStreaming.swift b/Sources/AgentRunKit/LLM/GeminiClientStreaming.swift
@@ -61,6 +61,14 @@ extension GeminiClient {
                 await state.flushThinkingBlock()
                 let toolIndex = await state.incrementToolCallCount()
                 let callId = functionCall.id ?? "gemini_call_\(toolIndex)"
+                if let signature = part.thoughtSignature, !signature.isEmpty {
+                    continuation.yield(.reasoningDetails([
+                        GeminiReasoningDetail.functionCallSignature(
+                            toolCallID: callId,
+                            signature: signature
+                        )
+                    ]))
+                }
                 continuation.yield(.toolCallStart(
                     index: toolIndex, id: callId, name: functionCall.name
                 ))
diff --git a/Sources/AgentRunKit/LLM/GeminiClientTypes.swift b/Sources/AgentRunKit/LLM/GeminiClientTypes.swift
@@ -170,12 +170,10 @@ struct GeminiUsageMetadata: Decodable {
     let cachedContentTokenCount: Int?
 
     var tokenUsage: TokenUsage {
-        let thoughts = thoughtsTokenCount ?? 0
-        let candidates = candidatesTokenCount ?? 0
-        return TokenUsage(
+        TokenUsage(
             input: promptTokenCount ?? 0,
-            output: max(0, candidates - thoughts),
-            reasoning: thoughts,
+            output: candidatesTokenCount ?? 0, // candidate count is reported as-is; 0 when absent
+            reasoning: thoughtsTokenCount ?? 0, // thought tokens are tracked separately; 0 when absent
             cacheRead: cachedContentTokenCount
         )
     }
@@ -191,6 +189,38 @@ struct GeminiErrorDetail: Decodable {
     let status: String
 }
 
+/// Builds and reads the reasoning-detail objects that pair a tool call with its thought signature.
+enum GeminiReasoningDetail {
+    /// Value stored under "type" to mark an entry as a function-call signature.
+    private static let functionCallSignatureType = "gemini.function_call"
+
+    /// Wraps `signature` and the `toolCallID` it belongs to in a tagged JSON object.
+    static func functionCallSignature(toolCallID: String, signature: String) -> JSONValue {
+        let typeTag = JSONValue.string(functionCallSignatureType)
+        return .object([
+            "type": typeTag,
+            "tool_call_id": .string(toolCallID),
+            "thought_signature": .string(signature)
+        ])
+    }
+
+    /// Returns signatures keyed by tool-call ID. Entries that are not objects, carry another
+    /// "type", or lack string-valued fields are skipped; a repeated ID keeps the last signature.
+    static func functionCallSignatures(from details: [JSONValue]) -> [String: String] {
+        var signaturesByCallID: [String: String] = [:]
+        for case let .object(fields) in details {
+            guard case .string(functionCallSignatureType) = fields["type"],
+                  case let .string(callID) = fields["tool_call_id"],
+                  case let .string(thoughtSignature) = fields["thought_signature"]
+            else {
+                continue
+            }
+            signaturesByCallID[callID] = thoughtSignature
+        }
+        return signaturesByCallID
+    }
+}
+
 enum GeminiMessageMapper {
     static func mapMessages(
         _ messages: [ChatMessage]
@@ -253,6 +283,9 @@ enum GeminiMessageMapper {
         _ msg: AssistantMessage
     ) throws -> GeminiContent {
         var parts: [GeminiPart] = []
+        let functionCallSignatures = GeminiReasoningDetail.functionCallSignatures(
+            from: msg.reasoningDetails ?? []
+        )
 
         if let details = msg.reasoningDetails {
             for detail in details {
@@ -282,7 +315,8 @@ enum GeminiMessageMapper {
             parts.append(GeminiPart(
                 functionCall: GeminiFunctionCall(
                     id: call.id, name: call.name, args: args
-                )
+                ),
+                thoughtSignature: functionCallSignatures[call.id]
             ))
         }
 
diff --git a/Tests/AgentRunKitTests/GeminiClientTests.swift b/Tests/AgentRunKitTests/GeminiClientTests.swift
@@ -432,6 +432,28 @@ struct GeminiMessageMapperTests {
         #expect(mapped[0].parts[0].thoughtSignature == nil)
     }
 
+    /// A function-call signature stored in `reasoningDetails` must be re-attached to the
+    /// mapped `functionCall` part that carries the same tool-call ID.
+    @Test
+    func assistantFunctionCallThoughtSignatureRoundTrips() throws {
+        let msg = AssistantMessage(
+            content: "",
+            toolCalls: [ToolCall(id: "call_sig", name: "search", arguments: "{\"q\":\"test\"}")],
+            reasoningDetails: [
+                GeminiReasoningDetail.functionCallSignature(toolCallID: "call_sig", signature: "sig_fc")
+            ]
+        )
+        let (_, mapped) = try GeminiMessageMapper.mapMessages([.assistant(msg)])
+
+        // `#require` ends the test on a count mismatch; `#expect` would continue into the
+        // subscripts below and trap on an out-of-range index instead of recording a failure.
+        try #require(mapped.count == 1)
+        try #require(mapped[0].parts.count == 1)
+        let part = mapped[0].parts[0]
+        #expect(part.functionCall?.id == "call_sig")
+        #expect(part.thoughtSignature == "sig_fc")
+    }
+
     @Test
     func multimodalThrows() {
         do {
diff --git a/Tests/AgentRunKitTests/GeminiResponseParsingTests.swift b/Tests/AgentRunKitTests/GeminiResponseParsingTests.swift
@@ -93,7 +93,7 @@ struct GeminiResponseParsingTests {
             Issue.record("Expected object in reasoning details")
         }
         #expect(msg.tokenUsage?.reasoning == 50)
-        #expect(msg.tokenUsage?.output == 150)
+        #expect(msg.tokenUsage?.output == 200)
     }
 
     @Test
@@ -285,7 +285,7 @@ struct GeminiResponseParsingTests {
         #expect(msg.reasoning?.content == "Think first\nThink again")
         #expect(msg.reasoningDetails?.count == 2)
         #expect(msg.tokenUsage?.reasoning == 40)
-        #expect(msg.tokenUsage?.output == 80)
+        #expect(msg.tokenUsage?.output == 120)
     }
 
     @Test
@@ -398,3 +398,45 @@ struct GeminiResponseParsingTests {
         #expect(inputDict["units"] == .string("celsius"))
     }
 }
+
+/// Response-parsing tests for thought signatures attached to `functionCall` parts.
+struct GeminiFunctionCallReasoningDetailsTests {
+    private func makeClient() -> GeminiClient { GeminiClient(apiKey: "test-key", model: "gemini-2.5-pro") }
+
+    /// A `thoughtSignature` on a `functionCall` part is surfaced as a tagged reasoning detail.
+    @Test
+    func functionCallThoughtSignatureIsPreservedInReasoningDetails() throws {
+        let json = """
+        {
+            "candidates": [{
+                "content": {
+                    "role": "model",
+                    "parts": [
+                        {
+                            "functionCall": {"id": "call_sig", "name": "search", "args": {"q": "test"}},
+                            "thoughtSignature": "sig_fc"
+                        }
+                    ]
+                },
+                "finishReason": "STOP"
+            }],
+            "usageMetadata": {
+                "promptTokenCount": 40,
+                "candidatesTokenCount": 20
+            }
+        }
+        """
+        let msg = try makeClient().parseResponse(Data(json.utf8))
+
+        #expect(msg.toolCalls.count == 1)
+        #expect(msg.toolCalls.first?.id == "call_sig") // `first`: an empty array fails here instead of trapping
+        #expect(msg.reasoningDetails?.count == 1)
+        if case let .object(dict) = msg.reasoningDetails?.first {
+            #expect(dict["type"] == .string("gemini.function_call"))
+            #expect(dict["tool_call_id"] == .string("call_sig"))
+            #expect(dict["thought_signature"] == .string("sig_fc"))
+        } else {
+            Issue.record("Expected function-call reasoning detail")
+        }
+    }
+}
diff --git a/Tests/AgentRunKitTests/GeminiStreamingTests.swift b/Tests/AgentRunKitTests/GeminiStreamingTests.swift
@@ -85,6 +85,29 @@ struct GeminiStreamingTests {
         }
     }
 
+    /// A streamed `functionCall` part with a `thoughtSignature` yields one tagged reasoning-details delta.
+    @Test
+    func functionCallStreamingEmitsThoughtSignatureDetail() async throws {
+        let lines = [
+            "data: {\"candidates\":[{\"content\":{\"role\":\"model\",\"parts\":[{\"functionCall\":{\"id\":\"call_01\",\"name\":\"get_weather\",\"args\":{\"city\":\"NYC\"}},\"thoughtSignature\":\"sig_fc\"}]},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":20,\"candidatesTokenCount\":15}}",
+        ]
+        let deltas = try await collectDeltas(from: lines)
+
+        let detailDeltas = deltas.filter { if case .reasoningDetails = $0 { return true }; return false }
+        #expect(detailDeltas.count == 1)
+        // `first` instead of `[0]`: a missing delta is reported by the count check, not by a trap.
+        if case let .reasoningDetails(details)? = detailDeltas.first {
+            #expect(details.count == 1)
+            if case let .object(dict)? = details.first {
+                #expect(dict["type"] == .string("gemini.function_call"))
+                #expect(dict["tool_call_id"] == .string("call_01"))
+                #expect(dict["thought_signature"] == .string("sig_fc"))
+            } else {
+                Issue.record("Expected function-call reasoning detail")
+            }
+        }
+    }
+
     @Test
     func thinkingStreaming() async throws {
         let lines = [
@@ -148,7 +171,7 @@ struct GeminiStreamingTests {
         #expect(finishedDeltas.count == 1)
         if case let .finished(usage) = finishedDeltas[0] {
             #expect(usage?.input == 100)
-            #expect(usage?.output == 40)
+            #expect(usage?.output == 50)
             #expect(usage?.reasoning == 10)
             #expect(usage?.cacheRead == 20)
         }