Add tool_results e2e tests for Python, Go, and .NET

Morabbin · Copilot · Morabbin · commit 100aeb77e77c · 2026-04-01T16:02:32.000+01:00
Mirror the Node e2e tests from tool_results.test.ts in all three
remaining SDK languages, reusing the shared YAML snapshots. Each
suite covers:
- structured ToolResultObject with success resultType
- failure resultType
- toolTelemetry preservation (verifies LLM content has no stringified
  JSON and no toolTelemetry/resultType leakage)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/dotnet/test/ToolResultsTests.cs b/dotnet/test/ToolResultsTests.cs
@@ -0,0 +1,113 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *--------------------------------------------------------------------------------------------*/
+
+using GitHub.Copilot.SDK.Test.Harness;
+using Microsoft.Extensions.AI;
+using System.ComponentModel;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace GitHub.Copilot.SDK.Test;
+
+/// <summary>
+/// End-to-end tests for structured tool results returned by custom tools:
+/// a success result, a failure result, and a result that carries tool telemetry.
+/// The "tool_results" name is passed to <see cref="E2ETestBase"/>; presumably it
+/// selects the shared snapshot set for this suite (confirm against the harness).
+/// </summary>
+public class ToolResultsTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "tool_results", output)
+{
+    /// <summary>
+    /// A custom tool that returns a structured result with a "success" result type
+    /// should have its text reflected in the assistant's final message.
+    /// </summary>
+    [Fact]
+    public async Task Should_Handle_Structured_ToolResultObject_From_Custom_Tool()
+    {
+        var session = await CreateSessionAsync(new SessionConfig
+        {
+            Tools = [AIFunctionFactory.Create(GetWeather, "get_weather")],
+            OnPermissionRequest = PermissionHandler.ApproveAll,
+        });
+
+        await session.SendAsync(new MessageOptions
+        {
+            Prompt = "What's the weather in Paris?"
+        });
+
+        var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session);
+        Assert.NotNull(assistantMessage);
+        // Case-insensitive match; a missing content is treated as an empty string so
+        // the failure is reported by the assertion rather than a null dereference.
+        Assert.Matches("(?i)sunny|72", assistantMessage!.Data.Content ?? string.Empty);
+
+        // Local tool implementation registered above under the name "get_weather".
+        [Description("Gets weather for a city")]
+        static ToolResultAIContent GetWeather([Description("City name")] string city)
+            => new(new()
+            {
+                TextResultForLlm = $"The weather in {city} is sunny and 72°F",
+                ResultType = "success",
+            });
+    }
+
+    /// <summary>
+    /// A custom tool that returns a "failure" result type (with an error string)
+    /// should lead the assistant to report that the service is down, as the prompt asks.
+    /// </summary>
+    [Fact]
+    public async Task Should_Handle_Tool_Result_With_Failure_ResultType()
+    {
+        var session = await CreateSessionAsync(new SessionConfig
+        {
+            Tools = [AIFunctionFactory.Create(CheckStatus, "check_status")],
+            OnPermissionRequest = PermissionHandler.ApproveAll,
+        });
+
+        await session.SendAsync(new MessageOptions
+        {
+            Prompt = "Check the status of the service using check_status. If it fails, say 'service is down'."
+        });
+
+        var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session);
+        Assert.NotNull(assistantMessage);
+        // Compare in lower case so the check does not depend on the model's capitalization.
+        Assert.Contains("service is down", assistantMessage!.Data.Content?.ToLowerInvariant() ?? string.Empty);
+
+        // Local tool implementation registered above under the name "check_status".
+        [Description("Checks the status of a service")]
+        static ToolResultAIContent CheckStatus()
+            => new(new()
+            {
+                TextResultForLlm = "Service unavailable",
+                ResultType = "failure",
+                Error = "API timeout",
+            });
+    }
+
+    /// <summary>
+    /// A structured result that includes tool telemetry should reach the LLM as plain
+    /// text only: the tool message in the captured request must not mention
+    /// "toolTelemetry" or "resultType".
+    /// </summary>
+    [Fact]
+    public async Task Should_Preserve_ToolTelemetry_And_Not_Stringify_Structured_Results_For_LLM()
+    {
+        var session = await CreateSessionAsync(new SessionConfig
+        {
+            Tools = [AIFunctionFactory.Create(AnalyzeCode, "analyze_code")],
+            OnPermissionRequest = PermissionHandler.ApproveAll,
+        });
+
+        await session.SendAsync(new MessageOptions
+        {
+            Prompt = "Analyze the file main.ts for issues."
+        });
+
+        var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session);
+        Assert.NotNull(assistantMessage);
+        Assert.Contains("no issues", assistantMessage!.Data.Content?.ToLowerInvariant() ?? string.Empty);
+
+        // Verify the LLM received just textResultForLlm, not stringified JSON
+        var traffic = await Ctx.GetExchangesAsync();
+        // Fail with a clear assertion instead of an out-of-range exception from
+        // traffic[^1] when no exchanges were captured.
+        Assert.NotEmpty(traffic);
+        var lastConversation = traffic[^1];
+
+        var toolResults = lastConversation.Request.Messages
+            .Where(m => m.Role == "tool")
+            .ToList();
+
+        // Exactly one tool message is expected in the final request.
+        Assert.Single(toolResults);
+        Assert.DoesNotContain("toolTelemetry", toolResults[0].Content);
+        Assert.DoesNotContain("resultType", toolResults[0].Content);
+
+        // Local tool implementation registered above under the name "analyze_code".
+        [Description("Analyzes code for issues")]
+        static ToolResultAIContent AnalyzeCode([Description("File to analyze")] string file)
+            => new(new()
+            {
+                TextResultForLlm = $"Analysis of {file}: no issues found",
+                ResultType = "success",
+                ToolTelemetry = new Dictionary<string, object>
+                {
+                    ["metrics"] = new Dictionary<string, object> { ["analysisTimeMs"] = 150 },
+                    ["properties"] = new Dictionary<string, object> { ["analyzer"] = "eslint" },
+                },
+            });
+    }
+}
diff --git a/go/internal/e2e/tool_results_test.go b/go/internal/e2e/tool_results_test.go
@@ -0,0 +1,171 @@
+package e2e
+
+import (
+	"strings"
+	"testing"
+
+	copilot "github.com/github/copilot-sdk/go"
+	"github.com/github/copilot-sdk/go/internal/e2e/testharness"
+)
+
+// TestToolResults runs end-to-end checks on structured tool results returned by
+// custom tools: a success result, a failure result, and a result carrying tool
+// telemetry. All subtests share one client, which is force-stopped on cleanup.
+func TestToolResults(t *testing.T) {
+	ctx := testharness.NewTestContext(t)
+	client := ctx.NewClient()
+	t.Cleanup(func() { client.ForceStop() })
+
+	t.Run("should handle structured toolresultobject from custom tool", func(t *testing.T) {
+		ctx.ConfigureForTest(t)
+
+		type WeatherParams struct {
+			City string `json:"city" jsonschema:"City name"`
+		}
+
+		session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{
+			OnPermissionRequest: copilot.PermissionHandler.ApproveAll,
+			Tools: []copilot.Tool{
+				copilot.DefineTool("get_weather", "Gets weather for a city",
+					func(params WeatherParams, inv copilot.ToolInvocation) (copilot.ToolResult, error) {
+						return copilot.ToolResult{
+							TextResultForLLM: "The weather in " + params.City + " is sunny and 72°F",
+							ResultType:       "success",
+						}, nil
+					}),
+			},
+		})
+		if err != nil {
+			t.Fatalf("Failed to create session: %v", err)
+		}
+
+		_, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "What's the weather in Paris?"})
+		if err != nil {
+			t.Fatalf("Failed to send message: %v", err)
+		}
+
+		answer, err := testharness.GetFinalAssistantMessage(t.Context(), session)
+		if err != nil {
+			t.Fatalf("Failed to get assistant message: %v", err)
+		}
+
+		// Treat a nil content pointer as an empty answer so the check below fails
+		// with a readable message instead of dereferencing nil.
+		content := ""
+		if answer.Data.Content != nil {
+			content = *answer.Data.Content
+		}
+		// Either "sunny" (any case) or "72" is enough to show the tool text was used.
+		if !strings.Contains(strings.ToLower(content), "sunny") && !strings.Contains(content, "72") {
+			t.Errorf("Expected answer to mention sunny or 72, got %q", content)
+		}
+	})
+
+	t.Run("should handle tool result with failure resulttype", func(t *testing.T) {
+		ctx.ConfigureForTest(t)
+
+		session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{
+			OnPermissionRequest: copilot.PermissionHandler.ApproveAll,
+			Tools: []copilot.Tool{
+				{
+					Name:        "check_status",
+					Description: "Checks the status of a service",
+					Handler: func(inv copilot.ToolInvocation) (copilot.ToolResult, error) {
+						// The failure is reported through the result itself; the
+						// handler's error return stays nil.
+						return copilot.ToolResult{
+							TextResultForLLM: "Service unavailable",
+							ResultType:       "failure",
+							Error:            "API timeout",
+						}, nil
+					},
+				},
+			},
+		})
+		if err != nil {
+			t.Fatalf("Failed to create session: %v", err)
+		}
+
+		_, err = session.Send(t.Context(), copilot.MessageOptions{
+			Prompt: "Check the status of the service using check_status. If it fails, say 'service is down'.",
+		})
+		if err != nil {
+			t.Fatalf("Failed to send message: %v", err)
+		}
+
+		answer, err := testharness.GetFinalAssistantMessage(t.Context(), session)
+		if err != nil {
+			t.Fatalf("Failed to get assistant message: %v", err)
+		}
+
+		content := ""
+		if answer.Data.Content != nil {
+			content = *answer.Data.Content
+		}
+		if !strings.Contains(strings.ToLower(content), "service is down") {
+			t.Errorf("Expected 'service is down', got %q", content)
+		}
+	})
+
+	t.Run("should preserve tooltelemetry and not stringify structured results for llm", func(t *testing.T) {
+		ctx.ConfigureForTest(t)
+
+		type AnalyzeParams struct {
+			File string `json:"file" jsonschema:"File to analyze"`
+		}
+
+		session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{
+			OnPermissionRequest: copilot.PermissionHandler.ApproveAll,
+			Tools: []copilot.Tool{
+				copilot.DefineTool("analyze_code", "Analyzes code for issues",
+					func(params AnalyzeParams, inv copilot.ToolInvocation) (copilot.ToolResult, error) {
+						return copilot.ToolResult{
+							TextResultForLLM: "Analysis of " + params.File + ": no issues found",
+							ResultType:       "success",
+							ToolTelemetry: map[string]any{
+								"metrics":    map[string]any{"analysisTimeMs": 150},
+								"properties": map[string]any{"analyzer": "eslint"},
+							},
+						}, nil
+					}),
+			},
+		})
+		if err != nil {
+			t.Fatalf("Failed to create session: %v", err)
+		}
+
+		_, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "Analyze the file main.ts for issues."})
+		if err != nil {
+			t.Fatalf("Failed to send message: %v", err)
+		}
+
+		answer, err := testharness.GetFinalAssistantMessage(t.Context(), session)
+		if err != nil {
+			t.Fatalf("Failed to get assistant message: %v", err)
+		}
+
+		content := ""
+		if answer.Data.Content != nil {
+			content = *answer.Data.Content
+		}
+		if !strings.Contains(strings.ToLower(content), "no issues") {
+			t.Errorf("Expected 'no issues', got %q", content)
+		}
+
+		// Verify the LLM received just textResultForLlm, not stringified JSON
+		traffic, err := ctx.GetExchanges()
+		if err != nil {
+			t.Fatalf("Failed to get exchanges: %v", err)
+		}
+		// Guard the index below: with no captured exchanges the test should fail
+		// cleanly rather than panic with an index-out-of-range.
+		if len(traffic) == 0 {
+			t.Fatal("Expected at least one captured exchange, got none")
+		}
+
+		lastConversation := traffic[len(traffic)-1]
+		var toolResults []testharness.ChatCompletionMessage
+		for _, msg := range lastConversation.Request.Messages {
+			if msg.Role == "tool" {
+				toolResults = append(toolResults, msg)
+			}
+		}
+
+		// Fatal (not Error) because the checks below index toolResults[0].
+		if len(toolResults) != 1 {
+			t.Fatalf("Expected 1 tool result, got %d", len(toolResults))
+		}
+		if strings.Contains(toolResults[0].Content, "toolTelemetry") {
+			t.Error("Tool result content should not contain 'toolTelemetry'")
+		}
+		if strings.Contains(toolResults[0].Content, "resultType") {
+			t.Error("Tool result content should not contain 'resultType'")
+		}
+	})
+}
diff --git a/python/e2e/test_tool_results.py b/python/e2e/test_tool_results.py
@@ -0,0 +1,91 @@
+"""E2E Tool Results Tests"""
+
+import pytest
+from pydantic import BaseModel, Field
+
+from copilot import define_tool
+from copilot.session import PermissionHandler
+from copilot.tools import ToolInvocation, ToolResult
+
+from .testharness import E2ETestContext, get_final_assistant_message
+
+pytestmark = pytest.mark.asyncio(loop_scope="module")
+
+
+class TestToolResults:
+    async def test_should_handle_structured_toolresultobject_from_custom_tool(
+        self, ctx: E2ETestContext
+    ):
+        class WeatherParams(BaseModel):
+            city: str = Field(description="City name")
+
+        @define_tool("get_weather", description="Gets weather for a city")
+        def get_weather(params: WeatherParams, invocation: ToolInvocation) -> ToolResult:
+            return ToolResult(
+                text_result_for_llm=f"The weather in {params.city} is sunny and 72°F",
+                result_type="success",
+            )
+
+        session = await ctx.client.create_session(
+            on_permission_request=PermissionHandler.approve_all, tools=[get_weather]
+        )
+
+        await session.send("What's the weather in Paris?")
+        assistant_message = await get_final_assistant_message(session)
+        assert "sunny" in assistant_message.data.content.lower() or "72" in assistant_message.data.content
+
+    async def test_should_handle_tool_result_with_failure_resulttype(
+        self, ctx: E2ETestContext
+    ):
+        @define_tool("check_status", description="Checks the status of a service")
+        def check_status(invocation: ToolInvocation) -> ToolResult:
+            return ToolResult(
+                text_result_for_llm="Service unavailable",
+                result_type="failure",
+                error="API timeout",
+            )
+
+        session = await ctx.client.create_session(
+            on_permission_request=PermissionHandler.approve_all, tools=[check_status]
+        )
+
+        answer = await session.send_and_wait(
+            "Check the status of the service using check_status. If it fails, say 'service is down'."
+        )
+        assert answer is not None
+        assert "service is down" in answer.data.content.lower()
+
+    async def test_should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm(
+        self, ctx: E2ETestContext
+    ):
+        class AnalyzeParams(BaseModel):
+            file: str = Field(description="File to analyze")
+
+        @define_tool("analyze_code", description="Analyzes code for issues")
+        def analyze_code(params: AnalyzeParams, invocation: ToolInvocation) -> ToolResult:
+            return ToolResult(
+                text_result_for_llm=f"Analysis of {params.file}: no issues found",
+                result_type="success",
+                tool_telemetry={
+                    "metrics": {"analysisTimeMs": 150},
+                    "properties": {"analyzer": "eslint"},
+                },
+            )
+
+        session = await ctx.client.create_session(
+            on_permission_request=PermissionHandler.approve_all, tools=[analyze_code]
+        )
+
+        await session.send("Analyze the file main.ts for issues.")
+        assistant_message = await get_final_assistant_message(session)
+        assert "no issues" in assistant_message.data.content.lower()
+
+        # Verify the LLM received just textResultForLlm, not stringified JSON
+        traffic = await ctx.get_exchanges()
+        last_conversation = traffic[-1]
+        tool_results = [
+            m for m in last_conversation["request"]["messages"] if m["role"] == "tool"
+        ]
+        assert len(tool_results) == 1
+        assert "toolTelemetry" not in tool_results[0]["content"]
+        assert "resultType" not in tool_results[0]["content"]