Skip to content

Commit 100aeb7

Browse files
Morabbin and Copilot committed
Add tool_results e2e tests for Python, Go, and .NET
Mirror the Node e2e tests from tool_results.test.ts in all three remaining SDK languages, reusing the shared YAML snapshots. Each suite covers:
- structured ToolResultObject with success resultType
- failure resultType
- toolTelemetry preservation (verifies LLM content has no stringified JSON and no toolTelemetry/resultType leakage)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 6677e8d commit 100aeb7

3 files changed

Lines changed: 375 additions & 0 deletions

File tree

dotnet/test/ToolResultsTests.cs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
*--------------------------------------------------------------------------------------------*/
4+
5+
using GitHub.Copilot.SDK.Test.Harness;
6+
using Microsoft.Extensions.AI;
7+
using System.ComponentModel;
8+
using Xunit;
9+
using Xunit.Abstractions;
10+
11+
namespace GitHub.Copilot.SDK.Test;
12+
13+
/// <summary>
/// End-to-end tests for custom tools that return a structured <see cref="ToolResultAIContent"/>.
/// The base class is constructed with the name "tool_results".
/// </summary>
public class ToolResultsTests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "tool_results", output)
{
    /// <summary>
    /// Registers a <c>get_weather</c> tool whose result has <c>ResultType = "success"</c> and
    /// checks that the final assistant message mentions "sunny" (any casing) or "72".
    /// </summary>
    [Fact]
    public async Task Should_Handle_Structured_ToolResultObject_From_Custom_Tool()
    {
        var session = await CreateSessionAsync(new SessionConfig
        {
            Tools = [AIFunctionFactory.Create(GetWeather, "get_weather")],
            OnPermissionRequest = PermissionHandler.ApproveAll,
        });

        await session.SendAsync(new MessageOptions
        {
            Prompt = "What's the weather in Paris?"
        });

        var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session);
        Assert.NotNull(assistantMessage);
        Assert.Matches("(?i)sunny|72", assistantMessage!.Data.Content ?? string.Empty);

        [Description("Gets weather for a city")]
        static ToolResultAIContent GetWeather([Description("City name")] string city)
            => new(new()
            {
                TextResultForLlm = $"The weather in {city} is sunny and 72°F",
                ResultType = "success",
            });
    }

    /// <summary>
    /// Registers a <c>check_status</c> tool whose result has <c>ResultType = "failure"</c> and an
    /// <c>Error</c>, then checks that the final assistant message contains "service is down".
    /// </summary>
    [Fact]
    public async Task Should_Handle_Tool_Result_With_Failure_ResultType()
    {
        var session = await CreateSessionAsync(new SessionConfig
        {
            Tools = [AIFunctionFactory.Create(CheckStatus, "check_status")],
            OnPermissionRequest = PermissionHandler.ApproveAll,
        });

        await session.SendAsync(new MessageOptions
        {
            Prompt = "Check the status of the service using check_status. If it fails, say 'service is down'."
        });

        var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session);
        Assert.NotNull(assistantMessage);

        // Compare case-insensitively instead of allocating a lower-cased copy of the content.
        Assert.Contains(
            "service is down",
            assistantMessage!.Data.Content ?? string.Empty,
            StringComparison.OrdinalIgnoreCase);

        [Description("Checks the status of a service")]
        static ToolResultAIContent CheckStatus()
            => new(new()
            {
                TextResultForLlm = "Service unavailable",
                ResultType = "failure",
                Error = "API timeout",
            });
    }

    /// <summary>
    /// Registers an <c>analyze_code</c> tool whose result carries <c>ToolTelemetry</c>, checks
    /// that the final assistant message contains "no issues", and then inspects the last
    /// captured exchange: it must contain exactly one "tool" message, whose content mentions
    /// neither "toolTelemetry" nor "resultType".
    /// </summary>
    [Fact]
    public async Task Should_Preserve_ToolTelemetry_And_Not_Stringify_Structured_Results_For_LLM()
    {
        var session = await CreateSessionAsync(new SessionConfig
        {
            Tools = [AIFunctionFactory.Create(AnalyzeCode, "analyze_code")],
            OnPermissionRequest = PermissionHandler.ApproveAll,
        });

        await session.SendAsync(new MessageOptions
        {
            Prompt = "Analyze the file main.ts for issues."
        });

        var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session);
        Assert.NotNull(assistantMessage);
        Assert.Contains(
            "no issues",
            assistantMessage!.Data.Content ?? string.Empty,
            StringComparison.OrdinalIgnoreCase);

        // Verify the LLM received just textResultForLlm, not stringified JSON
        var traffic = await Ctx.GetExchangesAsync();

        // Fail with a clear assertion rather than an index error when nothing was captured.
        Assert.NotEmpty(traffic);
        var lastConversation = traffic[^1];

        var toolResults = lastConversation.Request.Messages
            .Where(m => m.Role == "tool")
            .ToList();

        Assert.Single(toolResults);
        Assert.DoesNotContain("toolTelemetry", toolResults[0].Content);
        Assert.DoesNotContain("resultType", toolResults[0].Content);

        [Description("Analyzes code for issues")]
        static ToolResultAIContent AnalyzeCode([Description("File to analyze")] string file)
            => new(new()
            {
                TextResultForLlm = $"Analysis of {file}: no issues found",
                ResultType = "success",
                ToolTelemetry = new Dictionary<string, object>
                {
                    ["metrics"] = new Dictionary<string, object> { ["analysisTimeMs"] = 150 },
                    ["properties"] = new Dictionary<string, object> { ["analyzer"] = "eslint" },
                },
            });
    }
}
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
package e2e
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
copilot "github.com/github/copilot-sdk/go"
8+
"github.com/github/copilot-sdk/go/internal/e2e/testharness"
9+
)
10+
11+
func TestToolResults(t *testing.T) {
12+
ctx := testharness.NewTestContext(t)
13+
client := ctx.NewClient()
14+
t.Cleanup(func() { client.ForceStop() })
15+
16+
t.Run("should handle structured toolresultobject from custom tool", func(t *testing.T) {
17+
ctx.ConfigureForTest(t)
18+
19+
type WeatherParams struct {
20+
City string `json:"city" jsonschema:"City name"`
21+
}
22+
23+
session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{
24+
OnPermissionRequest: copilot.PermissionHandler.ApproveAll,
25+
Tools: []copilot.Tool{
26+
copilot.DefineTool("get_weather", "Gets weather for a city",
27+
func(params WeatherParams, inv copilot.ToolInvocation) (copilot.ToolResult, error) {
28+
return copilot.ToolResult{
29+
TextResultForLLM: "The weather in " + params.City + " is sunny and 72°F",
30+
ResultType: "success",
31+
}, nil
32+
}),
33+
},
34+
})
35+
if err != nil {
36+
t.Fatalf("Failed to create session: %v", err)
37+
}
38+
39+
_, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "What's the weather in Paris?"})
40+
if err != nil {
41+
t.Fatalf("Failed to send message: %v", err)
42+
}
43+
44+
answer, err := testharness.GetFinalAssistantMessage(t.Context(), session)
45+
if err != nil {
46+
t.Fatalf("Failed to get assistant message: %v", err)
47+
}
48+
49+
content := ""
50+
if answer.Data.Content != nil {
51+
content = *answer.Data.Content
52+
}
53+
if !strings.Contains(strings.ToLower(content), "sunny") && !strings.Contains(content, "72") {
54+
t.Errorf("Expected answer to mention sunny or 72, got %q", content)
55+
}
56+
})
57+
58+
t.Run("should handle tool result with failure resulttype", func(t *testing.T) {
59+
ctx.ConfigureForTest(t)
60+
61+
session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{
62+
OnPermissionRequest: copilot.PermissionHandler.ApproveAll,
63+
Tools: []copilot.Tool{
64+
{
65+
Name: "check_status",
66+
Description: "Checks the status of a service",
67+
Handler: func(inv copilot.ToolInvocation) (copilot.ToolResult, error) {
68+
return copilot.ToolResult{
69+
TextResultForLLM: "Service unavailable",
70+
ResultType: "failure",
71+
Error: "API timeout",
72+
}, nil
73+
},
74+
},
75+
},
76+
})
77+
if err != nil {
78+
t.Fatalf("Failed to create session: %v", err)
79+
}
80+
81+
_, err = session.Send(t.Context(), copilot.MessageOptions{
82+
Prompt: "Check the status of the service using check_status. If it fails, say 'service is down'.",
83+
})
84+
if err != nil {
85+
t.Fatalf("Failed to send message: %v", err)
86+
}
87+
88+
answer, err := testharness.GetFinalAssistantMessage(t.Context(), session)
89+
if err != nil {
90+
t.Fatalf("Failed to get assistant message: %v", err)
91+
}
92+
93+
content := ""
94+
if answer.Data.Content != nil {
95+
content = *answer.Data.Content
96+
}
97+
if !strings.Contains(strings.ToLower(content), "service is down") {
98+
t.Errorf("Expected 'service is down', got %q", content)
99+
}
100+
})
101+
102+
t.Run("should preserve tooltelemetry and not stringify structured results for llm", func(t *testing.T) {
103+
ctx.ConfigureForTest(t)
104+
105+
type AnalyzeParams struct {
106+
File string `json:"file" jsonschema:"File to analyze"`
107+
}
108+
109+
session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{
110+
OnPermissionRequest: copilot.PermissionHandler.ApproveAll,
111+
Tools: []copilot.Tool{
112+
copilot.DefineTool("analyze_code", "Analyzes code for issues",
113+
func(params AnalyzeParams, inv copilot.ToolInvocation) (copilot.ToolResult, error) {
114+
return copilot.ToolResult{
115+
TextResultForLLM: "Analysis of " + params.File + ": no issues found",
116+
ResultType: "success",
117+
ToolTelemetry: map[string]any{
118+
"metrics": map[string]any{"analysisTimeMs": 150},
119+
"properties": map[string]any{"analyzer": "eslint"},
120+
},
121+
}, nil
122+
}),
123+
},
124+
})
125+
if err != nil {
126+
t.Fatalf("Failed to create session: %v", err)
127+
}
128+
129+
_, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "Analyze the file main.ts for issues."})
130+
if err != nil {
131+
t.Fatalf("Failed to send message: %v", err)
132+
}
133+
134+
answer, err := testharness.GetFinalAssistantMessage(t.Context(), session)
135+
if err != nil {
136+
t.Fatalf("Failed to get assistant message: %v", err)
137+
}
138+
139+
content := ""
140+
if answer.Data.Content != nil {
141+
content = *answer.Data.Content
142+
}
143+
if !strings.Contains(strings.ToLower(content), "no issues") {
144+
t.Errorf("Expected 'no issues', got %q", content)
145+
}
146+
147+
// Verify the LLM received just textResultForLlm, not stringified JSON
148+
traffic, err := ctx.GetExchanges()
149+
if err != nil {
150+
t.Fatalf("Failed to get exchanges: %v", err)
151+
}
152+
153+
lastConversation := traffic[len(traffic)-1]
154+
var toolResults []testharness.ChatCompletionMessage
155+
for _, msg := range lastConversation.Request.Messages {
156+
if msg.Role == "tool" {
157+
toolResults = append(toolResults, msg)
158+
}
159+
}
160+
161+
if len(toolResults) != 1 {
162+
t.Fatalf("Expected 1 tool result, got %d", len(toolResults))
163+
}
164+
if strings.Contains(toolResults[0].Content, "toolTelemetry") {
165+
t.Error("Tool result content should not contain 'toolTelemetry'")
166+
}
167+
if strings.Contains(toolResults[0].Content, "resultType") {
168+
t.Error("Tool result content should not contain 'resultType'")
169+
}
170+
})
171+
}

python/e2e/test_tool_results.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
"""E2E Tool Results Tests"""
2+
3+
import pytest
4+
from pydantic import BaseModel, Field
5+
6+
from copilot import define_tool
7+
from copilot.session import PermissionHandler
8+
from copilot.tools import ToolInvocation, ToolResult
9+
10+
from .testharness import E2ETestContext, get_final_assistant_message
11+
12+
pytestmark = pytest.mark.asyncio(loop_scope="module")
13+
14+
15+
class TestToolResults:
    """End-to-end checks for custom tools that return a structured ``ToolResult``."""

    async def test_should_handle_structured_toolresultobject_from_custom_tool(
        self, ctx: E2ETestContext
    ):
        """A tool result with ``result_type="success"`` should surface in the final answer."""

        class WeatherParams(BaseModel):
            city: str = Field(description="City name")

        @define_tool("get_weather", description="Gets weather for a city")
        def get_weather(params: WeatherParams, invocation: ToolInvocation) -> ToolResult:
            return ToolResult(
                text_result_for_llm=f"The weather in {params.city} is sunny and 72°F",
                result_type="success",
            )

        session = await ctx.client.create_session(
            on_permission_request=PermissionHandler.approve_all, tools=[get_weather]
        )

        await session.send("What's the weather in Paris?")
        assistant_message = await get_final_assistant_message(session)
        # Treat a missing content as empty so the assertion fails instead of raising.
        content = assistant_message.data.content or ""
        assert "sunny" in content.lower() or "72" in content

    async def test_should_handle_tool_result_with_failure_resulttype(
        self, ctx: E2ETestContext
    ):
        """A tool result with ``result_type="failure"`` should lead to the requested phrase."""

        @define_tool("check_status", description="Checks the status of a service")
        def check_status(invocation: ToolInvocation) -> ToolResult:
            return ToolResult(
                text_result_for_llm="Service unavailable",
                result_type="failure",
                error="API timeout",
            )

        session = await ctx.client.create_session(
            on_permission_request=PermissionHandler.approve_all, tools=[check_status]
        )

        answer = await session.send_and_wait(
            "Check the status of the service using check_status. If it fails, say 'service is down'."
        )
        assert answer is not None
        content = answer.data.content or ""
        assert "service is down" in content.lower()

    async def test_should_preserve_tooltelemetry_and_not_stringify_structured_results_for_llm(
        self, ctx: E2ETestContext
    ):
        """The tool message sent to the LLM must not mention toolTelemetry or resultType."""

        class AnalyzeParams(BaseModel):
            file: str = Field(description="File to analyze")

        @define_tool("analyze_code", description="Analyzes code for issues")
        def analyze_code(params: AnalyzeParams, invocation: ToolInvocation) -> ToolResult:
            return ToolResult(
                text_result_for_llm=f"Analysis of {params.file}: no issues found",
                result_type="success",
                tool_telemetry={
                    "metrics": {"analysisTimeMs": 150},
                    "properties": {"analyzer": "eslint"},
                },
            )

        session = await ctx.client.create_session(
            on_permission_request=PermissionHandler.approve_all, tools=[analyze_code]
        )

        await session.send("Analyze the file main.ts for issues.")
        assistant_message = await get_final_assistant_message(session)
        content = assistant_message.data.content or ""
        assert "no issues" in content.lower()

        # Verify the LLM received just textResultForLlm, not stringified JSON
        traffic = await ctx.get_exchanges()
        last_conversation = traffic[-1]
        tool_results = [
            m for m in last_conversation["request"]["messages"] if m["role"] == "tool"
        ]
        assert len(tool_results) == 1
        assert "toolTelemetry" not in tool_results[0]["content"]
        assert "resultType" not in tool_results[0]["content"]

0 commit comments

Comments
 (0)