diff --git a/dotnet/src/Client.cs b/dotnet/src/Client.cs index ada241baa..07502ee2d 100644 --- a/dotnet/src/Client.cs +++ b/dotnet/src/Client.cs @@ -506,7 +506,8 @@ public async Task CreateSessionAsync(SessionConfig config, Cance Commands: config.Commands?.Select(c => new CommandWireDefinition(c.Name, c.Description)).ToList(), RequestElicitation: config.OnElicitationRequest != null, Traceparent: traceparent, - Tracestate: tracestate); + Tracestate: tracestate, + ModelCapabilities: config.ModelCapabilities); var response = await InvokeRpcAsync( connection.Rpc, "session.create", [request], cancellationToken); @@ -626,7 +627,8 @@ public async Task ResumeSessionAsync(string sessionId, ResumeSes Commands: config.Commands?.Select(c => new CommandWireDefinition(c.Name, c.Description)).ToList(), RequestElicitation: config.OnElicitationRequest != null, Traceparent: traceparent, - Tracestate: tracestate); + Tracestate: tracestate, + ModelCapabilities: config.ModelCapabilities); var response = await InvokeRpcAsync( connection.Rpc, "session.resume", [request], cancellationToken); @@ -1605,7 +1607,8 @@ internal record CreateSessionRequest( List? Commands = null, bool? RequestElicitation = null, string? Traceparent = null, - string? Tracestate = null); + string? Tracestate = null, + ModelCapabilitiesOverride? ModelCapabilities = null); internal record ToolDefinition( string Name, @@ -1656,7 +1659,8 @@ internal record ResumeSessionRequest( List? Commands = null, bool? RequestElicitation = null, string? Traceparent = null, - string? Tracestate = null); + string? Tracestate = null, + ModelCapabilitiesOverride? ModelCapabilities = null); internal record ResumeSessionResponse( string SessionId, @@ -1797,6 +1801,7 @@ private static LogLevel MapLevel(TraceEventType eventType) [JsonSerializable(typeof(ListSessionsResponse))] [JsonSerializable(typeof(GetSessionMetadataRequest))] [JsonSerializable(typeof(GetSessionMetadataResponse))] + [JsonSerializable(typeof(ModelCapabilitiesOverride))] [JsonSerializable(typeof(PermissionRequestResult))] [JsonSerializable(typeof(PermissionRequestResponseV2))] [JsonSerializable(typeof(ProviderConfig))] diff --git a/dotnet/src/Generated/Rpc.cs b/dotnet/src/Generated/Rpc.cs index 3c1035e20..9907641b5 100644 --- a/dotnet/src/Generated/Rpc.cs +++ b/dotnet/src/Generated/Rpc.cs @@ -55,6 +55,22 @@ public class ModelCapabilitiesSupports public bool? ReasoningEffort { get; set; } } +/// Vision-specific limits. +public class ModelCapabilitiesLimitsVision +{ + /// MIME types the model accepts. + [JsonPropertyName("supported_media_types")] + public List SupportedMediaTypes { get => field ??= []; set; } + + /// Maximum number of images per prompt. + [JsonPropertyName("max_prompt_images")] + public double MaxPromptImages { get; set; } + + /// Maximum image size in bytes. + [JsonPropertyName("max_prompt_image_size")] + public double MaxPromptImageSize { get; set; } +} + /// Token limits for prompts, outputs, and context window. public class ModelCapabilitiesLimits { @@ -69,6 +85,10 @@ public class ModelCapabilitiesLimits /// Maximum total context window size in tokens. [JsonPropertyName("max_context_window_tokens")] public double MaxContextWindowTokens { get; set; } + + /// Vision-specific limits. + [JsonPropertyName("vision")] + public ModelCapabilitiesLimitsVision? Vision { get; set; } } /// Model capabilities and limits. @@ -299,6 +319,66 @@ public class SessionModelSwitchToResult public string? ModelId { get; set; } } +/// Feature flags indicating what the model supports. +public class ModelCapabilitiesOverrideSupports +{ + /// Gets or sets the vision value. + [JsonPropertyName("vision")] + public bool? Vision { get; set; } + + /// Gets or sets the reasoningEffort value. + [JsonPropertyName("reasoningEffort")] + public bool? ReasoningEffort { get; set; } +} + +/// RPC data type for ModelCapabilitiesOverrideLimitsVision operations. +public class ModelCapabilitiesOverrideLimitsVision +{ + /// MIME types the model accepts. + [JsonPropertyName("supported_media_types")] + public List? SupportedMediaTypes { get; set; } + + /// Maximum number of images per prompt. + [JsonPropertyName("max_prompt_images")] + public double? MaxPromptImages { get; set; } + + /// Maximum image size in bytes. + [JsonPropertyName("max_prompt_image_size")] + public double? MaxPromptImageSize { get; set; } +} + +/// Token limits for prompts, outputs, and context window. +public class ModelCapabilitiesOverrideLimits +{ + /// Gets or sets the max_prompt_tokens value. + [JsonPropertyName("max_prompt_tokens")] + public double? MaxPromptTokens { get; set; } + + /// Gets or sets the max_output_tokens value. + [JsonPropertyName("max_output_tokens")] + public double? MaxOutputTokens { get; set; } + + /// Maximum total context window size in tokens. + [JsonPropertyName("max_context_window_tokens")] + public double? MaxContextWindowTokens { get; set; } + + /// Gets or sets the vision value. + [JsonPropertyName("vision")] + public ModelCapabilitiesOverrideLimitsVision? Vision { get; set; } +} + +/// Override individual model capabilities resolved by the runtime. +public class ModelCapabilitiesOverride +{ + /// Feature flags indicating what the model supports. + [JsonPropertyName("supports")] + public ModelCapabilitiesOverrideSupports? Supports { get; set; } + + /// Token limits for prompts, outputs, and context window. + [JsonPropertyName("limits")] + public ModelCapabilitiesOverrideLimits? Limits { get; set; } +} + /// RPC data type for SessionModelSwitchTo operations. internal class SessionModelSwitchToRequest { @@ -313,6 +393,10 @@ internal class SessionModelSwitchToRequest /// Reasoning effort level to use for the model. [JsonPropertyName("reasoningEffort")] public string? ReasoningEffort { get; set; } + + /// Override individual model capabilities resolved by the runtime. + [JsonPropertyName("modelCapabilities")] + public ModelCapabilitiesOverride? ModelCapabilities { get; set; } } /// RPC data type for SessionModeGet operations. @@ -1537,9 +1621,9 @@ public async Task GetCurrentAsync(CancellationToke } /// Calls "session.model.switchTo". - public async Task SwitchToAsync(string modelId, string? reasoningEffort = null, CancellationToken cancellationToken = default) + public async Task SwitchToAsync(string modelId, string? reasoningEffort = null, ModelCapabilitiesOverride? modelCapabilities = null, CancellationToken cancellationToken = default) { - var request = new SessionModelSwitchToRequest { SessionId = _sessionId, ModelId = modelId, ReasoningEffort = reasoningEffort }; + var request = new SessionModelSwitchToRequest { SessionId = _sessionId, ModelId = modelId, ReasoningEffort = reasoningEffort, ModelCapabilities = modelCapabilities }; return await CopilotClient.InvokeRpcAsync(_rpc, "session.model.switchTo", [request], cancellationToken); } } @@ -2003,6 +2087,11 @@ public async Task KillAsync(string processId, SessionShe [JsonSerializable(typeof(ModelBilling))] [JsonSerializable(typeof(ModelCapabilities))] [JsonSerializable(typeof(ModelCapabilitiesLimits))] +[JsonSerializable(typeof(ModelCapabilitiesLimitsVision))] +[JsonSerializable(typeof(ModelCapabilitiesOverride))] +[JsonSerializable(typeof(ModelCapabilitiesOverrideLimits))] +[JsonSerializable(typeof(ModelCapabilitiesOverrideLimitsVision))] +[JsonSerializable(typeof(ModelCapabilitiesOverrideSupports))] [JsonSerializable(typeof(ModelCapabilitiesSupports))] [JsonSerializable(typeof(ModelPolicy))] [JsonSerializable(typeof(ModelsListResult))] diff --git a/dotnet/src/Generated/SessionEvents.cs b/dotnet/src/Generated/SessionEvents.cs index 8b5c0a5f1..d3e764ee8 100644 --- a/dotnet/src/Generated/SessionEvents.cs +++ b/dotnet/src/Generated/SessionEvents.cs @@ -181,7 +181,7 @@ public partial class SessionErrorEvent : SessionEvent public required SessionErrorData Data { get; set; } } -/// Payload indicating the agent is idle; includes any background tasks still in flight. +/// Payload indicating the session is fully idle with no background tasks in flight. /// Represents the session.idle event. public partial class SessionIdleEvent : SessionEvent { @@ -791,7 +791,7 @@ public partial class UserInputRequestedEvent : SessionEvent public required UserInputRequestedData Data { get; set; } } -/// User input request completion notification signaling UI dismissal. +/// User input request completion with the user's response. /// Represents the user_input.completed event. public partial class UserInputCompletedEvent : SessionEvent { @@ -817,7 +817,7 @@ public partial class ElicitationRequestedEvent : SessionEvent public required ElicitationRequestedData Data { get; set; } } -/// Elicitation request completion notification signaling UI dismissal. +/// Elicitation request completion with the user's response. /// Represents the elicitation.completed event. public partial class ElicitationCompletedEvent : SessionEvent { @@ -986,7 +986,7 @@ public partial class ExitPlanModeRequestedEvent : SessionEvent public required ExitPlanModeRequestedData Data { get; set; } } -/// Plan mode exit completion notification signaling UI dismissal. +/// Plan mode exit completion with the user's approval decision and optional feedback. /// Represents the exit_plan_mode.completed event. public partial class ExitPlanModeCompletedEvent : SessionEvent { @@ -1209,14 +1209,9 @@ public partial class SessionErrorData public string? Url { get; set; } } -/// Payload indicating the agent is idle; includes any background tasks still in flight. +/// Payload indicating the session is fully idle with no background tasks in flight. public partial class SessionIdleData { - /// Background tasks still running when the agent became idle. - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] - [JsonPropertyName("backgroundTasks")] - public SessionIdleDataBackgroundTasks? BackgroundTasks { get; set; } - /// True when the preceding agentic loop was cancelled via abort signal. [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] [JsonPropertyName("aborted")] @@ -2313,12 +2308,22 @@ public partial class UserInputRequestedData public string? ToolCallId { get; set; } } -/// User input request completion notification signaling UI dismissal. +/// User input request completion with the user's response. public partial class UserInputCompletedData { /// Request ID of the resolved user input request; clients should dismiss any UI for this request. [JsonPropertyName("requestId")] public required string RequestId { get; set; } + + /// The user's answer to the input request. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("answer")] + public string? Answer { get; set; } + + /// Whether the answer was typed as free-form text rather than selected from choices. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("wasFreeform")] + public bool? WasFreeform { get; set; } } /// Elicitation request; may be form-based (structured input) or URL-based (browser redirect). @@ -2358,12 +2363,22 @@ public partial class ElicitationRequestedData public string? Url { get; set; } } -/// Elicitation request completion notification signaling UI dismissal. +/// Elicitation request completion with the user's response. public partial class ElicitationCompletedData { /// Request ID of the resolved elicitation request; clients should dismiss any UI for this request. [JsonPropertyName("requestId")] public required string RequestId { get; set; } + + /// The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" (dismissed). + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("action")] + public ElicitationCompletedDataAction? Action { get; set; } + + /// The submitted form data when action is 'accept'; keys match the requested schema fields. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("content")] + public Dictionary? Content { get; set; } } /// Sampling request from an MCP server; contains the server name and a requestId for correlation. @@ -2543,12 +2558,32 @@ public partial class ExitPlanModeRequestedData public required string RecommendedAction { get; set; } } -/// Plan mode exit completion notification signaling UI dismissal. +/// Plan mode exit completion with the user's approval decision and optional feedback. public partial class ExitPlanModeCompletedData { /// Request ID of the resolved exit plan mode request; clients should dismiss any UI for this request. [JsonPropertyName("requestId")] public required string RequestId { get; set; } + + /// Whether the plan was approved by the user. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("approved")] + public bool? Approved { get; set; } + + /// Which action the user selected (e.g. 'autopilot', 'interactive', 'exit_only'). + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("selectedAction")] + public string? SelectedAction { get; set; } + + /// Whether edits should be auto-approved without confirmation. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("autoApproveEdits")] + public bool? AutoApproveEdits { get; set; } + + /// Free-form feedback from the user if they requested changes to the plan. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("feedback")] + public string? Feedback { get; set; } } /// Event payload for . @@ -2694,51 +2729,6 @@ public partial class SessionResumeDataContext public string? BaseCommit { get; set; } } -/// A background agent task. -/// Nested data type for SessionIdleDataBackgroundTasksAgentsItem. -public partial class SessionIdleDataBackgroundTasksAgentsItem -{ - /// Unique identifier of the background agent. - [JsonPropertyName("agentId")] - public required string AgentId { get; set; } - - /// Type of the background agent. - [JsonPropertyName("agentType")] - public required string AgentType { get; set; } - - /// Human-readable description of the agent task. - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] - [JsonPropertyName("description")] - public string? Description { get; set; } -} - -/// A background shell command. -/// Nested data type for SessionIdleDataBackgroundTasksShellsItem. -public partial class SessionIdleDataBackgroundTasksShellsItem -{ - /// Unique identifier of the background shell. - [JsonPropertyName("shellId")] - public required string ShellId { get; set; } - - /// Human-readable description of the shell command. - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] - [JsonPropertyName("description")] - public string? Description { get; set; } -} - -/// Background tasks still running when the agent became idle. -/// Nested data type for SessionIdleDataBackgroundTasks. -public partial class SessionIdleDataBackgroundTasks -{ - /// Currently running background agents. - [JsonPropertyName("agents")] - public required SessionIdleDataBackgroundTasksAgentsItem[] Agents { get; set; } - - /// Currently running background shell commands. - [JsonPropertyName("shells")] - public required SessionIdleDataBackgroundTasksShellsItem[] Shells { get; set; } -} - /// Repository context for the handed-off session. /// Nested data type for SessionHandoffDataRepository. public partial class SessionHandoffDataRepository @@ -4016,6 +4006,21 @@ public enum ElicitationRequestedDataMode Url, } +/// The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" (dismissed). +[JsonConverter(typeof(JsonStringEnumConverter))] +public enum ElicitationCompletedDataAction +{ + /// The accept variant. + [JsonStringEnumMemberName("accept")] + Accept, + /// The decline variant. + [JsonStringEnumMemberName("decline")] + Decline, + /// The cancel variant. + [JsonStringEnumMemberName("cancel")] + Cancel, +} + /// Connection status: connected, failed, needs-auth, pending, disabled, or not_configured. [JsonConverter(typeof(JsonStringEnumConverter))] public enum SessionMcpServersLoadedDataServersItemStatus @@ -4177,9 +4182,6 @@ public enum SessionExtensionsLoadedDataExtensionsItemStatus [JsonSerializable(typeof(SessionHandoffDataRepository))] [JsonSerializable(typeof(SessionHandoffEvent))] [JsonSerializable(typeof(SessionIdleData))] -[JsonSerializable(typeof(SessionIdleDataBackgroundTasks))] -[JsonSerializable(typeof(SessionIdleDataBackgroundTasksAgentsItem))] -[JsonSerializable(typeof(SessionIdleDataBackgroundTasksShellsItem))] [JsonSerializable(typeof(SessionIdleEvent))] [JsonSerializable(typeof(SessionInfoData))] [JsonSerializable(typeof(SessionInfoEvent))] diff --git a/dotnet/src/Session.cs b/dotnet/src/Session.cs index 3468e9b52..09a53efd3 100644 --- a/dotnet/src/Session.cs +++ b/dotnet/src/Session.cs @@ -1105,6 +1105,7 @@ await InvokeRpcAsync( /// /// Model ID to switch to (e.g., "gpt-4.1"). /// Reasoning effort level (e.g., "low", "medium", "high", "xhigh"). + /// Per-property overrides for model capabilities, deep-merged over runtime defaults. /// Optional cancellation token. /// /// @@ -1112,9 +1113,9 @@ await InvokeRpcAsync( /// await session.SetModelAsync("claude-sonnet-4.6", "high"); /// /// - public async Task SetModelAsync(string model, string? reasoningEffort, CancellationToken cancellationToken = default) + public async Task SetModelAsync(string model, string? reasoningEffort, ModelCapabilitiesOverride? modelCapabilities = null, CancellationToken cancellationToken = default) { - await Rpc.Model.SwitchToAsync(model, reasoningEffort, cancellationToken); + await Rpc.Model.SwitchToAsync(model, reasoningEffort, modelCapabilities, cancellationToken); } /// @@ -1122,7 +1123,7 @@ public async Task SetModelAsync(string model, string? reasoningEffort, Cancellat /// public Task SetModelAsync(string model, CancellationToken cancellationToken = default) { - return SetModelAsync(model, reasoningEffort: null, cancellationToken); + return SetModelAsync(model, reasoningEffort: null, modelCapabilities: null, cancellationToken); } /// diff --git a/dotnet/src/Types.cs b/dotnet/src/Types.cs index 80410c27a..265781bac 100644 --- a/dotnet/src/Types.cs +++ b/dotnet/src/Types.cs @@ -1579,6 +1579,7 @@ protected SessionConfig(SessionConfig? other) ? new Dictionary(other.McpServers, other.McpServers.Comparer) : null; Model = other.Model; + ModelCapabilities = other.ModelCapabilities; OnElicitationRequest = other.OnElicitationRequest; OnEvent = other.OnEvent; OnPermissionRequest = other.OnPermissionRequest; @@ -1616,6 +1617,11 @@ protected SessionConfig(SessionConfig? other) /// public string? ReasoningEffort { get; set; } + /// + /// Per-property overrides for model capabilities, deep-merged over runtime defaults. + /// + public ModelCapabilitiesOverride? ModelCapabilities { get; set; } + /// /// Override the default configuration directory location. /// When specified, the session will use this directory for storing config and state. @@ -1780,6 +1786,7 @@ protected ResumeSessionConfig(ResumeSessionConfig? other) ? new Dictionary(other.McpServers, other.McpServers.Comparer) : null; Model = other.Model; + ModelCapabilities = other.ModelCapabilities; OnElicitationRequest = other.OnElicitationRequest; OnEvent = other.OnEvent; OnPermissionRequest = other.OnPermissionRequest; @@ -1837,6 +1844,11 @@ protected ResumeSessionConfig(ResumeSessionConfig? other) /// public string? ReasoningEffort { get; set; } + /// + /// Per-property overrides for model capabilities, deep-merged over runtime defaults. + /// + public ModelCapabilitiesOverride? ModelCapabilities { get; set; } + /// /// Handler for permission requests from the server. /// When provided, the server will call this handler to request permission for operations. @@ -2439,6 +2451,7 @@ public class SystemMessageTransformRpcResponse [JsonSerializable(typeof(MessageOptions))] [JsonSerializable(typeof(ModelBilling))] [JsonSerializable(typeof(ModelCapabilities))] +[JsonSerializable(typeof(ModelCapabilitiesOverride))] [JsonSerializable(typeof(ModelInfo))] [JsonSerializable(typeof(ModelLimits))] [JsonSerializable(typeof(ModelPolicy))] diff --git a/dotnet/test/Harness/CapiProxy.cs b/dotnet/test/Harness/CapiProxy.cs index e6208f251..1c775adb0 100644 --- a/dotnet/test/Harness/CapiProxy.cs +++ b/dotnet/test/Harness/CapiProxy.cs @@ -164,9 +164,16 @@ public record ChatCompletionRequest( public record ChatCompletionMessage( string Role, - string? Content, + JsonElement? Content, [property: JsonPropertyName("tool_call_id")] string? ToolCallId, - [property: JsonPropertyName("tool_calls")] List? ToolCalls); + [property: JsonPropertyName("tool_calls")] List? ToolCalls) +{ + /// + /// Returns Content as a string when the JSON value is a string, or null otherwise. + /// + [JsonIgnore] + public string? StringContent => Content is { ValueKind: JsonValueKind.String } c ? c.GetString() : null; +} public record ChatCompletionToolCall(string Id, string Type, ChatCompletionToolCallFunction Function); diff --git a/dotnet/test/Harness/E2ETestBase.cs b/dotnet/test/Harness/E2ETestBase.cs index e982090cb..d1756ea61 100644 --- a/dotnet/test/Harness/E2ETestBase.cs +++ b/dotnet/test/Harness/E2ETestBase.cs @@ -69,7 +69,7 @@ protected Task ResumeSessionAsync(string sessionId, ResumeSessio protected static string GetSystemMessage(ParsedHttpExchange exchange) { - return exchange.Request.Messages.FirstOrDefault(m => m.Role == "system")?.Content ?? string.Empty; + return exchange.Request.Messages.FirstOrDefault(m => m.Role == "system")?.StringContent ?? string.Empty; } protected static List GetToolNames(ParsedHttpExchange exchange) diff --git a/dotnet/test/SessionConfigTests.cs b/dotnet/test/SessionConfigTests.cs new file mode 100644 index 000000000..140b5e70e --- /dev/null +++ b/dotnet/test/SessionConfigTests.cs @@ -0,0 +1,115 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +using System.Linq; +using System.Text.Json; +using GitHub.Copilot.SDK.Rpc; +using GitHub.Copilot.SDK.Test.Harness; +using Xunit; +using Xunit.Abstractions; + +namespace GitHub.Copilot.SDK.Test; + +public class SessionConfigTests(E2ETestFixture fixture, ITestOutputHelper output) + : E2ETestBase(fixture, "session_config", output) +{ + private const string ViewImagePrompt = "Use the view tool to look at the file test.png and describe what you see"; + + private static readonly byte[] Png1X1 = Convert.FromBase64String( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="); + + [Fact] + public async Task VisionDisabledThenEnabledViaSetModel() + { + await File.WriteAllBytesAsync(Path.Combine(Ctx.WorkDir, "test.png"), Png1X1); + + var session = await CreateSessionAsync(new SessionConfig + { + Model = "claude-sonnet-4.5", + ModelCapabilities = new ModelCapabilitiesOverride + { + Supports = new ModelCapabilitiesOverrideSupports { Vision = false }, + }, + }); + + // Turn 1: vision off — no image_url expected + await session.SendAndWaitAsync(new MessageOptions { Prompt = ViewImagePrompt }); + var trafficAfterT1 = await Ctx.GetExchangesAsync(); + var t1Messages = trafficAfterT1.SelectMany(e => e.Request.Messages).ToList(); + Assert.False(HasImageUrlContent(t1Messages), "Expected no image_url content when vision is disabled"); + + // Switch vision on + await session.SetModelAsync( + "claude-sonnet-4.5", + reasoningEffort: null, + modelCapabilities: new ModelCapabilitiesOverride + { + Supports = new ModelCapabilitiesOverrideSupports { Vision = true }, + }); + + // Turn 2: vision on — image_url expected + await session.SendAndWaitAsync(new MessageOptions { Prompt = ViewImagePrompt }); + var trafficAfterT2 = await Ctx.GetExchangesAsync(); + var newExchanges = trafficAfterT2.Skip(trafficAfterT1.Count).ToList(); + Assert.NotEmpty(newExchanges); + var t2Messages = newExchanges.SelectMany(e => e.Request.Messages).ToList(); + Assert.True(HasImageUrlContent(t2Messages), "Expected image_url content when vision is enabled"); + + await session.DisposeAsync(); + } + + [Fact] + public async Task VisionEnabledThenDisabledViaSetModel() + { + await File.WriteAllBytesAsync(Path.Combine(Ctx.WorkDir, "test.png"), Png1X1); + + var session = await CreateSessionAsync(new SessionConfig + { + Model = "claude-sonnet-4.5", + ModelCapabilities = new ModelCapabilitiesOverride + { + Supports = new ModelCapabilitiesOverrideSupports { Vision = true }, + }, + }); + + // Turn 1: vision on — image_url expected + await session.SendAndWaitAsync(new MessageOptions { Prompt = ViewImagePrompt }); + var trafficAfterT1 = await Ctx.GetExchangesAsync(); + var t1Messages = trafficAfterT1.SelectMany(e => e.Request.Messages).ToList(); + Assert.True(HasImageUrlContent(t1Messages), "Expected image_url content when vision is enabled"); + + // Switch vision off + await session.SetModelAsync( + "claude-sonnet-4.5", + reasoningEffort: null, + modelCapabilities: new ModelCapabilitiesOverride + { + Supports = new ModelCapabilitiesOverrideSupports { Vision = false }, + }); + + // Turn 2: vision off — no image_url expected in new exchanges + await session.SendAndWaitAsync(new MessageOptions { Prompt = ViewImagePrompt }); + var trafficAfterT2 = await Ctx.GetExchangesAsync(); + var newExchanges = trafficAfterT2.Skip(trafficAfterT1.Count).ToList(); + Assert.NotEmpty(newExchanges); + var t2Messages = newExchanges.SelectMany(e => e.Request.Messages).ToList(); + Assert.False(HasImageUrlContent(t2Messages), "Expected no image_url content when vision is disabled"); + + await session.DisposeAsync(); + } + + /// + /// Checks whether any user message contains an image_url content part. + /// Content can be a string (no images) or a JSON array of content parts. + /// + private static bool HasImageUrlContent(List messages) + { + return messages + .Where(m => m.Role == "user" && m.Content is { ValueKind: JsonValueKind.Array }) + .Any(m => m.Content!.Value.EnumerateArray().Any(part => + part.TryGetProperty("type", out var typeProp) && + typeProp.ValueKind == JsonValueKind.String && + typeProp.GetString() == "image_url")); + } +} diff --git a/dotnet/test/ToolResultsTests.cs b/dotnet/test/ToolResultsTests.cs index 0fc36557c..d04494e38 100644 --- a/dotnet/test/ToolResultsTests.cs +++ b/dotnet/test/ToolResultsTests.cs @@ -102,8 +102,8 @@ await session.SendAsync(new MessageOptions .ToList(); Assert.Single(toolResults); - Assert.DoesNotContain("toolTelemetry", toolResults[0].Content); - Assert.DoesNotContain("resultType", toolResults[0].Content); + Assert.DoesNotContain("toolTelemetry", toolResults[0].StringContent); + Assert.DoesNotContain("resultType", toolResults[0].StringContent); [Description("Analyzes code for issues")] static ToolResultAIContent AnalyzeCode([Description("File to analyze")] string file) diff --git a/dotnet/test/ToolsTests.cs b/dotnet/test/ToolsTests.cs index c2350cbff..ec0ba0936 100644 --- a/dotnet/test/ToolsTests.cs +++ b/dotnet/test/ToolsTests.cs @@ -97,7 +97,7 @@ public async Task Handles_Tool_Calling_Errors() Assert.Single(toolResults); var toolResult = toolResults[0]; Assert.Equal(toolCall.Id, toolResult.ToolCallId); - Assert.DoesNotContain("Melbourne", toolResult.Content); + Assert.DoesNotContain("Melbourne", toolResult.StringContent); // Importantly, we're checking that the assistant does not see the // exception information as if it was the tool's output. diff --git a/go/client.go b/go/client.go index 6f88c768a..731efbe24 100644 --- a/go/client.go +++ b/go/client.go @@ -547,6 +547,7 @@ func (c *Client) CreateSession(ctx context.Context, config *SessionConfig) (*Ses req.AvailableTools = config.AvailableTools req.ExcludedTools = config.ExcludedTools req.Provider = config.Provider + req.ModelCapabilities = config.ModelCapabilities req.WorkingDirectory = config.WorkingDirectory req.MCPServers = config.MCPServers req.EnvValueMode = "direct" @@ -687,6 +688,7 @@ func (c *Client) ResumeSessionWithOptions(ctx context.Context, sessionID string, req.SystemMessage = wireSystemMessage req.Tools = config.Tools req.Provider = config.Provider + req.ModelCapabilities = config.ModelCapabilities req.AvailableTools = config.AvailableTools req.ExcludedTools = config.ExcludedTools if config.Streaming { diff --git a/go/generated_session_events.go b/go/generated_session_events.go index e3b6fa71e..aeed590c4 100644 --- a/go/generated_session_events.go +++ b/go/generated_session_events.go @@ -34,7 +34,7 @@ type SessionEvent struct { // // Error details for timeline display including message and optional diagnostic information // - // Payload indicating the agent is idle; includes any background tasks still in flight + // Payload indicating the session is fully idle with no background tasks in flight // // Session title change payload containing the new display title // @@ -132,12 +132,12 @@ type SessionEvent struct { // // User input request notification with question and optional predefined choices // - // User input request completion notification signaling UI dismissal + // User input request completion with the user's response // // Elicitation request; may be form-based (structured input) or URL-based (browser // redirect) // - // Elicitation request completion notification signaling UI dismissal + // Elicitation request completion with the user's response // // Sampling request from an MCP server; contains the server name and a requestId for // correlation @@ -164,7 +164,7 @@ type SessionEvent struct { // // Plan approval request with plan content and available user actions // - // Plan mode exit completion notification signaling UI dismissal + // Plan mode exit completion with the user's approval decision and optional feedback Data Data `json:"data"` // When true, the event is transient and not persisted to the session event log on disk Ephemeral *bool `json:"ephemeral,omitempty"` @@ -186,7 +186,7 @@ type SessionEvent struct { // // # Error details for timeline display including message and optional diagnostic information // -// Payload indicating the agent is idle; includes any background tasks still in flight +// # Payload indicating the session is fully idle with no background tasks in flight // // # Session title change payload containing the new display title // @@ -284,12 +284,12 @@ type SessionEvent struct { // // # User input request notification with question and optional predefined choices // -// # User input request completion notification signaling UI dismissal +// # User input request completion with the user's response // // Elicitation request; may be form-based (structured input) or URL-based (browser // redirect) // -// # Elicitation request completion notification signaling UI dismissal +// # Elicitation request completion with the user's response // // Sampling request from an MCP server; contains the server name and a requestId for // correlation @@ -316,7 +316,7 @@ type SessionEvent struct { // // # Plan approval request with plan content and available user actions // -// Plan mode exit completion notification signaling UI dismissal +// Plan mode exit completion with the user's approval decision and optional feedback type Data struct { // Whether the session was already in use by another client at start time // @@ -387,8 +387,6 @@ type Data struct { URL *string `json:"url,omitempty"` // True when the preceding agentic loop was cancelled via abort signal Aborted *bool `json:"aborted,omitempty"` - // Background tasks still running when the agent became idle - BackgroundTasks *BackgroundTasks `json:"backgroundTasks,omitempty"` // The new display title for the session Title *string `json:"title,omitempty"` // Category of informational message (e.g., "notification", "timing", "context_window", @@ -611,7 +609,9 @@ type Data struct { // The system or developer prompt text // // The notification text, typically wrapped in XML tags - Content *string `json:"content,omitempty"` + // + // The submitted form data when action is 'accept'; keys match the requested schema fields + Content *DataContent `json:"content"` // CAPI interaction ID for correlating this user message with its turn // // CAPI interaction ID for correlating this turn with upstream telemetry @@ -810,6 +810,10 @@ type Data struct { Choices []string `json:"choices,omitempty"` // The question or prompt to present to the user Question *string `json:"question,omitempty"` + // The user's answer to the input request + Answer *string `json:"answer,omitempty"` + // Whether the answer was typed as free-form text rather than selected from choices + WasFreeform *bool `json:"wasFreeform,omitempty"` // The source that initiated the request (MCP server name, or absent for agent-initiated) ElicitationSource *string `json:"elicitationSource,omitempty"` // Elicitation mode; "form" for structured input, "url" for browser-based. Defaults to @@ -817,6 +821,9 @@ type Data struct { Mode *Mode `json:"mode,omitempty"` // JSON Schema describing the form fields to present to the user (form mode only) RequestedSchema *RequestedSchema `json:"requestedSchema,omitempty"` + // The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" + // (dismissed) + Action *Action `json:"action,omitempty"` // The JSON-RPC request ID from the MCP protocol MCPRequestID *MCPRequestID `json:"mcpRequestId"` // Name of the MCP server that initiated the sampling request @@ -851,10 +858,18 @@ type Data struct { PlanContent *string `json:"planContent,omitempty"` // The recommended action for the user to take RecommendedAction *string `json:"recommendedAction,omitempty"` + // Whether the plan was approved by the user + Approved *bool `json:"approved,omitempty"` + // Whether edits should be auto-approved without confirmation + AutoApproveEdits *bool `json:"autoApproveEdits,omitempty"` + // Free-form feedback from the user if they requested changes to the plan + Feedback *string `json:"feedback,omitempty"` + // Which action the user selected (e.g. 'autopilot', 'interactive', 'exit_only') + SelectedAction *string `json:"selectedAction,omitempty"` // Array of resolved skill metadata Skills []Skill `json:"skills,omitempty"` // Array of loaded custom agent metadata - Agents []DataAgent `json:"agents,omitempty"` + Agents []Agent `json:"agents,omitempty"` // Fatal errors from agent loading Errors []string `json:"errors,omitempty"` // Non-fatal warnings from agent loading @@ -867,7 +882,7 @@ type Data struct { Extensions []Extension `json:"extensions,omitempty"` } -type DataAgent struct { +type Agent struct { // Description of what the agent does Description string `json:"description"` // Human-readable display name @@ -964,32 +979,6 @@ type Start struct { Line float64 `json:"line"` } -// Background tasks still running when the agent became idle -type BackgroundTasks struct { - // Currently running background agents - Agents []BackgroundTasksAgent `json:"agents"` - // Currently running background shell commands - Shells []Shell `json:"shells"` -} - -// A background agent task -type BackgroundTasksAgent struct { - // Unique identifier of the background agent - AgentID string `json:"agentId"` - // Type of the background agent - AgentType string `json:"agentType"` - // Human-readable description of the agent task - Description *string `json:"description,omitempty"` -} - -// A background shell command -type Shell struct { - // Human-readable description of the shell command - Description *string `json:"description,omitempty"` - // Unique identifier of the background shell - ShellID string `json:"shellId"` -} - // Aggregate code change metrics for the session type CodeChanges struct { // List of file paths that were modified during the session @@ -1281,7 +1270,7 @@ type Result struct { Content *string `json:"content,omitempty"` // Structured content blocks (text, images, audio, resources) returned by the tool in their // native format - Contents []Content `json:"contents,omitempty"` + Contents []ContentElement `json:"contents,omitempty"` // Full detailed tool result for UI/timeline display, preserving complete content such as // diffs. Falls back to content when absent. DetailedContent *string `json:"detailedContent,omitempty"` @@ -1303,7 +1292,7 @@ type Result struct { // # Resource link content block referencing an external resource // // Embedded resource content block with inline text or binary data -type Content struct { +type ContentElement struct { // The text content // // Terminal/shell output text @@ -1425,6 +1414,16 @@ type UI struct { Elicitation *bool `json:"elicitation,omitempty"` } +// The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" +// (dismissed) +type Action string + +const ( + ActionAccept Action = "accept" + ActionCancel Action = "cancel" + ActionDecline Action = "decline" +) + // The agent mode that was active when this message was sent type AgentMode string @@ -1693,6 +1692,48 @@ const ( SessionEventTypeUserMessage SessionEventType = "user.message" ) +type DataContent struct { + String *string + UnionMap map[string]*ContentValue +} + +func (x *DataContent) UnmarshalJSON(data []byte) error { + x.UnionMap = nil + object, err := unmarshalUnion(data, nil, nil, nil, &x.String, false, nil, false, nil, true, &x.UnionMap, false, nil, false) + if err != nil { + return err + } + if object { + } + return nil +} + +func (x *DataContent) MarshalJSON() ([]byte, error) { + return marshalUnion(nil, nil, nil, x.String, false, nil, false, nil, x.UnionMap != nil, x.UnionMap, false, nil, false) +} + +type ContentValue struct { + Bool *bool + Double *float64 + String *string + StringArray []string +} + +func (x *ContentValue) UnmarshalJSON(data []byte) error { + x.StringArray = nil + object, err := unmarshalUnion(data, nil, &x.Double, &x.Bool, &x.String, true, &x.StringArray, false, nil, false, nil, false, nil, false) + if err != nil { + return err + } + if object { + } + return nil +} + +func (x *ContentValue) MarshalJSON() ([]byte, error) { + return marshalUnion(nil, x.Double, x.Bool, x.String, x.StringArray != nil, x.StringArray, false, nil, false, nil, false, nil, false) +} + type ContextUnion struct { ContextClass *ContextClass String *string diff --git a/go/internal/e2e/compaction_test.go b/go/internal/e2e/compaction_test.go index 888ab2aa9..4d71b35e4 100644 --- a/go/internal/e2e/compaction_test.go +++ b/go/internal/e2e/compaction_test.go @@ -84,7 +84,7 @@ func TestCompaction(t *testing.T) { if err != nil { t.Fatalf("Failed to send verification message: %v", err) } - if answer.Data.Content == nil || !strings.Contains(strings.ToLower(*answer.Data.Content), "dragon") { + if answer.Data.Content == nil || !strings.Contains(strings.ToLower(*answer.Data.Content.String), "dragon") { t.Errorf("Expected answer to contain 'dragon', got %v", answer.Data.Content) } }) diff --git a/go/internal/e2e/mcp_and_agents_test.go b/go/internal/e2e/mcp_and_agents_test.go index 079d26e9f..c5b6c666b 100644 --- a/go/internal/e2e/mcp_and_agents_test.go +++ b/go/internal/e2e/mcp_and_agents_test.go @@ -51,7 +51,7 @@ func TestMCPServers(t *testing.T) { t.Fatalf("Failed to get final message: %v", err) } - if message.Data.Content == nil || !strings.Contains(*message.Data.Content, "4") { + if message.Data.Content == nil || !strings.Contains(*message.Data.Content.String, "4") { t.Errorf("Expected message to contain '4', got: %v", message.Data.Content) } @@ -100,7 +100,7 @@ func TestMCPServers(t *testing.T) { t.Fatalf("Failed to send message: %v", err) } - if message.Data.Content == nil || !strings.Contains(*message.Data.Content, "6") { + if message.Data.Content == nil || !strings.Contains(*message.Data.Content.String, "6") { t.Errorf("Expected message to contain '6', got: %v", message.Data.Content) } @@ -146,7 +146,7 @@ func TestMCPServers(t *testing.T) { t.Fatalf("Failed to send message: %v", err) } - if message.Data.Content == nil || !strings.Contains(*message.Data.Content, "hunter2") { + if message.Data.Content == nil || !strings.Contains(*message.Data.Content.String, "hunter2") { t.Errorf("Expected message to contain 'hunter2', got: %v", message.Data.Content) } @@ -231,7 +231,7 @@ func TestCustomAgents(t *testing.T) { t.Fatalf("Failed to get final message: %v", err) } - if message.Data.Content == nil || !strings.Contains(*message.Data.Content, "10") { + if message.Data.Content == nil || !strings.Contains(*message.Data.Content.String, "10") { t.Errorf("Expected message to contain '10', got: %v", message.Data.Content) } @@ -280,7 +280,7 @@ func TestCustomAgents(t *testing.T) { t.Fatalf("Failed to send message: %v", err) } - if message.Data.Content == nil || !strings.Contains(*message.Data.Content, "12") { + if message.Data.Content == nil || !strings.Contains(*message.Data.Content.String, "12") { t.Errorf("Expected message to contain '12', got: %v", message.Data.Content) } @@ -441,7 +441,7 @@ func TestCombinedConfiguration(t *testing.T) { t.Fatalf("Failed to get final message: %v", err) } - if message.Data.Content == nil || !strings.Contains(*message.Data.Content, "14") { + if message.Data.Content == nil || !strings.Contains(*message.Data.Content.String, "14") { t.Errorf("Expected message to contain '14', got: %v", message.Data.Content) } diff --git a/go/internal/e2e/multi_client_test.go b/go/internal/e2e/multi_client_test.go index 406f118ce..ad0c94b46 100644 --- a/go/internal/e2e/multi_client_test.go +++ b/go/internal/e2e/multi_client_test.go @@ -112,7 +112,7 @@ func TestMultiClient(t *testing.T) { t.Fatalf("Failed to send message: %v", err) } - if response == nil || response.Data.Content == nil || !strings.Contains(*response.Data.Content, "MAGIC_hello_42") { + if response == nil || response.Data.Content == nil || !strings.Contains(*response.Data.Content.String, "MAGIC_hello_42") { t.Errorf("Expected response to contain 'MAGIC_hello_42', got %v", response) } @@ -180,7 +180,7 @@ func TestMultiClient(t *testing.T) { if err != nil { t.Fatalf("Failed to send message: %v", err) } - if response == nil || response.Data.Content == nil || *response.Data.Content == "" { + if response == nil || response.Data.Content == nil || *response.Data.Content.String == "" { t.Errorf("Expected non-empty response") } @@ -371,8 +371,8 @@ func TestMultiClient(t *testing.T) { if response1 == nil || response1.Data.Content == nil { t.Fatalf("Expected response with content") } - if !strings.Contains(*response1.Data.Content, "CITY_FOR_US") { - t.Errorf("Expected response to contain 'CITY_FOR_US', got '%s'", *response1.Data.Content) + if !strings.Contains(*response1.Data.Content.String, "CITY_FOR_US") { + t.Errorf("Expected response to contain 'CITY_FOR_US', got '%s'", *response1.Data.Content.String) } response2, err := session1.SendAndWait(t.Context(), copilot.MessageOptions{ @@ -384,8 +384,8 @@ func TestMultiClient(t *testing.T) { if response2 == nil || response2.Data.Content == nil { t.Fatalf("Expected response with content") } - if !strings.Contains(*response2.Data.Content, "CURRENCY_FOR_US") { - t.Errorf("Expected response to contain 'CURRENCY_FOR_US', got '%s'", *response2.Data.Content) + if !strings.Contains(*response2.Data.Content.String, "CURRENCY_FOR_US") { + t.Errorf("Expected response to contain 'CURRENCY_FOR_US', got '%s'", *response2.Data.Content.String) } session2.Disconnect() @@ -436,8 +436,8 @@ func TestMultiClient(t *testing.T) { if stableResponse == nil || stableResponse.Data.Content == nil { t.Fatalf("Expected response with content") } - if !strings.Contains(*stableResponse.Data.Content, "STABLE_test1") { - t.Errorf("Expected response to contain 'STABLE_test1', got '%s'", *stableResponse.Data.Content) + if !strings.Contains(*stableResponse.Data.Content.String, "STABLE_test1") { + t.Errorf("Expected response to contain 'STABLE_test1', got '%s'", *stableResponse.Data.Content.String) } ephemeralResponse, err := session1.SendAndWait(t.Context(), copilot.MessageOptions{ @@ -449,8 +449,8 @@ func TestMultiClient(t *testing.T) { if ephemeralResponse == nil || ephemeralResponse.Data.Content == nil { t.Fatalf("Expected response with content") } - if !strings.Contains(*ephemeralResponse.Data.Content, "EPHEMERAL_test2") { - t.Errorf("Expected response to contain 'EPHEMERAL_test2', got '%s'", *ephemeralResponse.Data.Content) + if !strings.Contains(*ephemeralResponse.Data.Content.String, "EPHEMERAL_test2") { + t.Errorf("Expected response to contain 'EPHEMERAL_test2', got '%s'", *ephemeralResponse.Data.Content.String) } // Disconnect client 2 without destroying the shared session @@ -474,12 +474,12 @@ func TestMultiClient(t *testing.T) { if afterResponse == nil || afterResponse.Data.Content == nil { t.Fatalf("Expected response with content") } - if !strings.Contains(*afterResponse.Data.Content, "STABLE_still_here") { - t.Errorf("Expected response to contain 'STABLE_still_here', got '%s'", *afterResponse.Data.Content) + if !strings.Contains(*afterResponse.Data.Content.String, "STABLE_still_here") { + t.Errorf("Expected response to contain 'STABLE_still_here', got '%s'", *afterResponse.Data.Content.String) } // ephemeral_tool should NOT have produced a result - if strings.Contains(*afterResponse.Data.Content, "EPHEMERAL_") { - t.Errorf("Expected response NOT to contain 'EPHEMERAL_', got '%s'", *afterResponse.Data.Content) + if strings.Contains(*afterResponse.Data.Content.String, "EPHEMERAL_") { + t.Errorf("Expected response NOT to contain 'EPHEMERAL_', got '%s'", *afterResponse.Data.Content.String) } }) } diff --git a/go/internal/e2e/permissions_test.go b/go/internal/e2e/permissions_test.go index 98f620043..e6e6ebcae 100644 --- a/go/internal/e2e/permissions_test.go +++ b/go/internal/e2e/permissions_test.go @@ -266,7 +266,7 @@ func TestPermissions(t *testing.T) { t.Fatalf("Failed to get final message: %v", err) } - if message.Data.Content == nil || !strings.Contains(*message.Data.Content, "4") { + if message.Data.Content == nil || !strings.Contains(*message.Data.Content.String, "4") { t.Errorf("Expected message to contain '4', got: %v", message.Data.Content) } }) diff --git a/go/internal/e2e/session_config_test.go b/go/internal/e2e/session_config_test.go new file mode 100644 index 000000000..77edeec74 --- /dev/null +++ b/go/internal/e2e/session_config_test.go @@ -0,0 +1,164 @@ +package e2e + +import ( + "encoding/base64" + "encoding/json" + "os" + "path/filepath" + "testing" + + copilot "github.com/github/copilot-sdk/go" + "github.com/github/copilot-sdk/go/internal/e2e/testharness" + "github.com/github/copilot-sdk/go/rpc" +) + +// hasImageURLContent returns true if any user message in the given exchanges +// contains an image_url content part (multimodal vision content). +func hasImageURLContent(exchanges []testharness.ParsedHttpExchange) bool { + for _, ex := range exchanges { + for _, msg := range ex.Request.Messages { + if msg.Role == "user" && len(msg.RawContent) > 0 { + var content []interface{} + if json.Unmarshal(msg.RawContent, &content) == nil { + for _, part := range content { + if m, ok := part.(map[string]interface{}); ok { + if m["type"] == "image_url" { + return true + } + } + } + } + } + } + } + return false +} + +func TestSessionConfig(t *testing.T) { + ctx := testharness.NewTestContext(t) + client := ctx.NewClient() + t.Cleanup(func() { client.ForceStop() }) + + if err := client.Start(t.Context()); err != nil { + t.Fatalf("Failed to start client: %v", err) + } + + // Write 1x1 PNG to the work directory + png1x1, err := base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==") + if err != nil { + t.Fatalf("Failed to decode PNG: %v", err) + } + if err := os.WriteFile(filepath.Join(ctx.WorkDir, "test.png"), png1x1, 0644); err != nil { + t.Fatalf("Failed to write test.png: %v", err) + } + + viewImagePrompt := "Use the view tool to look at the file test.png and describe what you see" + + t.Run("vision disabled then enabled via setModel", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + ModelCapabilities: &rpc.ModelCapabilitiesOverride{ + Supports: &rpc.ModelCapabilitiesOverrideSupports{ + Vision: copilot.Bool(false), + }, + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + // Turn 1: vision off — no image_url expected + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: viewImagePrompt}); err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + trafficAfterT1, err := ctx.GetExchanges() + if err != nil { + t.Fatalf("Failed to get exchanges: %v", err) + } + if hasImageURLContent(trafficAfterT1) { + t.Error("Expected no image_url content parts when vision is disabled") + } + + // Switch vision on + if err := session.SetModel(t.Context(), "claude-sonnet-4.5", &copilot.SetModelOptions{ + ModelCapabilities: &rpc.ModelCapabilitiesOverride{ + Supports: &rpc.ModelCapabilitiesOverrideSupports{ + Vision: copilot.Bool(true), + }, + }, + }); err != nil { + t.Fatalf("SetModel returned error: %v", err) + } + + // Turn 2: vision on — image_url expected in new exchanges + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: viewImagePrompt}); err != nil { + t.Fatalf("Failed to send second message: %v", err) + } + + trafficAfterT2, err := ctx.GetExchanges() + if err != nil { + t.Fatalf("Failed to get exchanges after turn 2: %v", err) + } + newExchanges := trafficAfterT2[len(trafficAfterT1):] + if !hasImageURLContent(newExchanges) { + t.Error("Expected image_url content parts when vision is enabled") + } + }) + + t.Run("vision enabled then disabled via setModel", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + ModelCapabilities: &rpc.ModelCapabilitiesOverride{ + Supports: &rpc.ModelCapabilitiesOverrideSupports{ + Vision: copilot.Bool(true), + }, + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + // Turn 1: vision on — image_url expected + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: viewImagePrompt}); err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + trafficAfterT1, err := ctx.GetExchanges() + if err != nil { + t.Fatalf("Failed to get exchanges: %v", err) + } + if !hasImageURLContent(trafficAfterT1) { + t.Error("Expected image_url content parts when vision is enabled") + } + + // Switch vision off + if err := session.SetModel(t.Context(), "claude-sonnet-4.5", &copilot.SetModelOptions{ + ModelCapabilities: &rpc.ModelCapabilitiesOverride{ + Supports: &rpc.ModelCapabilitiesOverrideSupports{ + Vision: copilot.Bool(false), + }, + }, + }); err != nil { + t.Fatalf("SetModel returned error: %v", err) + } + + // Turn 2: vision off — no image_url expected in new exchanges + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: viewImagePrompt}); err != nil { + t.Fatalf("Failed to send second message: %v", err) + } + + trafficAfterT2, err := ctx.GetExchanges() + if err != nil { + t.Fatalf("Failed to get exchanges after turn 2: %v", err) + } + newExchanges := trafficAfterT2[len(trafficAfterT1):] + if hasImageURLContent(newExchanges) { + t.Error("Expected no image_url content parts when vision is disabled") + } + }) +} diff --git a/go/internal/e2e/session_test.go b/go/internal/e2e/session_test.go index caab5255e..e895d8126 100644 --- a/go/internal/e2e/session_test.go +++ b/go/internal/e2e/session_test.go @@ -70,7 +70,7 @@ func TestSession(t *testing.T) { t.Fatalf("Failed to send message: %v", err) } - if assistantMessage.Data.Content == nil || !strings.Contains(*assistantMessage.Data.Content, "2") { + if assistantMessage.Data.Content == nil || !strings.Contains(*assistantMessage.Data.Content.String, "2") { t.Errorf("Expected assistant message to contain '2', got %v", assistantMessage.Data.Content) } @@ -79,7 +79,7 @@ func TestSession(t *testing.T) { t.Fatalf("Failed to send second message: %v", err) } - if secondMessage.Data.Content == nil || !strings.Contains(*secondMessage.Data.Content, "4") { + if secondMessage.Data.Content == nil || !strings.Contains(*secondMessage.Data.Content.String, "4") { t.Errorf("Expected second message to contain '4', got %v", secondMessage.Data.Content) } }) @@ -106,7 +106,7 @@ func TestSession(t *testing.T) { content := "" if assistantMessage != nil && assistantMessage.Data.Content != nil { - content = *assistantMessage.Data.Content + content = *assistantMessage.Data.Content.String } if !strings.Contains(content, "GitHub") { @@ -160,7 +160,7 @@ func TestSession(t *testing.T) { content := "" if assistantMessage.Data.Content != nil { - content = *assistantMessage.Data.Content + content = *assistantMessage.Data.Content.String } if strings.Contains(content, "GitHub") { @@ -359,7 +359,7 @@ func TestSession(t *testing.T) { content := "" if assistantMessage.Data.Content != nil { - content = *assistantMessage.Data.Content + content = *assistantMessage.Data.Content.String } if !strings.Contains(content, "54321") { @@ -391,7 +391,7 @@ func TestSession(t *testing.T) { t.Fatalf("Failed to get assistant message: %v", err) } - if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content, "2") { + if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content.String, "2") { t.Errorf("Expected answer to contain '2', got %v", answer.Data.Content) } @@ -412,7 +412,7 @@ func TestSession(t *testing.T) { t.Fatalf("Failed to get assistant message from resumed session: %v", err) } - if answer2.Data.Content == nil || !strings.Contains(*answer2.Data.Content, "2") { + if answer2.Data.Content == nil || !strings.Contains(*answer2.Data.Content.String, "2") { t.Errorf("Expected resumed session answer to contain '2', got %v", answer2.Data.Content) } @@ -421,7 +421,7 @@ func TestSession(t *testing.T) { if err != nil { t.Fatalf("Failed to send follow-up message: %v", err) } - if answer3 == nil || answer3.Data.Content == nil || !strings.Contains(*answer3.Data.Content, "4") { + if answer3 == nil || answer3.Data.Content == nil || !strings.Contains(*answer3.Data.Content.String, "4") { t.Errorf("Expected follow-up answer to contain '4', got %v", answer3) } }) @@ -446,7 +446,7 @@ func TestSession(t *testing.T) { t.Fatalf("Failed to get assistant message: %v", err) } - if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content, "2") { + if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content.String, "2") { t.Errorf("Expected answer to contain '2', got %v", answer.Data.Content) } @@ -494,7 +494,7 @@ func TestSession(t *testing.T) { if err != nil { t.Fatalf("Failed to send follow-up message: %v", err) } - if answer3 == nil || answer3.Data.Content == nil || !strings.Contains(*answer3.Data.Content, "4") { + if answer3 == nil || answer3.Data.Content == nil || !strings.Contains(*answer3.Data.Content.String, "4") { t.Errorf("Expected follow-up answer to contain '4', got %v", answer3) } }) @@ -625,7 +625,7 @@ func TestSession(t *testing.T) { t.Fatalf("Failed to send message after abort: %v", err) } - if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content, "4") { + if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content.String, "4") { t.Errorf("Expected answer to contain '4', got %v", answer.Data.Content) } }) @@ -720,7 +720,7 @@ func TestSession(t *testing.T) { if err != nil { t.Fatalf("Failed to get assistant message: %v", err) } - if assistantMessage.Data.Content == nil || !strings.Contains(*assistantMessage.Data.Content, "300") { + if assistantMessage.Data.Content == nil || !strings.Contains(*assistantMessage.Data.Content.String, "300") { t.Errorf("Expected assistant message to contain '300', got %v", assistantMessage.Data.Content) } }) @@ -753,7 +753,7 @@ func TestSession(t *testing.T) { t.Fatalf("Failed to get assistant message: %v", err) } - if assistantMessage.Data.Content == nil || !strings.Contains(*assistantMessage.Data.Content, "2") { + if assistantMessage.Data.Content == nil || !strings.Contains(*assistantMessage.Data.Content.String, "2") { t.Errorf("Expected assistant message to contain '2', got %v", assistantMessage.Data.Content) } }) diff --git a/go/internal/e2e/skills_test.go b/go/internal/e2e/skills_test.go index 524280fd8..c632251e5 100644 --- a/go/internal/e2e/skills_test.go +++ b/go/internal/e2e/skills_test.go @@ -72,7 +72,7 @@ func TestSkills(t *testing.T) { t.Fatalf("Failed to send message: %v", err) } - if message.Data.Content == nil || !strings.Contains(*message.Data.Content, skillMarker) { + if message.Data.Content == nil || !strings.Contains(*message.Data.Content.String, skillMarker) { t.Errorf("Expected message to contain skill marker '%s', got: %v", skillMarker, message.Data.Content) } @@ -101,8 +101,8 @@ func TestSkills(t *testing.T) { t.Fatalf("Failed to send message: %v", err) } - if message.Data.Content != nil && strings.Contains(*message.Data.Content, skillMarker) { - t.Errorf("Expected message to NOT contain skill marker '%s' when disabled, got: %v", skillMarker, *message.Data.Content) + if message.Data.Content != nil && strings.Contains(*message.Data.Content.String, skillMarker) { + t.Errorf("Expected message to NOT contain skill marker '%s' when disabled, got: %v", skillMarker, *message.Data.Content.String) } session.Disconnect() @@ -127,8 +127,8 @@ func TestSkills(t *testing.T) { t.Fatalf("Failed to send message: %v", err) } - if message1.Data.Content != nil && strings.Contains(*message1.Data.Content, skillMarker) { - t.Errorf("Expected message to NOT contain skill marker before skill was added, got: %v", *message1.Data.Content) + if message1.Data.Content != nil && strings.Contains(*message1.Data.Content.String, skillMarker) { + t.Errorf("Expected message to NOT contain skill marker before skill was added, got: %v", *message1.Data.Content.String) } // Resume with skillDirectories - skill should now be active @@ -150,7 +150,7 @@ func TestSkills(t *testing.T) { t.Fatalf("Failed to send message: %v", err) } - if message2.Data.Content == nil || !strings.Contains(*message2.Data.Content, skillMarker) { + if message2.Data.Content == nil || !strings.Contains(*message2.Data.Content.String, skillMarker) { t.Errorf("Expected message to contain skill marker '%s' after resume, got: %v", skillMarker, message2.Data.Content) } diff --git a/go/internal/e2e/streaming_fidelity_test.go b/go/internal/e2e/streaming_fidelity_test.go index ef76c3d8b..c2f50349a 100644 --- a/go/internal/e2e/streaming_fidelity_test.go +++ b/go/internal/e2e/streaming_fidelity_test.go @@ -161,7 +161,7 @@ func TestStreamingFidelity(t *testing.T) { if err != nil { t.Fatalf("Failed to send follow-up message: %v", err) } - if answer == nil || answer.Data.Content == nil || !strings.Contains(*answer.Data.Content, "18") { + if answer == nil || answer.Data.Content == nil || !strings.Contains(*answer.Data.Content.String, "18") { t.Errorf("Expected answer to contain '18', got %v", answer) } diff --git a/go/internal/e2e/testharness/proxy.go b/go/internal/e2e/testharness/proxy.go index 91f8a8e0a..0caf19403 100644 --- a/go/internal/e2e/testharness/proxy.go +++ b/go/internal/e2e/testharness/proxy.go @@ -172,10 +172,35 @@ type ChatCompletionRequest struct { // ChatCompletionMessage represents a message in the chat completion request. type ChatCompletionMessage struct { - Role string `json:"role"` - Content string `json:"content,omitempty"` - ToolCallID string `json:"tool_call_id,omitempty"` - ToolCalls []ToolCall `json:"tool_calls,omitempty"` + Role string `json:"role"` + Content string `json:"content,omitempty"` + RawContent json.RawMessage `json:"-"` + ToolCallID string `json:"tool_call_id,omitempty"` + ToolCalls []ToolCall `json:"tool_calls,omitempty"` +} + +// UnmarshalJSON handles Content being either a plain string or an array of +// content parts (e.g. multimodal messages with image_url entries). +func (m *ChatCompletionMessage) UnmarshalJSON(data []byte) error { + type Alias ChatCompletionMessage + aux := &struct { + Content json.RawMessage `json:"content,omitempty"` + *Alias + }{ + Alias: (*Alias)(m), + } + if err := json.Unmarshal(data, aux); err != nil { + return err + } + m.RawContent = aux.Content + m.Content = "" + if len(aux.Content) > 0 { + var s string + if json.Unmarshal(aux.Content, &s) == nil { + m.Content = s + } + } + return nil } // ToolCall represents a tool call in an assistant message. diff --git a/go/internal/e2e/tool_results_test.go b/go/internal/e2e/tool_results_test.go index b35d9b5d0..28e273cbb 100644 --- a/go/internal/e2e/tool_results_test.go +++ b/go/internal/e2e/tool_results_test.go @@ -48,7 +48,7 @@ func TestToolResults(t *testing.T) { content := "" if answer.Data.Content != nil { - content = *answer.Data.Content + content = *answer.Data.Content.String } if !strings.Contains(strings.ToLower(content), "sunny") && !strings.Contains(content, "72") { t.Errorf("Expected answer to mention sunny or 72, got %q", content) @@ -96,7 +96,7 @@ func TestToolResults(t *testing.T) { content := "" if answer.Data.Content != nil { - content = *answer.Data.Content + content = *answer.Data.Content.String } if !strings.Contains(strings.ToLower(content), "service is down") { t.Errorf("Expected 'service is down', got %q", content) @@ -146,7 +146,7 @@ func TestToolResults(t *testing.T) { content := "" if answer.Data.Content != nil { - content = *answer.Data.Content + content = *answer.Data.Content.String } if !strings.Contains(strings.ToLower(content), "no issues") { t.Errorf("Expected 'no issues', got %q", content) diff --git a/go/internal/e2e/tools_test.go b/go/internal/e2e/tools_test.go index c9676363f..005ff5c44 100644 --- a/go/internal/e2e/tools_test.go +++ b/go/internal/e2e/tools_test.go @@ -43,7 +43,7 @@ func TestTools(t *testing.T) { t.Fatalf("Failed to get assistant message: %v", err) } - if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content, "ELIZA") { + if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content.String, "ELIZA") { t.Errorf("Expected answer to contain 'ELIZA', got %v", answer.Data.Content) } }) @@ -78,7 +78,7 @@ func TestTools(t *testing.T) { t.Fatalf("Failed to get assistant message: %v", err) } - if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content, "HELLO") { + if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content.String, "HELLO") { t.Errorf("Expected answer to contain 'HELLO', got %v", answer.Data.Content) } }) @@ -162,10 +162,10 @@ func TestTools(t *testing.T) { } // The assistant should not see the exception information - if answer.Data.Content != nil && strings.Contains(*answer.Data.Content, "Melbourne") { - t.Errorf("Assistant should not see error details 'Melbourne', got '%s'", *answer.Data.Content) + if answer.Data.Content != nil && strings.Contains(*answer.Data.Content.String, "Melbourne") { + t.Errorf("Assistant should not see error details 'Melbourne', got '%s'", *answer.Data.Content.String) } - if answer.Data.Content == nil || !strings.Contains(strings.ToLower(*answer.Data.Content), "unknown") { + if answer.Data.Content == nil || !strings.Contains(strings.ToLower(*answer.Data.Content.String), "unknown") { t.Errorf("Expected answer to contain 'unknown', got %v", answer.Data.Content) } }) @@ -236,7 +236,7 @@ func TestTools(t *testing.T) { t.Fatalf("Expected assistant message with content") } - responseContent := *answer.Data.Content + responseContent := *answer.Data.Content.String if responseContent == "" { t.Errorf("Expected non-empty response") } @@ -301,7 +301,7 @@ func TestTools(t *testing.T) { t.Fatalf("Failed to get assistant message: %v", err) } - if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content, "RESULT: test123") { + if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content.String, "RESULT: test123") { t.Errorf("Expected answer to contain 'RESULT: test123', got %v", answer.Data.Content) } @@ -343,7 +343,7 @@ func TestTools(t *testing.T) { t.Fatalf("Failed to get assistant message: %v", err) } - if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content, "CUSTOM_GREP_RESULT") { + if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content.String, "CUSTOM_GREP_RESULT") { t.Errorf("Expected answer to contain 'CUSTOM_GREP_RESULT', got %v", answer.Data.Content) } }) @@ -386,7 +386,7 @@ func TestTools(t *testing.T) { t.Fatalf("Failed to get assistant message: %v", err) } - if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content, "HELLO") { + if answer.Data.Content == nil || !strings.Contains(*answer.Data.Content.String, "HELLO") { t.Errorf("Expected answer to contain 'HELLO', got %v", answer.Data.Content) } diff --git a/go/rpc/generated_rpc.go b/go/rpc/generated_rpc.go index 3e7b336b7..6eee90963 100644 --- a/go/rpc/generated_rpc.go +++ b/go/rpc/generated_rpc.go @@ -33,7 +33,7 @@ type Model struct { // Billing information Billing *Billing `json:"billing,omitempty"` // Model capabilities and limits - Capabilities Capabilities `json:"capabilities"` + Capabilities ModelCapabilities `json:"capabilities"` // Default reasoning effort level (only present if model supports reasoning effort) DefaultReasoningEffort *string `json:"defaultReasoningEffort,omitempty"` // Model identifier (e.g., "claude-sonnet-4.5") @@ -53,25 +53,37 @@ type Billing struct { } // Model capabilities and limits -type Capabilities struct { +type ModelCapabilities struct { // Token limits for prompts, outputs, and context window - Limits Limits `json:"limits"` + Limits ModelCapabilitiesLimits `json:"limits"` // Feature flags indicating what the model supports - Supports Supports `json:"supports"` + Supports ModelCapabilitiesSupports `json:"supports"` } // Token limits for prompts, outputs, and context window -type Limits struct { +type ModelCapabilitiesLimits struct { // Maximum total context window size in tokens MaxContextWindowTokens float64 `json:"max_context_window_tokens"` // Maximum number of output/completion tokens MaxOutputTokens *float64 `json:"max_output_tokens,omitempty"` // Maximum number of prompt/input tokens MaxPromptTokens *float64 `json:"max_prompt_tokens,omitempty"` + // Vision-specific limits + Vision *ModelCapabilitiesLimitsVision `json:"vision,omitempty"` +} + +// Vision-specific limits +type ModelCapabilitiesLimitsVision struct { + // Maximum image size in bytes + MaxPromptImageSize float64 `json:"max_prompt_image_size"` + // Maximum number of images per prompt + MaxPromptImages float64 `json:"max_prompt_images"` + // MIME types the model accepts + SupportedMediaTypes []string `json:"supported_media_types"` } // Feature flags indicating what the model supports -type Supports struct { +type ModelCapabilitiesSupports struct { // Whether this model supports reasoning effort configuration ReasoningEffort *bool `json:"reasoningEffort,omitempty"` // Whether this model supports vision/image input @@ -234,12 +246,46 @@ type SessionModelSwitchToResult struct { } type SessionModelSwitchToParams struct { + // Override individual model capabilities resolved by the runtime + ModelCapabilities *ModelCapabilitiesOverride `json:"modelCapabilities,omitempty"` // Model identifier to switch to ModelID string `json:"modelId"` // Reasoning effort level to use for the model ReasoningEffort *string `json:"reasoningEffort,omitempty"` } +// Override individual model capabilities resolved by the runtime +type ModelCapabilitiesOverride struct { + // Token limits for prompts, outputs, and context window + Limits *ModelCapabilitiesOverrideLimits `json:"limits,omitempty"` + // Feature flags indicating what the model supports + Supports *ModelCapabilitiesOverrideSupports `json:"supports,omitempty"` +} + +// Token limits for prompts, outputs, and context window +type ModelCapabilitiesOverrideLimits struct { + // Maximum total context window size in tokens + MaxContextWindowTokens *float64 `json:"max_context_window_tokens,omitempty"` + MaxOutputTokens *float64 `json:"max_output_tokens,omitempty"` + MaxPromptTokens *float64 `json:"max_prompt_tokens,omitempty"` + Vision *ModelCapabilitiesOverrideLimitsVision `json:"vision,omitempty"` +} + +type ModelCapabilitiesOverrideLimitsVision struct { + // Maximum image size in bytes + MaxPromptImageSize *float64 `json:"max_prompt_image_size,omitempty"` + // Maximum number of images per prompt + MaxPromptImages *float64 `json:"max_prompt_images,omitempty"` + // MIME types the model accepts + SupportedMediaTypes []string `json:"supported_media_types,omitempty"` +} + +// Feature flags indicating what the model supports +type ModelCapabilitiesOverrideSupports struct { + ReasoningEffort *bool `json:"reasoningEffort,omitempty"` + Vision *bool `json:"vision,omitempty"` +} + type SessionModeGetResult struct { // The current agent mode. Mode Mode `json:"mode"` @@ -981,6 +1027,9 @@ func (a *ModelApi) SwitchTo(ctx context.Context, params *SessionModelSwitchToPar if params.ReasoningEffort != nil { req["reasoningEffort"] = *params.ReasoningEffort } + if params.ModelCapabilities != nil { + req["modelCapabilities"] = *params.ModelCapabilities + } } raw, err := a.client.Request("session.model.switchTo", req) if err != nil { diff --git a/go/session.go b/go/session.go index 225f2bf5e..f7b4a852c 100644 --- a/go/session.go +++ b/go/session.go @@ -1229,6 +1229,9 @@ func (s *Session) Abort(ctx context.Context) error { type SetModelOptions struct { // ReasoningEffort sets the reasoning effort level for the new model (e.g., "low", "medium", "high", "xhigh"). ReasoningEffort *string + // ModelCapabilities overrides individual model capabilities resolved by the runtime. + // Only non-nil fields are applied over the runtime-resolved capabilities. + ModelCapabilities *rpc.ModelCapabilitiesOverride } // SetModel changes the model for this session. @@ -1246,6 +1249,7 @@ func (s *Session) SetModel(ctx context.Context, model string, opts *SetModelOpti params := &rpc.SessionModelSwitchToParams{ModelID: model} if opts != nil { params.ReasoningEffort = opts.ReasoningEffort + params.ModelCapabilities = opts.ModelCapabilities } _, err := s.RPC.Model.SwitchTo(ctx, params) if err != nil { diff --git a/go/types.go b/go/types.go index 9f23dcb85..d8081710e 100644 --- a/go/types.go +++ b/go/types.go @@ -3,6 +3,8 @@ package copilot import ( "context" "encoding/json" + + "github.com/github/copilot-sdk/go/rpc" ) // ConnectionState represents the client connection state @@ -475,6 +477,9 @@ type SessionConfig struct { Streaming bool // Provider configures a custom model provider (BYOK) Provider *ProviderConfig + // ModelCapabilities overrides individual model capabilities resolved by the runtime. + // Only non-nil fields are applied over the runtime-resolved capabilities. + ModelCapabilities *rpc.ModelCapabilitiesOverride // MCPServers configures MCP servers for the session MCPServers map[string]MCPServerConfig // CustomAgents configures custom agents for the session @@ -650,6 +655,9 @@ type ResumeSessionConfig struct { ExcludedTools []string // Provider configures a custom model provider Provider *ProviderConfig + // ModelCapabilities overrides individual model capabilities resolved by the runtime. + // Only non-nil fields are applied over the runtime-resolved capabilities. + ModelCapabilities *rpc.ModelCapabilitiesOverride // ReasoningEffort level for models that support it. // Valid values: "low", "medium", "high", "xhigh" ReasoningEffort string @@ -851,32 +859,33 @@ type SessionLifecycleHandler func(event SessionLifecycleEvent) // createSessionRequest is the request for session.create type createSessionRequest struct { - Model string `json:"model,omitempty"` - SessionID string `json:"sessionId,omitempty"` - ClientName string `json:"clientName,omitempty"` - ReasoningEffort string `json:"reasoningEffort,omitempty"` - Tools []Tool `json:"tools,omitempty"` - SystemMessage *SystemMessageConfig `json:"systemMessage,omitempty"` - AvailableTools []string `json:"availableTools"` - ExcludedTools []string `json:"excludedTools,omitempty"` - Provider *ProviderConfig `json:"provider,omitempty"` - RequestPermission *bool `json:"requestPermission,omitempty"` - RequestUserInput *bool `json:"requestUserInput,omitempty"` - Hooks *bool `json:"hooks,omitempty"` - WorkingDirectory string `json:"workingDirectory,omitempty"` - Streaming *bool `json:"streaming,omitempty"` - MCPServers map[string]MCPServerConfig `json:"mcpServers,omitempty"` - EnvValueMode string `json:"envValueMode,omitempty"` - CustomAgents []CustomAgentConfig `json:"customAgents,omitempty"` - Agent string `json:"agent,omitempty"` - ConfigDir string `json:"configDir,omitempty"` - SkillDirectories []string `json:"skillDirectories,omitempty"` - DisabledSkills []string `json:"disabledSkills,omitempty"` - InfiniteSessions *InfiniteSessionConfig `json:"infiniteSessions,omitempty"` - Commands []wireCommand `json:"commands,omitempty"` - RequestElicitation *bool `json:"requestElicitation,omitempty"` - Traceparent string `json:"traceparent,omitempty"` - Tracestate string `json:"tracestate,omitempty"` + Model string `json:"model,omitempty"` + SessionID string `json:"sessionId,omitempty"` + ClientName string `json:"clientName,omitempty"` + ReasoningEffort string `json:"reasoningEffort,omitempty"` + Tools []Tool `json:"tools,omitempty"` + SystemMessage *SystemMessageConfig `json:"systemMessage,omitempty"` + AvailableTools []string `json:"availableTools"` + ExcludedTools []string `json:"excludedTools,omitempty"` + Provider *ProviderConfig `json:"provider,omitempty"` + ModelCapabilities *rpc.ModelCapabilitiesOverride `json:"modelCapabilities,omitempty"` + RequestPermission *bool `json:"requestPermission,omitempty"` + RequestUserInput *bool `json:"requestUserInput,omitempty"` + Hooks *bool `json:"hooks,omitempty"` + WorkingDirectory string `json:"workingDirectory,omitempty"` + Streaming *bool `json:"streaming,omitempty"` + MCPServers map[string]MCPServerConfig `json:"mcpServers,omitempty"` + EnvValueMode string `json:"envValueMode,omitempty"` + CustomAgents []CustomAgentConfig `json:"customAgents,omitempty"` + Agent string `json:"agent,omitempty"` + ConfigDir string `json:"configDir,omitempty"` + SkillDirectories []string `json:"skillDirectories,omitempty"` + DisabledSkills []string `json:"disabledSkills,omitempty"` + InfiniteSessions *InfiniteSessionConfig `json:"infiniteSessions,omitempty"` + Commands []wireCommand `json:"commands,omitempty"` + RequestElicitation *bool `json:"requestElicitation,omitempty"` + Traceparent string `json:"traceparent,omitempty"` + Tracestate string `json:"tracestate,omitempty"` } // wireCommand is the wire representation of a command (name + description only, no handler). @@ -894,33 +903,34 @@ type createSessionResponse struct { // resumeSessionRequest is the request for session.resume type resumeSessionRequest struct { - SessionID string `json:"sessionId"` - ClientName string `json:"clientName,omitempty"` - Model string `json:"model,omitempty"` - ReasoningEffort string `json:"reasoningEffort,omitempty"` - Tools []Tool `json:"tools,omitempty"` - SystemMessage *SystemMessageConfig `json:"systemMessage,omitempty"` - AvailableTools []string `json:"availableTools"` - ExcludedTools []string `json:"excludedTools,omitempty"` - Provider *ProviderConfig `json:"provider,omitempty"` - RequestPermission *bool `json:"requestPermission,omitempty"` - RequestUserInput *bool `json:"requestUserInput,omitempty"` - Hooks *bool `json:"hooks,omitempty"` - WorkingDirectory string `json:"workingDirectory,omitempty"` - ConfigDir string `json:"configDir,omitempty"` - DisableResume *bool `json:"disableResume,omitempty"` - Streaming *bool `json:"streaming,omitempty"` - MCPServers map[string]MCPServerConfig `json:"mcpServers,omitempty"` - EnvValueMode string `json:"envValueMode,omitempty"` - CustomAgents []CustomAgentConfig `json:"customAgents,omitempty"` - Agent string `json:"agent,omitempty"` - SkillDirectories []string `json:"skillDirectories,omitempty"` - DisabledSkills []string `json:"disabledSkills,omitempty"` - InfiniteSessions *InfiniteSessionConfig `json:"infiniteSessions,omitempty"` - Commands []wireCommand `json:"commands,omitempty"` - RequestElicitation *bool `json:"requestElicitation,omitempty"` - Traceparent string `json:"traceparent,omitempty"` - Tracestate string `json:"tracestate,omitempty"` + SessionID string `json:"sessionId"` + ClientName string `json:"clientName,omitempty"` + Model string `json:"model,omitempty"` + ReasoningEffort string `json:"reasoningEffort,omitempty"` + Tools []Tool `json:"tools,omitempty"` + SystemMessage *SystemMessageConfig `json:"systemMessage,omitempty"` + AvailableTools []string `json:"availableTools"` + ExcludedTools []string `json:"excludedTools,omitempty"` + Provider *ProviderConfig `json:"provider,omitempty"` + ModelCapabilities *rpc.ModelCapabilitiesOverride `json:"modelCapabilities,omitempty"` + RequestPermission *bool `json:"requestPermission,omitempty"` + RequestUserInput *bool `json:"requestUserInput,omitempty"` + Hooks *bool `json:"hooks,omitempty"` + WorkingDirectory string `json:"workingDirectory,omitempty"` + ConfigDir string `json:"configDir,omitempty"` + DisableResume *bool `json:"disableResume,omitempty"` + Streaming *bool `json:"streaming,omitempty"` + MCPServers map[string]MCPServerConfig `json:"mcpServers,omitempty"` + EnvValueMode string `json:"envValueMode,omitempty"` + CustomAgents []CustomAgentConfig `json:"customAgents,omitempty"` + Agent string `json:"agent,omitempty"` + SkillDirectories []string `json:"skillDirectories,omitempty"` + DisabledSkills []string `json:"disabledSkills,omitempty"` + InfiniteSessions *InfiniteSessionConfig `json:"infiniteSessions,omitempty"` + Commands []wireCommand `json:"commands,omitempty"` + RequestElicitation *bool `json:"requestElicitation,omitempty"` + Traceparent string `json:"traceparent,omitempty"` + Tracestate string `json:"tracestate,omitempty"` } // resumeSessionResponse is the response from session.resume diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index 98ed1f0c7..e51474b78 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -9,7 +9,7 @@ "version": "0.1.8", "license": "MIT", "dependencies": { - "@github/copilot": "^1.0.17", + "@github/copilot": "^1.0.20-1", "vscode-jsonrpc": "^8.2.1", "zod": "^4.3.6" }, @@ -663,26 +663,26 @@ } }, "node_modules/@github/copilot": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.17.tgz", - "integrity": "sha512-RTJ+kEKOdidjuOs8ozsoBdz+94g7tFJIEu5kz1P2iwJhsL+iIA5rtn9/jXOF0hAI3CLSXKZoSd66cqHrn4rb1A==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.20-1.tgz", + "integrity": "sha512-a34M4P6XcKFy1sDubqn54qakQxeWwA44vKaOh3oNZT8vgna9R4ap2NYGnM8fn7XDAdlJ9QgW6Xt7dfPGwKkt/A==", "license": "SEE LICENSE IN LICENSE.md", "bin": { "copilot": "npm-loader.js" }, "optionalDependencies": { - "@github/copilot-darwin-arm64": "1.0.17", - "@github/copilot-darwin-x64": "1.0.17", - "@github/copilot-linux-arm64": "1.0.17", - "@github/copilot-linux-x64": "1.0.17", - "@github/copilot-win32-arm64": "1.0.17", - "@github/copilot-win32-x64": "1.0.17" + "@github/copilot-darwin-arm64": "1.0.20-1", + "@github/copilot-darwin-x64": "1.0.20-1", + "@github/copilot-linux-arm64": "1.0.20-1", + "@github/copilot-linux-x64": "1.0.20-1", + "@github/copilot-win32-arm64": "1.0.20-1", + "@github/copilot-win32-x64": "1.0.20-1" } }, "node_modules/@github/copilot-darwin-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.17.tgz", - "integrity": "sha512-LSv66P8611y/UjTESnaHLYqLl9kA9yBYsaocZPQoOsvMgCmktgaBgUWq+KMpLMicaFN0jBAE5F0Ve7dW6N9X3A==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.20-1.tgz", + "integrity": "sha512-tip/KyjhRQG7OMAR8rBWrFcPk3XFQQlajozIMPxEA7+qwgMBOlaGcO0iuDEdF5vAtYXhUPPAI/tbuUqkueoJEA==", "cpu": [ "arm64" ], @@ -696,9 +696,9 @@ } }, "node_modules/@github/copilot-darwin-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.17.tgz", - "integrity": "sha512-yqRS0/8kYTGl4VvfJ/QOtHTeYF+DnAWNUReZgt2U0AEP3zgj4z4hxSH7D2PsO/488L4KsBmmcnJr13HmBGiT/w==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.20-1.tgz", + "integrity": "sha512-d/Etng6335TF1Dcw37XFtjKKZqQbqh9trXg5GhMySUamo4UolykylWJuhs+suCx2JJc1lGzPVAdGOxAvj+4P3Q==", "cpu": [ "x64" ], @@ -712,9 +712,9 @@ } }, "node_modules/@github/copilot-linux-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.17.tgz", - "integrity": "sha512-TOK0ma0A24zmQJslkGxUk+KnMFpiqquWEXB5sIv/5Ci45Qi7s0BRWTnqtiJ8Vahwb/wkja6KarHkLA27+ETGUA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.20-1.tgz", + "integrity": "sha512-ptwwVk/uMEoVdGTbhfC8CLtSCq3agnRKlD+iojabcg5K0y0HbaEGIaOeJle0uARpqeyLADgoUkMbth/wWQI2gQ==", "cpu": [ "arm64" ], @@ -728,9 +728,9 @@ } }, "node_modules/@github/copilot-linux-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.17.tgz", - "integrity": "sha512-4Yum3uaAuTM/SiNtzchsO/G/144Bi/Z4FEcearW6WsGDvS6cRwSJeudOM0y4aoy4BHcv8+yw7YuXH5BHC3SAiA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.20-1.tgz", + "integrity": "sha512-sUuR5uVR1/Ndew/pSEQP4vLy2iohW+PMD96R+gzJkF77soe+PfFR7R6Py1VWmwAK1MDblyilDfMcusYLXK48LA==", "cpu": [ "x64" ], @@ -744,9 +744,9 @@ } }, "node_modules/@github/copilot-win32-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.17.tgz", - "integrity": "sha512-I1ferbfQ0aS149WyEUw6XS1sFixwTUUm13BPBQ3yMzD8G2SaoxTsdYdlhZpkVfkfh/rUYyvMKKi9VNxoVYOlDA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.20-1.tgz", + "integrity": "sha512-gk4belEoOHfQH2pJf0GPh2t1N4suIg1mhwJQHveGi5av22XZzYjY7yarNom+YCqc692MAuYsfNF0wXXSij3wBg==", "cpu": [ "arm64" ], @@ -760,9 +760,9 @@ } }, "node_modules/@github/copilot-win32-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.17.tgz", - "integrity": "sha512-kjiOxY9ibS+rPp9XFpPdfdYzluEL3SHN8R5/fnA7RO+kZEJ4FDKWJjAiec3tgVkEHQT3UwNuVa/u3TdfYNF15w==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.20-1.tgz", + "integrity": "sha512-ypRD1iawRw8a0qzhp4fq4ZqvqL86mk2UZNWyuTM8HOe2o3+SrZbveXpEk7gUYJ4ShLhqLVywJHs4+4yPkv5p+A==", "cpu": [ "x64" ], diff --git a/nodejs/package.json b/nodejs/package.json index 99681ec3f..55e058ea6 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -56,7 +56,7 @@ "author": "GitHub", "license": "MIT", "dependencies": { - "@github/copilot": "^1.0.17", + "@github/copilot": "^1.0.20-1", "vscode-jsonrpc": "^8.2.1", "zod": "^4.3.6" }, diff --git a/nodejs/src/client.ts b/nodejs/src/client.ts index 23aac99a3..e61afcacf 100644 --- a/nodejs/src/client.ts +++ b/nodejs/src/client.ts @@ -712,6 +712,7 @@ export class CopilotClient { availableTools: config.availableTools, excludedTools: config.excludedTools, provider: config.provider, + modelCapabilities: config.modelCapabilities, requestPermission: true, requestUserInput: !!config.onUserInputRequest, requestElicitation: !!config.onElicitationRequest, @@ -847,6 +848,7 @@ export class CopilotClient { description: cmd.description, })), provider: config.provider, + modelCapabilities: config.modelCapabilities, requestPermission: true, requestUserInput: !!config.onUserInputRequest, requestElicitation: !!config.onElicitationRequest, diff --git a/nodejs/src/generated/rpc.ts b/nodejs/src/generated/rpc.ts index 4f87c14f2..a72c07b9a 100644 --- a/nodejs/src/generated/rpc.ts +++ b/nodejs/src/generated/rpc.ts @@ -40,41 +40,7 @@ export interface ModelsListResult { * Display name */ name: string; - /** - * Model capabilities and limits - */ - capabilities: { - /** - * Feature flags indicating what the model supports - */ - supports: { - /** - * Whether this model supports vision/image input - */ - vision?: boolean; - /** - * Whether this model supports reasoning effort configuration - */ - reasoningEffort?: boolean; - }; - /** - * Token limits for prompts, outputs, and context window - */ - limits: { - /** - * Maximum number of prompt/input tokens - */ - max_prompt_tokens?: number; - /** - * Maximum number of output/completion tokens - */ - max_output_tokens?: number; - /** - * Maximum total context window size in tokens - */ - max_context_window_tokens: number; - }; - }; + capabilities: ModelCapabilities; /** * Policy state (if applicable) */ @@ -107,6 +73,61 @@ export interface ModelsListResult { defaultReasoningEffort?: string; }[]; } +/** + * Model capabilities and limits + */ +export interface ModelCapabilities { + supports: ModelCapabilitiesSupports; + limits: ModelCapabilitiesLimits; +} +/** + * Feature flags indicating what the model supports + */ +export interface ModelCapabilitiesSupports { + /** + * Whether this model supports vision/image input + */ + vision?: boolean; + /** + * Whether this model supports reasoning effort configuration + */ + reasoningEffort?: boolean; +} +/** + * Token limits for prompts, outputs, and context window + */ +export interface ModelCapabilitiesLimits { + /** + * Maximum number of prompt/input tokens + */ + max_prompt_tokens?: number; + /** + * Maximum number of output/completion tokens + */ + max_output_tokens?: number; + /** + * Maximum total context window size in tokens + */ + max_context_window_tokens: number; + vision?: ModelCapabilitiesLimitsVision; +} +/** + * Vision-specific limits + */ +export interface ModelCapabilitiesLimitsVision { + /** + * MIME types the model accepts + */ + supported_media_types: string[]; + /** + * Maximum number of images per prompt + */ + max_prompt_images: number; + /** + * Maximum image size in bytes + */ + max_prompt_image_size: number; +} export interface ToolsListResult { /** @@ -396,6 +417,47 @@ export interface SessionModelSwitchToParams { * Reasoning effort level to use for the model */ reasoningEffort?: string; + modelCapabilities?: ModelCapabilitiesOverride; +} +/** + * Override individual model capabilities resolved by the runtime + */ +export interface ModelCapabilitiesOverride { + supports?: ModelCapabilitiesOverrideSupports; + limits?: ModelCapabilitiesOverrideLimits; +} +/** + * Feature flags indicating what the model supports + */ +export interface ModelCapabilitiesOverrideSupports { + vision?: boolean; + reasoningEffort?: boolean; +} +/** + * Token limits for prompts, outputs, and context window + */ +export interface ModelCapabilitiesOverrideLimits { + max_prompt_tokens?: number; + max_output_tokens?: number; + /** + * Maximum total context window size in tokens + */ + max_context_window_tokens?: number; + vision?: ModelCapabilitiesOverrideLimitsVision; +} +export interface ModelCapabilitiesOverrideLimitsVision { + /** + * MIME types the model accepts + */ + supported_media_types?: string[]; + /** + * Maximum number of images per prompt + */ + max_prompt_images?: number; + /** + * Maximum image size in bytes + */ + max_prompt_image_size?: number; } export interface SessionModeGetResult { diff --git a/nodejs/src/generated/session-events.ts b/nodejs/src/generated/session-events.ts index 642c933cd..0c0389ad0 100644 --- a/nodejs/src/generated/session-events.ts +++ b/nodejs/src/generated/session-events.ts @@ -270,44 +270,9 @@ export type SessionEvent = ephemeral: true; type: "session.idle"; /** - * Payload indicating the agent is idle; includes any background tasks still in flight + * Payload indicating the session is fully idle with no background tasks in flight */ data: { - /** - * Background tasks still running when the agent became idle - */ - backgroundTasks?: { - /** - * Currently running background agents - */ - agents: { - /** - * Unique identifier of the background agent - */ - agentId: string; - /** - * Type of the background agent - */ - agentType: string; - /** - * Human-readable description of the agent task - */ - description?: string; - }[]; - /** - * Currently running background shell commands - */ - shells: { - /** - * Unique identifier of the background shell - */ - shellId: string; - /** - * Human-readable description of the shell command - */ - description?: string; - }[]; - }; /** * True when the preceding agentic loop was cancelled via abort signal */ @@ -2979,13 +2944,21 @@ export type SessionEvent = ephemeral: true; type: "user_input.completed"; /** - * User input request completion notification signaling UI dismissal + * User input request completion with the user's response */ data: { /** * Request ID of the resolved user input request; clients should dismiss any UI for this request */ requestId: string; + /** + * The user's answer to the input request + */ + answer?: string; + /** + * Whether the answer was typed as free-form text rather than selected from choices + */ + wasFreeform?: boolean; }; } | { @@ -3069,13 +3042,23 @@ export type SessionEvent = ephemeral: true; type: "elicitation.completed"; /** - * Elicitation request completion notification signaling UI dismissal + * Elicitation request completion with the user's response */ data: { /** * Request ID of the resolved elicitation request; clients should dismiss any UI for this request */ requestId: string; + /** + * The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" (dismissed) + */ + action?: "accept" | "decline" | "cancel"; + /** + * The submitted form data when action is 'accept'; keys match the requested schema fields + */ + content?: { + [k: string]: string | number | boolean | string[]; + }; }; } | { @@ -3490,13 +3473,29 @@ export type SessionEvent = ephemeral: true; type: "exit_plan_mode.completed"; /** - * Plan mode exit completion notification signaling UI dismissal + * Plan mode exit completion with the user's approval decision and optional feedback */ data: { /** * Request ID of the resolved exit plan mode request; clients should dismiss any UI for this request */ requestId: string; + /** + * Whether the plan was approved by the user + */ + approved?: boolean; + /** + * Which action the user selected (e.g. 'autopilot', 'interactive', 'exit_only') + */ + selectedAction?: string; + /** + * Whether edits should be auto-approved without confirmation + */ + autoApproveEdits?: boolean; + /** + * Free-form feedback from the user if they requested changes to the plan + */ + feedback?: string; }; } | { diff --git a/nodejs/src/index.ts b/nodejs/src/index.ts index dc754a778..3fab122db 100644 --- a/nodejs/src/index.ts +++ b/nodejs/src/index.ts @@ -36,6 +36,7 @@ export type { MessageOptions, ModelBilling, ModelCapabilities, + ModelCapabilitiesOverride, ModelInfo, ModelPolicy, PermissionHandler, diff --git a/nodejs/src/session.ts b/nodejs/src/session.ts index 0bd5ad7b8..ffb2c045a 100644 --- a/nodejs/src/session.ts +++ b/nodejs/src/session.ts @@ -24,6 +24,7 @@ import type { PermissionRequest, PermissionRequestResult, ReasoningEffort, + ModelCapabilitiesOverride, SectionTransformFn, SessionCapabilities, SessionEvent, @@ -1029,7 +1030,13 @@ export class CopilotSession { * await session.setModel("claude-sonnet-4.6", { reasoningEffort: "high" }); * ``` */ - async setModel(model: string, options?: { reasoningEffort?: ReasoningEffort }): Promise { + async setModel( + model: string, + options?: { + reasoningEffort?: ReasoningEffort; + modelCapabilities?: ModelCapabilitiesOverride; + } + ): Promise { await this.rpc.model.switchTo({ modelId: model, ...options }); } diff --git a/nodejs/src/types.ts b/nodejs/src/types.ts index c20bf00db..0d459e5d1 100644 --- a/nodejs/src/types.ts +++ b/nodejs/src/types.ts @@ -1065,6 +1065,9 @@ export interface SessionConfig { */ reasoningEffort?: ReasoningEffort; + /** Per-property overrides for model capabilities, deep-merged over runtime defaults. */ + modelCapabilities?: ModelCapabilitiesOverride; + /** * Override the default configuration directory location. * When specified, the session will use this directory for storing config and state. @@ -1214,6 +1217,7 @@ export type ResumeSessionConfig = Pick< | "availableTools" | "excludedTools" | "provider" + | "modelCapabilities" | "streaming" | "reasoningEffort" | "onPermissionRequest" @@ -1465,6 +1469,13 @@ export interface ModelCapabilities { }; } +/** Deep-partial override for model capabilities — every property optional. */ +export type ModelCapabilitiesOverride = { + [K in keyof ModelCapabilities]?: { + [P in keyof ModelCapabilities[K]]?: ModelCapabilities[K][P]; + }; +}; + /** * Model policy state */ diff --git a/nodejs/test/e2e/session_config.test.ts b/nodejs/test/e2e/session_config.test.ts index e27421ebf..c1c19e81e 100644 --- a/nodejs/test/e2e/session_config.test.ts +++ b/nodejs/test/e2e/session_config.test.ts @@ -5,7 +5,7 @@ import { approveAll } from "../../src/index.js"; import { createSdkTestContext } from "./harness/sdkTestContext.js"; describe("Session Configuration", async () => { - const { copilotClient: client, workDir } = await createSdkTestContext(); + const { copilotClient: client, workDir, openAiEndpoint } = await createSdkTestContext(); it("should use workingDirectory for tool execution", async () => { const subDir = join(workDir, "subproject"); @@ -75,4 +75,79 @@ describe("Session Configuration", async () => { // Just verify send doesn't throw — attachment support varies by runtime await session.disconnect(); }); + + const PNG_1X1 = Buffer.from( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==", + "base64" + ); + const VIEW_IMAGE_PROMPT = + "Use the view tool to look at the file test.png and describe what you see"; + + function hasImageUrlContent(messages: Array<{ role: string; content: unknown }>): boolean { + return messages.some( + (m) => + m.role === "user" && + Array.isArray(m.content) && + m.content.some((p: { type: string }) => p.type === "image_url") + ); + } + + it("vision disabled then enabled via setModel", async () => { + await writeFile(join(workDir, "test.png"), PNG_1X1); + + const session = await client.createSession({ + onPermissionRequest: approveAll, + modelCapabilities: { supports: { vision: false } }, + }); + + // Turn 1: vision off — no image_url expected + await session.sendAndWait({ prompt: VIEW_IMAGE_PROMPT }); + const trafficAfterT1 = await openAiEndpoint.getExchanges(); + const t1Messages = trafficAfterT1.flatMap((e) => e.request.messages ?? []); + expect(hasImageUrlContent(t1Messages)).toBe(false); + + // Switch vision on (re-specify same model with updated capabilities) + await session.setModel("claude-sonnet-4.5", { + modelCapabilities: { supports: { vision: true } }, + }); + + // Turn 2: vision on — image_url expected + await session.sendAndWait({ prompt: VIEW_IMAGE_PROMPT }); + const trafficAfterT2 = await openAiEndpoint.getExchanges(); + // Only check exchanges added after turn 1 + const newExchanges = trafficAfterT2.slice(trafficAfterT1.length); + const t2Messages = newExchanges.flatMap((e) => e.request.messages ?? []); + expect(hasImageUrlContent(t2Messages)).toBe(true); + + await session.disconnect(); + }); + + it("vision enabled then disabled via setModel", async () => { + await writeFile(join(workDir, "test.png"), PNG_1X1); + + const session = await client.createSession({ + onPermissionRequest: approveAll, + modelCapabilities: { supports: { vision: true } }, + }); + + // Turn 1: vision on — image_url expected + await session.sendAndWait({ prompt: VIEW_IMAGE_PROMPT }); + const trafficAfterT1 = await openAiEndpoint.getExchanges(); + const t1Messages = trafficAfterT1.flatMap((e) => e.request.messages ?? []); + expect(hasImageUrlContent(t1Messages)).toBe(true); + + // Switch vision off + await session.setModel("claude-sonnet-4.5", { + modelCapabilities: { supports: { vision: false } }, + }); + + // Turn 2: vision off — no image_url expected in new exchanges + await session.sendAndWait({ prompt: VIEW_IMAGE_PROMPT }); + const trafficAfterT2 = await openAiEndpoint.getExchanges(); + const newExchanges = trafficAfterT2.slice(trafficAfterT1.length); + const t2Messages = newExchanges.flatMap((e) => e.request.messages ?? []); + expect(hasImageUrlContent(t2Messages)).toBe(false); + + await session.disconnect(); + }); }); diff --git a/python/copilot/__init__.py b/python/copilot/__init__.py index 5a89909c5..db9f150c8 100644 --- a/python/copilot/__init__.py +++ b/python/copilot/__init__.py @@ -4,7 +4,15 @@ JSON-RPC based SDK for programmatic control of GitHub Copilot CLI """ -from .client import CopilotClient, ExternalServerConfig, SubprocessConfig +from .client import ( + CopilotClient, + ExternalServerConfig, + ModelCapabilitiesOverride, + ModelLimitsOverride, + ModelSupportsOverride, + ModelVisionLimitsOverride, + SubprocessConfig, +) from .session import ( CommandContext, CommandDefinition, @@ -33,6 +41,10 @@ "ElicitationResult", "ExternalServerConfig", "InputOptions", + "ModelCapabilitiesOverride", + "ModelLimitsOverride", + "ModelSupportsOverride", + "ModelVisionLimitsOverride", "SessionCapabilities", "SessionUiApi", "SessionUiCapabilities", diff --git a/python/copilot/client.py b/python/copilot/client.py index 356a5fd59..df6756cfe 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -384,6 +384,66 @@ def to_dict(self) -> dict: return result +@dataclass +class ModelVisionLimitsOverride: + supported_media_types: list[str] | None = None + max_prompt_images: int | None = None + max_prompt_image_size: int | None = None + + +@dataclass +class ModelLimitsOverride: + max_prompt_tokens: int | None = None + max_output_tokens: int | None = None + max_context_window_tokens: int | None = None + vision: ModelVisionLimitsOverride | None = None + + +@dataclass +class ModelSupportsOverride: + vision: bool | None = None + reasoning_effort: bool | None = None + + +@dataclass +class ModelCapabilitiesOverride: + supports: ModelSupportsOverride | None = None + limits: ModelLimitsOverride | None = None + + +def _capabilities_to_dict(caps: ModelCapabilitiesOverride) -> dict: + result: dict = {} + if caps.supports is not None: + s: dict = {} + if caps.supports.vision is not None: + s["vision"] = caps.supports.vision + if caps.supports.reasoning_effort is not None: + s["reasoningEffort"] = caps.supports.reasoning_effort + if s: + result["supports"] = s + if caps.limits is not None: + lim: dict = {} + if caps.limits.max_prompt_tokens is not None: + lim["max_prompt_tokens"] = caps.limits.max_prompt_tokens + if caps.limits.max_output_tokens is not None: + lim["max_output_tokens"] = caps.limits.max_output_tokens + if caps.limits.max_context_window_tokens is not None: + lim["max_context_window_tokens"] = caps.limits.max_context_window_tokens + if caps.limits.vision is not None: + v: dict = {} + if caps.limits.vision.supported_media_types is not None: + v["supported_media_types"] = caps.limits.vision.supported_media_types + if caps.limits.vision.max_prompt_images is not None: + v["max_prompt_images"] = caps.limits.vision.max_prompt_images + if caps.limits.vision.max_prompt_image_size is not None: + v["max_prompt_image_size"] = caps.limits.vision.max_prompt_image_size + if v: + lim["vision"] = v + if lim: + result["limits"] = lim + return result + + @dataclass class ModelPolicy: """Model policy state""" @@ -1107,6 +1167,7 @@ async def create_session( hooks: SessionHooks | None = None, working_directory: str | None = None, provider: ProviderConfig | None = None, + model_capabilities: ModelCapabilitiesOverride | None = None, streaming: bool | None = None, mcp_servers: dict[str, MCPServerConfig] | None = None, custom_agents: list[CustomAgentConfig] | None = None, @@ -1141,6 +1202,7 @@ async def create_session( hooks: Lifecycle hooks for the session. working_directory: Working directory for the session. provider: Provider configuration for Azure or custom endpoints. + model_capabilities: Override individual model capabilities resolved by the runtime. streaming: Whether to enable streaming responses. mcp_servers: MCP server configurations. custom_agents: Custom agent configurations. @@ -1247,6 +1309,10 @@ async def create_session( if provider: payload["provider"] = self._convert_provider_to_wire_format(provider) + # Add model capabilities override if provided + if model_capabilities: + payload["modelCapabilities"] = _capabilities_to_dict(model_capabilities) + # Add MCP servers configuration if provided if mcp_servers: payload["mcpServers"] = mcp_servers @@ -1346,6 +1412,7 @@ async def resume_session( hooks: SessionHooks | None = None, working_directory: str | None = None, provider: ProviderConfig | None = None, + model_capabilities: ModelCapabilitiesOverride | None = None, streaming: bool | None = None, mcp_servers: dict[str, MCPServerConfig] | None = None, custom_agents: list[CustomAgentConfig] | None = None, @@ -1380,6 +1447,7 @@ async def resume_session( hooks: Lifecycle hooks for the session. working_directory: Working directory for the session. provider: Provider configuration for Azure or custom endpoints. + model_capabilities: Override individual model capabilities resolved by the runtime. streaming: Whether to enable streaming responses. mcp_servers: MCP server configurations. custom_agents: Custom agent configurations. @@ -1455,6 +1523,8 @@ async def resume_session( payload["excludedTools"] = excluded_tools if provider: payload["provider"] = self._convert_provider_to_wire_format(provider) + if model_capabilities: + payload["modelCapabilities"] = _capabilities_to_dict(model_capabilities) if streaming is not None: payload["streaming"] = streaming diff --git a/python/copilot/generated/rpc.py b/python/copilot/generated/rpc.py index 7852d9984..93b80ee4f 100644 --- a/python/copilot/generated/rpc.py +++ b/python/copilot/generated/rpc.py @@ -10,8 +10,7 @@ from dataclasses import dataclass -from typing import Any, TypeVar, cast -from collections.abc import Callable +from typing import Any, TypeVar, Callable, cast from enum import Enum from uuid import UUID @@ -49,9 +48,9 @@ def from_union(fs, x): assert False -def from_bool(x: Any) -> bool: - assert isinstance(x, bool) - return x +def from_list(f: Callable[[Any], T], x: Any) -> list[T]: + assert isinstance(x, list) + return [f(y) for y in x] def to_class(c: type[T], x: Any) -> dict: @@ -59,9 +58,9 @@ def to_class(c: type[T], x: Any) -> dict: return cast(Any, x).to_dict() -def from_list(f: Callable[[Any], T], x: Any) -> list[T]: - assert isinstance(x, list) - return [f(y) for y in x] +def from_bool(x: Any) -> bool: + assert isinstance(x, bool) + return x def from_dict(f: Callable[[Any], T], x: Any) -> dict[str, T]: @@ -144,7 +143,36 @@ def to_dict(self) -> dict: @dataclass -class Limits: +class ModelCapabilitiesLimitsVision: + """Vision-specific limits""" + + max_prompt_image_size: float + """Maximum image size in bytes""" + + max_prompt_images: float + """Maximum number of images per prompt""" + + supported_media_types: list[str] + """MIME types the model accepts""" + + @staticmethod + def from_dict(obj: Any) -> 'ModelCapabilitiesLimitsVision': + assert isinstance(obj, dict) + max_prompt_image_size = from_float(obj.get("max_prompt_image_size")) + max_prompt_images = from_float(obj.get("max_prompt_images")) + supported_media_types = from_list(from_str, obj.get("supported_media_types")) + return ModelCapabilitiesLimitsVision(max_prompt_image_size, max_prompt_images, supported_media_types) + + def to_dict(self) -> dict: + result: dict = {} + result["max_prompt_image_size"] = to_float(self.max_prompt_image_size) + result["max_prompt_images"] = to_float(self.max_prompt_images) + result["supported_media_types"] = from_list(from_str, self.supported_media_types) + return result + + +@dataclass +class ModelCapabilitiesLimits: """Token limits for prompts, outputs, and context window""" max_context_window_tokens: float @@ -156,13 +184,17 @@ class Limits: max_prompt_tokens: float | None = None """Maximum number of prompt/input tokens""" + vision: ModelCapabilitiesLimitsVision | None = None + """Vision-specific limits""" + @staticmethod - def from_dict(obj: Any) -> 'Limits': + def from_dict(obj: Any) -> 'ModelCapabilitiesLimits': assert isinstance(obj, dict) max_context_window_tokens = from_float(obj.get("max_context_window_tokens")) max_output_tokens = from_union([from_float, from_none], obj.get("max_output_tokens")) max_prompt_tokens = from_union([from_float, from_none], obj.get("max_prompt_tokens")) - return Limits(max_context_window_tokens, max_output_tokens, max_prompt_tokens) + vision = from_union([ModelCapabilitiesLimitsVision.from_dict, from_none], obj.get("vision")) + return ModelCapabilitiesLimits(max_context_window_tokens, max_output_tokens, max_prompt_tokens, vision) def to_dict(self) -> dict: result: dict = {} @@ -171,11 +203,13 @@ def to_dict(self) -> dict: result["max_output_tokens"] = from_union([to_float, from_none], self.max_output_tokens) if self.max_prompt_tokens is not None: result["max_prompt_tokens"] = from_union([to_float, from_none], self.max_prompt_tokens) + if self.vision is not None: + result["vision"] = from_union([lambda x: to_class(ModelCapabilitiesLimitsVision, x), from_none], self.vision) return result @dataclass -class Supports: +class ModelCapabilitiesSupports: """Feature flags indicating what the model supports""" reasoning_effort: bool | None = None @@ -185,11 +219,11 @@ class Supports: """Whether this model supports vision/image input""" @staticmethod - def from_dict(obj: Any) -> 'Supports': + def from_dict(obj: Any) -> 'ModelCapabilitiesSupports': assert isinstance(obj, dict) reasoning_effort = from_union([from_bool, from_none], obj.get("reasoningEffort")) vision = from_union([from_bool, from_none], obj.get("vision")) - return Supports(reasoning_effort, vision) + return ModelCapabilitiesSupports(reasoning_effort, vision) def to_dict(self) -> dict: result: dict = {} @@ -201,26 +235,26 @@ def to_dict(self) -> dict: @dataclass -class Capabilities: +class ModelCapabilities: """Model capabilities and limits""" - limits: Limits + limits: ModelCapabilitiesLimits """Token limits for prompts, outputs, and context window""" - supports: Supports + supports: ModelCapabilitiesSupports """Feature flags indicating what the model supports""" @staticmethod - def from_dict(obj: Any) -> 'Capabilities': + def from_dict(obj: Any) -> 'ModelCapabilities': assert isinstance(obj, dict) - limits = Limits.from_dict(obj.get("limits")) - supports = Supports.from_dict(obj.get("supports")) - return Capabilities(limits, supports) + limits = ModelCapabilitiesLimits.from_dict(obj.get("limits")) + supports = ModelCapabilitiesSupports.from_dict(obj.get("supports")) + return ModelCapabilities(limits, supports) def to_dict(self) -> dict: result: dict = {} - result["limits"] = to_class(Limits, self.limits) - result["supports"] = to_class(Supports, self.supports) + result["limits"] = to_class(ModelCapabilitiesLimits, self.limits) + result["supports"] = to_class(ModelCapabilitiesSupports, self.supports) return result @@ -250,7 +284,7 @@ def to_dict(self) -> dict: @dataclass class Model: - capabilities: Capabilities + capabilities: ModelCapabilities """Model capabilities and limits""" id: str @@ -274,7 +308,7 @@ class Model: @staticmethod def from_dict(obj: Any) -> 'Model': assert isinstance(obj, dict) - capabilities = Capabilities.from_dict(obj.get("capabilities")) + capabilities = ModelCapabilities.from_dict(obj.get("capabilities")) id = from_str(obj.get("id")) name = from_str(obj.get("name")) billing = from_union([Billing.from_dict, from_none], obj.get("billing")) @@ -285,7 +319,7 @@ def from_dict(obj: Any) -> 'Model': def to_dict(self) -> dict: result: dict = {} - result["capabilities"] = to_class(Capabilities, self.capabilities) + result["capabilities"] = to_class(ModelCapabilities, self.capabilities) result["id"] = from_str(self.id) result["name"] = from_str(self.name) if self.billing is not None: @@ -475,7 +509,7 @@ class ServerValue: command: str | None = None cwd: str | None = None env: dict[str, str] | None = None - filter_mapping: dict[str | FilterMappingEnum] | FilterMappingEnum | None = None + filter_mapping: dict[str, FilterMappingEnum] | FilterMappingEnum | None = None is_default_server: bool | None = None timeout: float | None = None tools: list[str] | None = None @@ -561,7 +595,7 @@ class MCPConfigAddParamsConfig: command: str | None = None cwd: str | None = None env: dict[str, str] | None = None - filter_mapping: dict[str | FilterMappingEnum] | FilterMappingEnum | None = None + filter_mapping: dict[str, FilterMappingEnum] | FilterMappingEnum | None = None is_default_server: bool | None = None timeout: float | None = None tools: list[str] | None = None @@ -652,7 +686,7 @@ class MCPConfigUpdateParamsConfig: command: str | None = None cwd: str | None = None env: dict[str, str] | None = None - filter_mapping: dict[str | FilterMappingEnum] | FilterMappingEnum | None = None + filter_mapping: dict[str, FilterMappingEnum] | FilterMappingEnum | None = None is_default_server: bool | None = None timeout: float | None = None tools: list[str] | None = None @@ -839,11 +873,126 @@ def to_dict(self) -> dict: return result +@dataclass +class ModelCapabilitiesOverrideLimitsVision: + max_prompt_image_size: float | None = None + """Maximum image size in bytes""" + + max_prompt_images: float | None = None + """Maximum number of images per prompt""" + + supported_media_types: list[str] | None = None + """MIME types the model accepts""" + + @staticmethod + def from_dict(obj: Any) -> 'ModelCapabilitiesOverrideLimitsVision': + assert isinstance(obj, dict) + max_prompt_image_size = from_union([from_float, from_none], obj.get("max_prompt_image_size")) + max_prompt_images = from_union([from_float, from_none], obj.get("max_prompt_images")) + supported_media_types = from_union([lambda x: from_list(from_str, x), from_none], obj.get("supported_media_types")) + return ModelCapabilitiesOverrideLimitsVision(max_prompt_image_size, max_prompt_images, supported_media_types) + + def to_dict(self) -> dict: + result: dict = {} + if self.max_prompt_image_size is not None: + result["max_prompt_image_size"] = from_union([to_float, from_none], self.max_prompt_image_size) + if self.max_prompt_images is not None: + result["max_prompt_images"] = from_union([to_float, from_none], self.max_prompt_images) + if self.supported_media_types is not None: + result["supported_media_types"] = from_union([lambda x: from_list(from_str, x), from_none], self.supported_media_types) + return result + + +@dataclass +class ModelCapabilitiesOverrideLimits: + """Token limits for prompts, outputs, and context window""" + + max_context_window_tokens: float | None = None + """Maximum total context window size in tokens""" + + max_output_tokens: float | None = None + max_prompt_tokens: float | None = None + vision: ModelCapabilitiesOverrideLimitsVision | None = None + + @staticmethod + def from_dict(obj: Any) -> 'ModelCapabilitiesOverrideLimits': + assert isinstance(obj, dict) + max_context_window_tokens = from_union([from_float, from_none], obj.get("max_context_window_tokens")) + max_output_tokens = from_union([from_float, from_none], obj.get("max_output_tokens")) + max_prompt_tokens = from_union([from_float, from_none], obj.get("max_prompt_tokens")) + vision = from_union([ModelCapabilitiesOverrideLimitsVision.from_dict, from_none], obj.get("vision")) + return ModelCapabilitiesOverrideLimits(max_context_window_tokens, max_output_tokens, max_prompt_tokens, vision) + + def to_dict(self) -> dict: + result: dict = {} + if self.max_context_window_tokens is not None: + result["max_context_window_tokens"] = from_union([to_float, from_none], self.max_context_window_tokens) + if self.max_output_tokens is not None: + result["max_output_tokens"] = from_union([to_float, from_none], self.max_output_tokens) + if self.max_prompt_tokens is not None: + result["max_prompt_tokens"] = from_union([to_float, from_none], self.max_prompt_tokens) + if self.vision is not None: + result["vision"] = from_union([lambda x: to_class(ModelCapabilitiesOverrideLimitsVision, x), from_none], self.vision) + return result + + +@dataclass +class ModelCapabilitiesOverrideSupports: + """Feature flags indicating what the model supports""" + + reasoning_effort: bool | None = None + vision: bool | None = None + + @staticmethod + def from_dict(obj: Any) -> 'ModelCapabilitiesOverrideSupports': + assert isinstance(obj, dict) + reasoning_effort = from_union([from_bool, from_none], obj.get("reasoningEffort")) + vision = from_union([from_bool, from_none], obj.get("vision")) + return ModelCapabilitiesOverrideSupports(reasoning_effort, vision) + + def to_dict(self) -> dict: + result: dict = {} + if self.reasoning_effort is not None: + result["reasoningEffort"] = from_union([from_bool, from_none], self.reasoning_effort) + if self.vision is not None: + result["vision"] = from_union([from_bool, from_none], self.vision) + return result + + +@dataclass +class ModelCapabilitiesOverride: + """Override individual model capabilities resolved by the runtime""" + + limits: ModelCapabilitiesOverrideLimits | None = None + """Token limits for prompts, outputs, and context window""" + + supports: ModelCapabilitiesOverrideSupports | None = None + """Feature flags indicating what the model supports""" + + @staticmethod + def from_dict(obj: Any) -> 'ModelCapabilitiesOverride': + assert isinstance(obj, dict) + limits = from_union([ModelCapabilitiesOverrideLimits.from_dict, from_none], obj.get("limits")) + supports = from_union([ModelCapabilitiesOverrideSupports.from_dict, from_none], obj.get("supports")) + return ModelCapabilitiesOverride(limits, supports) + + def to_dict(self) -> dict: + result: dict = {} + if self.limits is not None: + result["limits"] = from_union([lambda x: to_class(ModelCapabilitiesOverrideLimits, x), from_none], self.limits) + if self.supports is not None: + result["supports"] = from_union([lambda x: to_class(ModelCapabilitiesOverrideSupports, x), from_none], self.supports) + return result + + @dataclass class SessionModelSwitchToParams: model_id: str """Model identifier to switch to""" + model_capabilities: ModelCapabilitiesOverride | None = None + """Override individual model capabilities resolved by the runtime""" + reasoning_effort: str | None = None """Reasoning effort level to use for the model""" @@ -851,12 +1000,15 @@ class SessionModelSwitchToParams: def from_dict(obj: Any) -> 'SessionModelSwitchToParams': assert isinstance(obj, dict) model_id = from_str(obj.get("modelId")) + model_capabilities = from_union([ModelCapabilitiesOverride.from_dict, from_none], obj.get("modelCapabilities")) reasoning_effort = from_union([from_str, from_none], obj.get("reasoningEffort")) - return SessionModelSwitchToParams(model_id, reasoning_effort) + return SessionModelSwitchToParams(model_id, model_capabilities, reasoning_effort) def to_dict(self) -> dict: result: dict = {} result["modelId"] = from_str(self.model_id) + if self.model_capabilities is not None: + result["modelCapabilities"] = from_union([lambda x: to_class(ModelCapabilitiesOverride, x), from_none], self.model_capabilities) if self.reasoning_effort is not None: result["reasoningEffort"] = from_union([from_str, from_none], self.reasoning_effort) return result diff --git a/python/copilot/generated/session_events.py b/python/copilot/generated/session_events.py index 9b4267829..361718ebb 100644 --- a/python/copilot/generated/session_events.py +++ b/python/copilot/generated/session_events.py @@ -78,6 +78,15 @@ def from_int(x: Any) -> int: return x +class Action(Enum): + """The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" + (dismissed) + """ + ACCEPT = "accept" + CANCEL = "cancel" + DECLINE = "decline" + + class AgentMode(Enum): """The agent mode that was active when this message was sent""" @@ -88,7 +97,7 @@ class AgentMode(Enum): @dataclass -class DataAgent: +class Agent: description: str """Description of what the agent does""" @@ -114,7 +123,7 @@ class DataAgent: """Model override for this agent, if set""" @staticmethod - def from_dict(obj: Any) -> 'DataAgent': + def from_dict(obj: Any) -> 'Agent': assert isinstance(obj, dict) description = from_str(obj.get("description")) display_name = from_str(obj.get("displayName")) @@ -124,7 +133,7 @@ def from_dict(obj: Any) -> 'DataAgent': tools = from_list(from_str, obj.get("tools")) user_invocable = from_bool(obj.get("userInvocable")) model = from_union([from_str, from_none], obj.get("model")) - return DataAgent(description, display_name, id, name, source, tools, user_invocable, model) + return Agent(description, display_name, id, name, source, tools, user_invocable, model) def to_dict(self) -> dict: result: dict = {} @@ -363,85 +372,6 @@ def to_dict(self) -> dict: return result -@dataclass -class BackgroundTasksAgent: - """A background agent task""" - - agent_id: str - """Unique identifier of the background agent""" - - agent_type: str - """Type of the background agent""" - - description: str | None = None - """Human-readable description of the agent task""" - - @staticmethod - def from_dict(obj: Any) -> 'BackgroundTasksAgent': - assert isinstance(obj, dict) - agent_id = from_str(obj.get("agentId")) - agent_type = from_str(obj.get("agentType")) - description = from_union([from_str, from_none], obj.get("description")) - return BackgroundTasksAgent(agent_id, agent_type, description) - - def to_dict(self) -> dict: - result: dict = {} - result["agentId"] = from_str(self.agent_id) - result["agentType"] = from_str(self.agent_type) - if self.description is not None: - result["description"] = from_union([from_str, from_none], self.description) - return result - - -@dataclass -class Shell: - """A background shell command""" - - shell_id: str - """Unique identifier of the background shell""" - - description: str | None = None - """Human-readable description of the shell command""" - - @staticmethod - def from_dict(obj: Any) -> 'Shell': - assert isinstance(obj, dict) - shell_id = from_str(obj.get("shellId")) - description = from_union([from_str, from_none], obj.get("description")) - return Shell(shell_id, description) - - def to_dict(self) -> dict: - result: dict = {} - result["shellId"] = from_str(self.shell_id) - if self.description is not None: - result["description"] = from_union([from_str, from_none], self.description) - return result - - -@dataclass -class BackgroundTasks: - """Background tasks still running when the agent became idle""" - - agents: list[BackgroundTasksAgent] - """Currently running background agents""" - - shells: list[Shell] - """Currently running background shell commands""" - - @staticmethod - def from_dict(obj: Any) -> 'BackgroundTasks': - assert isinstance(obj, dict) - agents = from_list(BackgroundTasksAgent.from_dict, obj.get("agents")) - shells = from_list(Shell.from_dict, obj.get("shells")) - return BackgroundTasks(agents, shells) - - def to_dict(self) -> dict: - result: dict = {} - result["agents"] = from_list(lambda x: to_class(BackgroundTasksAgent, x), self.agents) - result["shells"] = from_list(lambda x: to_class(Shell, x), self.shells) - return result - - @dataclass class CodeChanges: """Aggregate code change metrics for the session""" @@ -1384,7 +1314,7 @@ class ContentType(Enum): @dataclass -class Content: +class ContentElement: """A content block within a tool result, which may be text, terminal output, image, audio, or a resource @@ -1448,7 +1378,7 @@ class Content: """The embedded resource contents, either text or base64-encoded binary""" @staticmethod - def from_dict(obj: Any) -> 'Content': + def from_dict(obj: Any) -> 'ContentElement': assert isinstance(obj, dict) type = ContentType(obj.get("type")) text = from_union([from_str, from_none], obj.get("text")) @@ -1463,7 +1393,7 @@ def from_dict(obj: Any) -> 'Content': title = from_union([from_str, from_none], obj.get("title")) uri = from_union([from_str, from_none], obj.get("uri")) resource = from_union([Resource.from_dict, from_none], obj.get("resource")) - return Content(type, text, cwd, exit_code, data, mime_type, description, icons, name, size, title, uri, resource) + return ContentElement(type, text, cwd, exit_code, data, mime_type, description, icons, name, size, title, uri, resource) def to_dict(self) -> dict: result: dict = {} @@ -1516,7 +1446,7 @@ class Result: """Concise tool result text sent to the LLM for chat completion, potentially truncated for token efficiency """ - contents: list[Content] | None = None + contents: list[ContentElement] | None = None """Structured content blocks (text, images, audio, resources) returned by the tool in their native format """ @@ -1531,7 +1461,7 @@ class Result: def from_dict(obj: Any) -> 'Result': assert isinstance(obj, dict) content = from_union([from_str, from_none], obj.get("content")) - contents = from_union([lambda x: from_list(Content.from_dict, x), from_none], obj.get("contents")) + contents = from_union([lambda x: from_list(ContentElement.from_dict, x), from_none], obj.get("contents")) detailed_content = from_union([from_str, from_none], obj.get("detailedContent")) kind = from_union([ResultKind, from_none], obj.get("kind")) return Result(content, contents, detailed_content, kind) @@ -1541,7 +1471,7 @@ def to_dict(self) -> dict: if self.content is not None: result["content"] = from_union([from_str, from_none], self.content) if self.contents is not None: - result["contents"] = from_union([lambda x: from_list(lambda x: to_class(Content, x), x), from_none], self.contents) + result["contents"] = from_union([lambda x: from_list(lambda x: to_class(ContentElement, x), x), from_none], self.contents) if self.detailed_content is not None: result["detailedContent"] = from_union([from_str, from_none], self.detailed_content) if self.kind is not None: @@ -1779,7 +1709,7 @@ class Data: Error details for timeline display including message and optional diagnostic information - Payload indicating the agent is idle; includes any background tasks still in flight + Payload indicating the session is fully idle with no background tasks in flight Session title change payload containing the new display title @@ -1877,12 +1807,12 @@ class Data: User input request notification with question and optional predefined choices - User input request completion notification signaling UI dismissal + User input request completion with the user's response Elicitation request; may be form-based (structured input) or URL-based (browser redirect) - Elicitation request completion notification signaling UI dismissal + Elicitation request completion with the user's response Sampling request from an MCP server; contains the server name and a requestId for correlation @@ -1909,7 +1839,7 @@ class Data: Plan approval request with plan content and available user actions - Plan mode exit completion notification signaling UI dismissal + Plan mode exit completion with the user's approval decision and optional feedback """ already_in_use: bool | None = None """Whether the session was already in use by another client at start time @@ -1999,9 +1929,6 @@ class Data: aborted: bool | None = None """True when the preceding agentic loop was cancelled via abort signal""" - background_tasks: BackgroundTasks | None = None - """Background tasks still running when the agent became idle""" - title: str | None = None """The new display title for the session""" @@ -2273,7 +2200,7 @@ class Data: attachments: list[Attachment] | None = None """Files, selections, or GitHub references attached to the message""" - content: str | None = None + content: str | dict[str, float | bool | list[str] | str] | None = None """The user's message text as displayed in the timeline The complete extended thinking text from the model @@ -2285,6 +2212,8 @@ class Data: The system or developer prompt text The notification text, typically wrapped in XML tags + + The submitted form data when action is 'accept'; keys match the requested schema fields """ interaction_id: str | None = None """CAPI interaction ID for correlating this user message with its turn @@ -2547,6 +2476,12 @@ class Data: question: str | None = None """The question or prompt to present to the user""" + answer: str | None = None + """The user's answer to the input request""" + + was_freeform: bool | None = None + """Whether the answer was typed as free-form text rather than selected from choices""" + elicitation_source: str | None = None """The source that initiated the request (MCP server name, or absent for agent-initiated)""" @@ -2557,6 +2492,10 @@ class Data: requested_schema: RequestedSchema | None = None """JSON Schema describing the form fields to present to the user (form mode only)""" + action: Action | None = None + """The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" + (dismissed) + """ mcp_request_id: float | str | None = None """The JSON-RPC request ID from the MCP protocol""" @@ -2605,10 +2544,22 @@ class Data: recommended_action: str | None = None """The recommended action for the user to take""" + approved: bool | None = None + """Whether the plan was approved by the user""" + + auto_approve_edits: bool | None = None + """Whether edits should be auto-approved without confirmation""" + + feedback: str | None = None + """Free-form feedback from the user if they requested changes to the plan""" + + selected_action: str | None = None + """Which action the user selected (e.g. 'autopilot', 'interactive', 'exit_only')""" + skills: list[Skill] | None = None """Array of resolved skill metadata""" - agents: list[DataAgent] | None = None + agents: list[Agent] | None = None """Array of loaded custom agent metadata""" errors: list[str] | None = None @@ -2648,7 +2599,6 @@ def from_dict(obj: Any) -> 'Data': status_code = from_union([from_int, from_none], obj.get("statusCode")) url = from_union([from_str, from_none], obj.get("url")) aborted = from_union([from_bool, from_none], obj.get("aborted")) - background_tasks = from_union([BackgroundTasks.from_dict, from_none], obj.get("backgroundTasks")) title = from_union([from_str, from_none], obj.get("title")) info_type = from_union([from_str, from_none], obj.get("infoType")) warning_type = from_union([from_str, from_none], obj.get("warningType")) @@ -2709,7 +2659,7 @@ def from_dict(obj: Any) -> 'Data': tokens_removed = from_union([from_float, from_none], obj.get("tokensRemoved")) agent_mode = from_union([AgentMode, from_none], obj.get("agentMode")) attachments = from_union([lambda x: from_list(Attachment.from_dict, x), from_none], obj.get("attachments")) - content = from_union([from_str, from_none], obj.get("content")) + content = from_union([from_str, lambda x: from_dict(lambda x: from_union([from_float, from_bool, lambda x: from_list(from_str, x), from_str], x), x), from_none], obj.get("content")) interaction_id = from_union([from_str, from_none], obj.get("interactionId")) source = from_union([from_str, from_none], obj.get("source")) transformed_content = from_union([from_str, from_none], obj.get("transformedContent")) @@ -2773,9 +2723,12 @@ def from_dict(obj: Any) -> 'Data': allow_freeform = from_union([from_bool, from_none], obj.get("allowFreeform")) choices = from_union([lambda x: from_list(from_str, x), from_none], obj.get("choices")) question = from_union([from_str, from_none], obj.get("question")) + answer = from_union([from_str, from_none], obj.get("answer")) + was_freeform = from_union([from_bool, from_none], obj.get("wasFreeform")) elicitation_source = from_union([from_str, from_none], obj.get("elicitationSource")) mode = from_union([Mode, from_none], obj.get("mode")) requested_schema = from_union([RequestedSchema.from_dict, from_none], obj.get("requestedSchema")) + action = from_union([Action, from_none], obj.get("action")) mcp_request_id = from_union([from_float, from_str, from_none], obj.get("mcpRequestId")) server_name = from_union([from_str, from_none], obj.get("serverName")) server_url = from_union([from_str, from_none], obj.get("serverUrl")) @@ -2790,14 +2743,18 @@ def from_dict(obj: Any) -> 'Data': actions = from_union([lambda x: from_list(from_str, x), from_none], obj.get("actions")) plan_content = from_union([from_str, from_none], obj.get("planContent")) recommended_action = from_union([from_str, from_none], obj.get("recommendedAction")) + approved = from_union([from_bool, from_none], obj.get("approved")) + auto_approve_edits = from_union([from_bool, from_none], obj.get("autoApproveEdits")) + feedback = from_union([from_str, from_none], obj.get("feedback")) + selected_action = from_union([from_str, from_none], obj.get("selectedAction")) skills = from_union([lambda x: from_list(Skill.from_dict, x), from_none], obj.get("skills")) - agents = from_union([lambda x: from_list(DataAgent.from_dict, x), from_none], obj.get("agents")) + agents = from_union([lambda x: from_list(Agent.from_dict, x), from_none], obj.get("agents")) errors = from_union([lambda x: from_list(from_str, x), from_none], obj.get("errors")) warnings = from_union([lambda x: from_list(from_str, x), from_none], obj.get("warnings")) servers = from_union([lambda x: from_list(Server.from_dict, x), from_none], obj.get("servers")) status = from_union([ServerStatus, from_none], obj.get("status")) extensions = from_union([lambda x: from_list(Extension.from_dict, x), from_none], obj.get("extensions")) - return Data(already_in_use, context, copilot_version, producer, reasoning_effort, remote_steerable, selected_model, session_id, start_time, version, event_count, resume_time, error_type, message, provider_call_id, stack, status_code, url, aborted, background_tasks, title, info_type, warning_type, new_model, previous_model, previous_reasoning_effort, new_mode, previous_mode, operation, path, handoff_time, host, remote_session_id, repository, source_type, summary, messages_removed_during_truncation, performed_by, post_truncation_messages_length, post_truncation_tokens_in_messages, pre_truncation_messages_length, pre_truncation_tokens_in_messages, token_limit, tokens_removed_during_truncation, events_removed, up_to_event_id, code_changes, conversation_tokens, current_model, current_tokens, error_reason, model_metrics, session_start_time, shutdown_type, system_tokens, tool_definitions_tokens, total_api_duration_ms, total_premium_requests, base_commit, branch, cwd, git_root, head_commit, host_type, is_initial, messages_length, checkpoint_number, checkpoint_path, compaction_tokens_used, error, messages_removed, post_compaction_tokens, pre_compaction_messages_length, pre_compaction_tokens, request_id, success, summary_content, tokens_removed, agent_mode, attachments, content, interaction_id, source, transformed_content, turn_id, intent, reasoning_id, delta_content, total_response_size_bytes, encrypted_content, message_id, output_tokens, parent_tool_call_id, phase, reasoning_opaque, reasoning_text, tool_requests, api_call_id, cache_read_tokens, cache_write_tokens, copilot_usage, cost, duration, initiator, input_tokens, inter_token_latency_ms, model, quota_snapshots, ttft_ms, reason, arguments, tool_call_id, tool_name, mcp_server_name, mcp_tool_name, partial_output, progress_message, is_user_requested, result, tool_telemetry, allowed_tools, description, name, plugin_name, plugin_version, agent_description, agent_display_name, agent_name, duration_ms, total_tokens, total_tool_calls, tools, hook_invocation_id, hook_type, input, output, metadata, role, kind, permission_request, resolved_by_hook, allow_freeform, choices, question, elicitation_source, mode, requested_schema, mcp_request_id, server_name, server_url, static_client_config, traceparent, tracestate, command, args, command_name, commands, ui, actions, plan_content, recommended_action, skills, agents, errors, warnings, servers, status, extensions) + return Data(already_in_use, context, copilot_version, producer, reasoning_effort, remote_steerable, selected_model, session_id, start_time, version, event_count, resume_time, error_type, message, provider_call_id, stack, status_code, url, aborted, title, info_type, warning_type, new_model, previous_model, previous_reasoning_effort, new_mode, previous_mode, operation, path, handoff_time, host, remote_session_id, repository, source_type, summary, messages_removed_during_truncation, performed_by, post_truncation_messages_length, post_truncation_tokens_in_messages, pre_truncation_messages_length, pre_truncation_tokens_in_messages, token_limit, tokens_removed_during_truncation, events_removed, up_to_event_id, code_changes, conversation_tokens, current_model, current_tokens, error_reason, model_metrics, session_start_time, shutdown_type, system_tokens, tool_definitions_tokens, total_api_duration_ms, total_premium_requests, base_commit, branch, cwd, git_root, head_commit, host_type, is_initial, messages_length, checkpoint_number, checkpoint_path, compaction_tokens_used, error, messages_removed, post_compaction_tokens, pre_compaction_messages_length, pre_compaction_tokens, request_id, success, summary_content, tokens_removed, agent_mode, attachments, content, interaction_id, source, transformed_content, turn_id, intent, reasoning_id, delta_content, total_response_size_bytes, encrypted_content, message_id, output_tokens, parent_tool_call_id, phase, reasoning_opaque, reasoning_text, tool_requests, api_call_id, cache_read_tokens, cache_write_tokens, copilot_usage, cost, duration, initiator, input_tokens, inter_token_latency_ms, model, quota_snapshots, ttft_ms, reason, arguments, tool_call_id, tool_name, mcp_server_name, mcp_tool_name, partial_output, progress_message, is_user_requested, result, tool_telemetry, allowed_tools, description, name, plugin_name, plugin_version, agent_description, agent_display_name, agent_name, duration_ms, total_tokens, total_tool_calls, tools, hook_invocation_id, hook_type, input, output, metadata, role, kind, permission_request, resolved_by_hook, allow_freeform, choices, question, answer, was_freeform, elicitation_source, mode, requested_schema, action, mcp_request_id, server_name, server_url, static_client_config, traceparent, tracestate, command, args, command_name, commands, ui, actions, plan_content, recommended_action, approved, auto_approve_edits, feedback, selected_action, skills, agents, errors, warnings, servers, status, extensions) def to_dict(self) -> dict: result: dict = {} @@ -2839,8 +2796,6 @@ def to_dict(self) -> dict: result["url"] = from_union([from_str, from_none], self.url) if self.aborted is not None: result["aborted"] = from_union([from_bool, from_none], self.aborted) - if self.background_tasks is not None: - result["backgroundTasks"] = from_union([lambda x: to_class(BackgroundTasks, x), from_none], self.background_tasks) if self.title is not None: result["title"] = from_union([from_str, from_none], self.title) if self.info_type is not None: @@ -2962,7 +2917,7 @@ def to_dict(self) -> dict: if self.attachments is not None: result["attachments"] = from_union([lambda x: from_list(lambda x: to_class(Attachment, x), x), from_none], self.attachments) if self.content is not None: - result["content"] = from_union([from_str, from_none], self.content) + result["content"] = from_union([from_str, lambda x: from_dict(lambda x: from_union([to_float, from_bool, lambda x: from_list(from_str, x), from_str], x), x), from_none], self.content) if self.interaction_id is not None: result["interactionId"] = from_union([from_str, from_none], self.interaction_id) if self.source is not None: @@ -3089,12 +3044,18 @@ def to_dict(self) -> dict: result["choices"] = from_union([lambda x: from_list(from_str, x), from_none], self.choices) if self.question is not None: result["question"] = from_union([from_str, from_none], self.question) + if self.answer is not None: + result["answer"] = from_union([from_str, from_none], self.answer) + if self.was_freeform is not None: + result["wasFreeform"] = from_union([from_bool, from_none], self.was_freeform) if self.elicitation_source is not None: result["elicitationSource"] = from_union([from_str, from_none], self.elicitation_source) if self.mode is not None: result["mode"] = from_union([lambda x: to_enum(Mode, x), from_none], self.mode) if self.requested_schema is not None: result["requestedSchema"] = from_union([lambda x: to_class(RequestedSchema, x), from_none], self.requested_schema) + if self.action is not None: + result["action"] = from_union([lambda x: to_enum(Action, x), from_none], self.action) if self.mcp_request_id is not None: result["mcpRequestId"] = from_union([to_float, from_str, from_none], self.mcp_request_id) if self.server_name is not None: @@ -3123,10 +3084,18 @@ def to_dict(self) -> dict: result["planContent"] = from_union([from_str, from_none], self.plan_content) if self.recommended_action is not None: result["recommendedAction"] = from_union([from_str, from_none], self.recommended_action) + if self.approved is not None: + result["approved"] = from_union([from_bool, from_none], self.approved) + if self.auto_approve_edits is not None: + result["autoApproveEdits"] = from_union([from_bool, from_none], self.auto_approve_edits) + if self.feedback is not None: + result["feedback"] = from_union([from_str, from_none], self.feedback) + if self.selected_action is not None: + result["selectedAction"] = from_union([from_str, from_none], self.selected_action) if self.skills is not None: result["skills"] = from_union([lambda x: from_list(lambda x: to_class(Skill, x), x), from_none], self.skills) if self.agents is not None: - result["agents"] = from_union([lambda x: from_list(lambda x: to_class(DataAgent, x), x), from_none], self.agents) + result["agents"] = from_union([lambda x: from_list(lambda x: to_class(Agent, x), x), from_none], self.agents) if self.errors is not None: result["errors"] = from_union([lambda x: from_list(from_str, x), from_none], self.errors) if self.warnings is not None: @@ -3236,7 +3205,7 @@ class SessionEvent: Error details for timeline display including message and optional diagnostic information - Payload indicating the agent is idle; includes any background tasks still in flight + Payload indicating the session is fully idle with no background tasks in flight Session title change payload containing the new display title @@ -3334,12 +3303,12 @@ class SessionEvent: User input request notification with question and optional predefined choices - User input request completion notification signaling UI dismissal + User input request completion with the user's response Elicitation request; may be form-based (structured input) or URL-based (browser redirect) - Elicitation request completion notification signaling UI dismissal + Elicitation request completion with the user's response Sampling request from an MCP server; contains the server name and a requestId for correlation @@ -3366,7 +3335,7 @@ class SessionEvent: Plan approval request with plan content and available user actions - Plan mode exit completion notification signaling UI dismissal + Plan mode exit completion with the user's approval decision and optional feedback """ id: UUID """Unique event identifier (UUID v4), generated when the event is emitted""" diff --git a/python/copilot/session.py b/python/copilot/session.py index 9bf384fbe..59ec8532b 100644 --- a/python/copilot/session.py +++ b/python/copilot/session.py @@ -17,7 +17,7 @@ from collections.abc import Awaitable, Callable from dataclasses import dataclass from types import TracebackType -from typing import Any, Literal, NotRequired, Required, TypedDict, cast +from typing import TYPE_CHECKING, Any, Literal, NotRequired, Required, TypedDict, cast from ._jsonrpc import JsonRpcError, ProcessExitedError from ._telemetry import get_trace_context, trace_context @@ -41,6 +41,9 @@ SessionUIHandlePendingElicitationParams, SessionUIHandlePendingElicitationParamsResult, ) +from .generated.rpc import ( + ModelCapabilitiesOverride as _RpcModelCapabilitiesOverride, +) from .generated.session_events import ( PermissionRequest, SessionEvent, @@ -49,6 +52,9 @@ ) from .tools import Tool, ToolHandler, ToolInvocation, ToolResult +if TYPE_CHECKING: + from .client import ModelCapabilitiesOverride + # Re-export SessionEvent under an alias used internally SessionEventTypeAlias = SessionEvent @@ -1882,7 +1888,13 @@ async def abort(self) -> None: """ await self._client.request("session.abort", {"sessionId": self.session_id}) - async def set_model(self, model: str, *, reasoning_effort: str | None = None) -> None: + async def set_model( + self, + model: str, + *, + reasoning_effort: str | None = None, + model_capabilities: ModelCapabilitiesOverride | None = None, + ) -> None: """ Change the model for this session. @@ -1893,6 +1905,7 @@ async def set_model(self, model: str, *, reasoning_effort: str | None = None) -> model: Model ID to switch to (e.g., "gpt-4.1", "claude-sonnet-4"). reasoning_effort: Optional reasoning effort level for the new model (e.g., "low", "medium", "high", "xhigh"). + model_capabilities: Override individual model capabilities resolved by the runtime. Raises: Exception: If the session has been destroyed or the connection fails. @@ -1901,10 +1914,18 @@ async def set_model(self, model: str, *, reasoning_effort: str | None = None) -> >>> await session.set_model("gpt-4.1") >>> await session.set_model("claude-sonnet-4.6", reasoning_effort="high") """ + rpc_caps = None + if model_capabilities is not None: + from .client import _capabilities_to_dict + + rpc_caps = _RpcModelCapabilitiesOverride.from_dict( + _capabilities_to_dict(model_capabilities) + ) await self.rpc.model.switch_to( SessionModelSwitchToParams( model_id=model, reasoning_effort=reasoning_effort, + model_capabilities=rpc_caps, ) ) diff --git a/python/e2e/test_session_config.py b/python/e2e/test_session_config.py new file mode 100644 index 000000000..e9c203b79 --- /dev/null +++ b/python/e2e/test_session_config.py @@ -0,0 +1,99 @@ +"""E2E tests for session configuration including model capabilities overrides.""" + +import base64 +import os + +import pytest + +from copilot import ModelCapabilitiesOverride, ModelSupportsOverride +from copilot.session import PermissionHandler + +from .testharness import E2ETestContext + +pytestmark = pytest.mark.asyncio(loop_scope="module") + + +def has_image_url_content(exchanges: list[dict]) -> bool: + """Check if any exchange contains an image_url content part in user messages.""" + for ex in exchanges: + for msg in ex.get("request", {}).get("messages", []): + if msg.get("role") == "user" and isinstance(msg.get("content"), list): + if any(p.get("type") == "image_url" for p in msg["content"]): + return True + return False + + +PNG_1X1 = base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" +) +VIEW_IMAGE_PROMPT = "Use the view tool to look at the file test.png and describe what you see" + + +class TestSessionConfig: + """Tests for session configuration including model capabilities overrides.""" + + async def test_vision_disabled_then_enabled_via_setmodel(self, ctx: E2ETestContext): + png_path = os.path.join(ctx.work_dir, "test.png") + with open(png_path, "wb") as f: + f.write(PNG_1X1) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + model_capabilities=ModelCapabilitiesOverride( + supports=ModelSupportsOverride(vision=False) + ), + ) + + # Turn 1: vision off — no image_url expected + await session.send_and_wait(VIEW_IMAGE_PROMPT) + traffic_after_t1 = await ctx.get_exchanges() + assert not has_image_url_content(traffic_after_t1) + + # Switch vision on + await session.set_model( + "claude-sonnet-4.5", + model_capabilities=ModelCapabilitiesOverride( + supports=ModelSupportsOverride(vision=True) + ), + ) + + # Turn 2: vision on — image_url expected in new exchanges + await session.send_and_wait(VIEW_IMAGE_PROMPT) + traffic_after_t2 = await ctx.get_exchanges() + new_exchanges = traffic_after_t2[len(traffic_after_t1) :] + assert has_image_url_content(new_exchanges) + + await session.disconnect() + + async def test_vision_enabled_then_disabled_via_setmodel(self, ctx: E2ETestContext): + png_path = os.path.join(ctx.work_dir, "test.png") + with open(png_path, "wb") as f: + f.write(PNG_1X1) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + model_capabilities=ModelCapabilitiesOverride( + supports=ModelSupportsOverride(vision=True) + ), + ) + + # Turn 1: vision on — image_url expected + await session.send_and_wait(VIEW_IMAGE_PROMPT) + traffic_after_t1 = await ctx.get_exchanges() + assert has_image_url_content(traffic_after_t1) + + # Switch vision off + await session.set_model( + "claude-sonnet-4.5", + model_capabilities=ModelCapabilitiesOverride( + supports=ModelSupportsOverride(vision=False) + ), + ) + + # Turn 2: vision off — no image_url expected in new exchanges + await session.send_and_wait(VIEW_IMAGE_PROMPT) + traffic_after_t2 = await ctx.get_exchanges() + new_exchanges = traffic_after_t2[len(traffic_after_t1) :] + assert not has_image_url_content(new_exchanges) + + await session.disconnect() diff --git a/scripts/codegen/csharp.ts b/scripts/codegen/csharp.ts index 304324421..d60cfbb96 100644 --- a/scripts/codegen/csharp.ts +++ b/scripts/codegen/csharp.ts @@ -620,7 +620,7 @@ function resolveRpcType(schema: JSONSchema7, isRequired: boolean, parentClassNam return isRequired ? enumName : `${enumName}?`; } if (schema.type === "object" && schema.properties) { - const className = `${parentClassName}${propName}`; + const className = (schema.title as string) ?? `${parentClassName}${propName}`; classes.push(emitRpcClass(className, schema, "public", classes)); return isRequired ? className : `${className}?`; } diff --git a/scripts/codegen/python.ts b/scripts/codegen/python.ts index 0340cf1f1..71e44943f 100644 --- a/scripts/codegen/python.ts +++ b/scripts/codegen/python.ts @@ -57,14 +57,36 @@ function replaceBalancedBrackets(code: string, prefix: string, replacer: (inner: return result; } +/** Split a string by commas, but only at the top bracket depth (ignores commas inside [...]) */ +function splitTopLevelCommas(s: string): string[] { + const parts: string[] = []; + let depth = 0; + let start = 0; + for (let i = 0; i < s.length; i++) { + if (s[i] === "[") depth++; + else if (s[i] === "]") depth--; + else if (s[i] === "," && depth === 0) { + parts.push(s.slice(start, i)); + start = i + 1; + } + } + parts.push(s.slice(start)); + return parts; +} + function modernizePython(code: string): string { // Replace Optional[X] with X | None (handles arbitrarily nested brackets) code = replaceBalancedBrackets(code, "Optional", (inner) => `${inner} | None`); - // Replace Union[X, Y] with X | Y - code = replaceBalancedBrackets(code, "Union", (inner) => { - return inner.split(",").map((s: string) => s.trim()).join(" | "); - }); + // Replace Union[X, Y] with X | Y (split only at top-level commas, not inside brackets) + // Run iteratively to handle nested Union inside Dict/List + let prev = ""; + while (prev !== code) { + prev = code; + code = replaceBalancedBrackets(code, "Union", (inner) => { + return splitTopLevelCommas(inner).map((s: string) => s.trim()).join(" | "); + }); + } // Replace List[X] with list[X] code = code.replace(/\bList\[/g, "list["); diff --git a/test/harness/package-lock.json b/test/harness/package-lock.json index 5d055e680..67e294c83 100644 --- a/test/harness/package-lock.json +++ b/test/harness/package-lock.json @@ -9,7 +9,7 @@ "version": "1.0.0", "license": "ISC", "devDependencies": { - "@github/copilot": "^1.0.17", + "@github/copilot": "^1.0.20-1", "@modelcontextprotocol/sdk": "^1.26.0", "@types/node": "^25.3.3", "openai": "^6.17.0", @@ -462,27 +462,27 @@ } }, "node_modules/@github/copilot": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.17.tgz", - "integrity": "sha512-RTJ+kEKOdidjuOs8ozsoBdz+94g7tFJIEu5kz1P2iwJhsL+iIA5rtn9/jXOF0hAI3CLSXKZoSd66cqHrn4rb1A==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.20-1.tgz", + "integrity": "sha512-a34M4P6XcKFy1sDubqn54qakQxeWwA44vKaOh3oNZT8vgna9R4ap2NYGnM8fn7XDAdlJ9QgW6Xt7dfPGwKkt/A==", "dev": true, "license": "SEE LICENSE IN LICENSE.md", "bin": { "copilot": "npm-loader.js" }, "optionalDependencies": { - "@github/copilot-darwin-arm64": "1.0.17", - "@github/copilot-darwin-x64": "1.0.17", - "@github/copilot-linux-arm64": "1.0.17", - "@github/copilot-linux-x64": "1.0.17", - "@github/copilot-win32-arm64": "1.0.17", - "@github/copilot-win32-x64": "1.0.17" + "@github/copilot-darwin-arm64": "1.0.20-1", + "@github/copilot-darwin-x64": "1.0.20-1", + "@github/copilot-linux-arm64": "1.0.20-1", + "@github/copilot-linux-x64": "1.0.20-1", + "@github/copilot-win32-arm64": "1.0.20-1", + "@github/copilot-win32-x64": "1.0.20-1" } }, "node_modules/@github/copilot-darwin-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.17.tgz", - "integrity": "sha512-LSv66P8611y/UjTESnaHLYqLl9kA9yBYsaocZPQoOsvMgCmktgaBgUWq+KMpLMicaFN0jBAE5F0Ve7dW6N9X3A==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.20-1.tgz", + "integrity": "sha512-tip/KyjhRQG7OMAR8rBWrFcPk3XFQQlajozIMPxEA7+qwgMBOlaGcO0iuDEdF5vAtYXhUPPAI/tbuUqkueoJEA==", "cpu": [ "arm64" ], @@ -497,9 +497,9 @@ } }, "node_modules/@github/copilot-darwin-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.17.tgz", - "integrity": "sha512-yqRS0/8kYTGl4VvfJ/QOtHTeYF+DnAWNUReZgt2U0AEP3zgj4z4hxSH7D2PsO/488L4KsBmmcnJr13HmBGiT/w==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.20-1.tgz", + "integrity": "sha512-d/Etng6335TF1Dcw37XFtjKKZqQbqh9trXg5GhMySUamo4UolykylWJuhs+suCx2JJc1lGzPVAdGOxAvj+4P3Q==", "cpu": [ "x64" ], @@ -514,9 +514,9 @@ } }, "node_modules/@github/copilot-linux-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.17.tgz", - "integrity": "sha512-TOK0ma0A24zmQJslkGxUk+KnMFpiqquWEXB5sIv/5Ci45Qi7s0BRWTnqtiJ8Vahwb/wkja6KarHkLA27+ETGUA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.20-1.tgz", + "integrity": "sha512-ptwwVk/uMEoVdGTbhfC8CLtSCq3agnRKlD+iojabcg5K0y0HbaEGIaOeJle0uARpqeyLADgoUkMbth/wWQI2gQ==", "cpu": [ "arm64" ], @@ -531,9 +531,9 @@ } }, "node_modules/@github/copilot-linux-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.17.tgz", - "integrity": "sha512-4Yum3uaAuTM/SiNtzchsO/G/144Bi/Z4FEcearW6WsGDvS6cRwSJeudOM0y4aoy4BHcv8+yw7YuXH5BHC3SAiA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.20-1.tgz", + "integrity": "sha512-sUuR5uVR1/Ndew/pSEQP4vLy2iohW+PMD96R+gzJkF77soe+PfFR7R6Py1VWmwAK1MDblyilDfMcusYLXK48LA==", "cpu": [ "x64" ], @@ -548,9 +548,9 @@ } }, "node_modules/@github/copilot-win32-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.17.tgz", - "integrity": "sha512-I1ferbfQ0aS149WyEUw6XS1sFixwTUUm13BPBQ3yMzD8G2SaoxTsdYdlhZpkVfkfh/rUYyvMKKi9VNxoVYOlDA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.20-1.tgz", + "integrity": "sha512-gk4belEoOHfQH2pJf0GPh2t1N4suIg1mhwJQHveGi5av22XZzYjY7yarNom+YCqc692MAuYsfNF0wXXSij3wBg==", "cpu": [ "arm64" ], @@ -565,9 +565,9 @@ } }, "node_modules/@github/copilot-win32-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.17.tgz", - "integrity": "sha512-kjiOxY9ibS+rPp9XFpPdfdYzluEL3SHN8R5/fnA7RO+kZEJ4FDKWJjAiec3tgVkEHQT3UwNuVa/u3TdfYNF15w==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.20-1.tgz", + "integrity": "sha512-ypRD1iawRw8a0qzhp4fq4ZqvqL86mk2UZNWyuTM8HOe2o3+SrZbveXpEk7gUYJ4ShLhqLVywJHs4+4yPkv5p+A==", "cpu": [ "x64" ], diff --git a/test/harness/package.json b/test/harness/package.json index 257caf35c..48f43e856 100644 --- a/test/harness/package.json +++ b/test/harness/package.json @@ -11,7 +11,7 @@ "test": "vitest run" }, "devDependencies": { - "@github/copilot": "^1.0.17", + "@github/copilot": "^1.0.20-1", "@modelcontextprotocol/sdk": "^1.26.0", "@types/node": "^25.3.3", "openai": "^6.17.0", diff --git a/test/snapshots/session_config/vision_disabled_then_enabled_via_setmodel.yaml b/test/snapshots/session_config/vision_disabled_then_enabled_via_setmodel.yaml new file mode 100644 index 000000000..0ac6e9748 --- /dev/null +++ b/test/snapshots/session_config/vision_disabled_then_enabled_via_setmodel.yaml @@ -0,0 +1,134 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll view the test.png file for you. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - role: assistant + tool_calls: + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll view the test.png file for you. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: assistant + content: The image shows the **GitHub Copilot logo** - a stylized hexagonal icon in shades of blue and purple with a + distinctive geometric design. The logo features a three-dimensional hexagon shape with gradient colors + transitioning from deep blue to lighter blue/purple tones, creating a modern, tech-forward aesthetic. This is + GitHub's AI pair programming assistant branding. + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_2 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - role: assistant + tool_calls: + - id: toolcall_3 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll view the test.png file for you. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: assistant + content: The image shows the **GitHub Copilot logo** - a stylized hexagonal icon in shades of blue and purple with a + distinctive geometric design. The logo features a three-dimensional hexagon shape with gradient colors + transitioning from deep blue to lighter blue/purple tones, creating a modern, tech-forward aesthetic. This is + GitHub's AI pair programming assistant branding. + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_2 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_3 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_2 + content: Intent logged + - role: tool + tool_call_id: toolcall_3 + content: Viewed image file successfully. + - role: user + - role: assistant + content: >- + Looking at the image from test.png, I can see what appears to be a **very light, nearly blank image** with a + pale yellowish or off-white color. The image is quite minimal - it looks like either: + + + - A blank/empty canvas with a subtle cream or pale yellow tint + + - A very light gradient or texture that's barely visible + + - Possibly a placeholder or background image + + + There's no distinct text, shapes, logos, or other clear visual elements visible in this image. It's + essentially a solid light color fill, possibly with very subtle variations in tone. diff --git a/test/snapshots/session_config/vision_enabled_then_disabled_via_setmodel.yaml b/test/snapshots/session_config/vision_enabled_then_disabled_via_setmodel.yaml new file mode 100644 index 000000000..0ba017c21 --- /dev/null +++ b/test/snapshots/session_config/vision_enabled_then_disabled_via_setmodel.yaml @@ -0,0 +1,195 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll use the view tool to look at the test.png file and describe what I see. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - role: assistant + tool_calls: + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll use the view tool to look at the test.png file and describe what I see. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: user + - role: assistant + content: >- + I can see the image file test.png. It appears to be a mostly white or very light-colored image with minimal + visible content. The image seems to be either: + + + 1. A blank or nearly blank white image + + 2. An image with extremely faint or low-contrast content that's difficult to distinguish + + 3. Possibly a placeholder or test image + + + The image doesn't contain any clearly visible text, graphics, or distinguishable elements from what I can + observe. It's essentially showing as a white/very light surface without obvious features. + + + Is this the expected content, or were you anticipating something different in this test image? + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll use the view tool to look at the test.png file and describe what I see. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: assistant + content: >- + I can see the image file test.png. It appears to be a mostly white or very light-colored image with minimal + visible content. The image seems to be either: + + + 1. A blank or nearly blank white image + + 2. An image with extremely faint or low-contrast content that's difficult to distinguish + + 3. Possibly a placeholder or test image + + + The image doesn't contain any clearly visible text, graphics, or distinguishable elements from what I can + observe. It's essentially showing as a white/very light surface without obvious features. + + + Is this the expected content, or were you anticipating something different in this test image? + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_2 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - role: assistant + tool_calls: + - id: toolcall_3 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll use the view tool to look at the test.png file and describe what I see. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: assistant + content: >- + I can see the image file test.png. It appears to be a mostly white or very light-colored image with minimal + visible content. The image seems to be either: + + + 1. A blank or nearly blank white image + + 2. An image with extremely faint or low-contrast content that's difficult to distinguish + + 3. Possibly a placeholder or test image + + + The image doesn't contain any clearly visible text, graphics, or distinguishable elements from what I can + observe. It's essentially showing as a white/very light surface without obvious features. + + + Is this the expected content, or were you anticipating something different in this test image? + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_2 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_3 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_2 + content: Intent logged + - role: tool + tool_call_id: toolcall_3 + content: Viewed image file successfully. + - role: assistant + content: >- + Looking at test.png again, I can see it's a predominantly white or very light-colored image. The image appears + to be blank or contains very minimal content that isn't easily visible against the white background. There are + no clearly distinguishable text, shapes, graphics, or other visual elements that I can identify. + + + The image seems to be a simple test image - possibly intentionally blank or nearly blank for testing purposes.