|
1 | | -namespace ManagedCode.CodexSharpSDK.Tests; |
| 1 | +using ManagedCode.CodexSharpSDK.Client; |
| 2 | +using ManagedCode.CodexSharpSDK.Execution; |
| 3 | +using ManagedCode.CodexSharpSDK.Tests.Shared; |
| 4 | + |
| 5 | +namespace ManagedCode.CodexSharpSDK.Tests.Integration; |
2 | 6 |
|
3 | 7 | public class CodexExecIntegrationTests |
4 | 8 | { |
5 | | - private const string SandboxRootDirectoryName = ".sandbox"; |
6 | | - private const string SandboxDirectoryPrefix = "codexsharp-integration-"; |
| 9 | + private const string FirstPrompt = "Reply with short plain text: first."; |
| 10 | + private const string SecondPrompt = "Reply with short plain text: second."; |
| 11 | + private const string InvalidModel = "__codexsharp_invalid_model__"; |
7 | 12 |
|
8 | 13 | [Test] |
9 | 14 | public async Task RunAsync_UsesDefaultProcessRunner_EndToEnd() |
10 | 15 | { |
11 | | - if (OperatingSystem.IsWindows()) |
| 16 | + var settings = RealCodexTestSupport.TryGetSettings(); |
| 17 | + if (settings is null) |
12 | 18 | { |
13 | 19 | return; |
14 | 20 | } |
15 | 21 |
|
16 | | - var sandboxDirectory = CreateSandboxDirectory(); |
17 | | - try |
18 | | - { |
19 | | - var argsLog = Path.Combine(sandboxDirectory, "args.log"); |
20 | | - var inputLog = Path.Combine(sandboxDirectory, "input.log"); |
21 | | - var executablePath = Path.Combine(sandboxDirectory, "fake-codex.sh"); |
22 | | - |
23 | | - WriteExecutableScript(executablePath, BuildSuccessScript(argsLog, inputLog, "thread_it_1", "integration_ok")); |
24 | | - |
25 | | - await using var client = new CodexClient(new CodexOptions |
26 | | - { |
27 | | - CodexPathOverride = executablePath, |
28 | | - }); |
29 | | - |
30 | | - var thread = client.StartThread(new ThreadOptions |
31 | | - { |
32 | | - Model = "gpt-5.3-codex", |
33 | | - SandboxMode = SandboxMode.WorkspaceWrite, |
34 | | - }); |
35 | | - |
36 | | - var result = await thread.RunAsync("hello from integration"); |
37 | | - |
38 | | - await Assert.That(thread.Id).IsEqualTo("thread_it_1"); |
39 | | - await Assert.That(result.FinalResponse).IsEqualTo("integration_ok"); |
40 | | - await Assert.That(result.Usage).IsNotNull(); |
41 | | - |
42 | | - var args = await File.ReadAllLinesAsync(argsLog); |
43 | | - await Assert.That(args).Contains("exec"); |
44 | | - await Assert.That(args).Contains("--experimental-json"); |
45 | | - await Assert.That(args).Contains("--model"); |
46 | | - await Assert.That(args).Contains("gpt-5.3-codex"); |
47 | | - await Assert.That(args).Contains("--sandbox"); |
48 | | - await Assert.That(args).Contains("workspace-write"); |
49 | | - |
50 | | - var input = await File.ReadAllTextAsync(inputLog); |
51 | | - await Assert.That(input).IsEqualTo("hello from integration"); |
52 | | - } |
53 | | - finally |
| 22 | + var exec = new CodexExec(); |
| 23 | + using var cancellation = new CancellationTokenSource(TimeSpan.FromMinutes(2)); |
| 24 | + |
| 25 | + var lines = await DrainToListAsync(exec.RunAsync(new CodexExecArgs |
54 | 26 | { |
55 | | - CleanupSandboxDirectory(sandboxDirectory); |
56 | | - } |
| 27 | + Input = FirstPrompt, |
| 28 | + Model = settings.Model, |
| 29 | + ModelReasoningEffort = ModelReasoningEffort.Minimal, |
| 30 | + SandboxMode = SandboxMode.WorkspaceWrite, |
| 31 | + NetworkAccessEnabled = true, |
| 32 | + ApiKey = settings.ApiKey, |
| 33 | + CancellationToken = cancellation.Token, |
| 34 | + })); |
| 35 | + |
| 36 | + await Assert.That(lines.Any(line => line.Contains("\"type\":\"thread.started\"", StringComparison.Ordinal))).IsTrue(); |
| 37 | + await Assert.That(lines.Any(line => line.Contains("\"type\":\"turn.completed\"", StringComparison.Ordinal))).IsTrue(); |
57 | 38 | } |
58 | 39 |
|
59 | 40 | [Test] |
60 | 41 | public async Task RunAsync_SecondCallPassesResumeArgument_EndToEnd() |
61 | 42 | { |
62 | | - if (OperatingSystem.IsWindows()) |
| 43 | + var settings = RealCodexTestSupport.TryGetSettings(); |
| 44 | + if (settings is null) |
63 | 45 | { |
64 | 46 | return; |
65 | 47 | } |
66 | 48 |
|
67 | | - var sandboxDirectory = CreateSandboxDirectory(); |
68 | | - try |
69 | | - { |
70 | | - var argsLog = Path.Combine(sandboxDirectory, "args.log"); |
71 | | - var inputLog = Path.Combine(sandboxDirectory, "input.log"); |
72 | | - var executablePath = Path.Combine(sandboxDirectory, "fake-codex.sh"); |
73 | | - |
74 | | - WriteExecutableScript(executablePath, BuildSuccessScript(argsLog, inputLog, "thread_it_2", "ok")); |
75 | | - |
76 | | - await using var client = new CodexClient(new CodexOptions |
77 | | - { |
78 | | - CodexPathOverride = executablePath, |
79 | | - }); |
80 | | - |
81 | | - var thread = client.StartThread(); |
| 49 | + using var client = RealCodexTestSupport.CreateClient(settings); |
| 50 | + using var cancellation = new CancellationTokenSource(TimeSpan.FromMinutes(3)); |
82 | 51 |
|
83 | | - await thread.RunAsync("first"); |
84 | | - await thread.RunAsync("second"); |
85 | | - |
86 | | - var args = await File.ReadAllLinesAsync(argsLog); |
87 | | - var resumeIndex = Array.IndexOf(args, "resume"); |
88 | | - |
89 | | - await Assert.That(resumeIndex).IsGreaterThan(-1); |
90 | | - await Assert.That(args[resumeIndex + 1]).IsEqualTo("thread_it_2"); |
91 | | - } |
92 | | - finally |
| 52 | + var thread = client.StartThread(new ThreadOptions |
93 | 53 | { |
94 | | - CleanupSandboxDirectory(sandboxDirectory); |
95 | | - } |
| 54 | + Model = settings.Model, |
| 55 | + ModelReasoningEffort = ModelReasoningEffort.Minimal, |
| 56 | + SandboxMode = SandboxMode.WorkspaceWrite, |
| 57 | + NetworkAccessEnabled = true, |
| 58 | + }); |
| 59 | + |
| 60 | + var firstResult = await thread.RunAsync( |
| 61 | + FirstPrompt, |
| 62 | + new TurnOptions { CancellationToken = cancellation.Token }); |
| 63 | + |
| 64 | + var threadId = thread.Id; |
| 65 | + await Assert.That(threadId).IsNotNull(); |
| 66 | + await Assert.That(firstResult.Usage).IsNotNull(); |
| 67 | + |
| 68 | + var secondResult = await thread.RunAsync( |
| 69 | + SecondPrompt, |
| 70 | + new TurnOptions { CancellationToken = cancellation.Token }); |
| 71 | + |
| 72 | + await Assert.That(secondResult.Usage).IsNotNull(); |
| 73 | + await Assert.That(thread.Id).IsEqualTo(threadId); |
96 | 74 | } |
97 | 75 |
|
98 | 76 | [Test] |
99 | 77 | public async Task RunAsync_PropagatesNonZeroExitCode_EndToEnd() |
100 | 78 | { |
101 | | - if (OperatingSystem.IsWindows()) |
| 79 | + var settings = RealCodexTestSupport.TryGetSettings(); |
| 80 | + if (settings is null) |
102 | 81 | { |
103 | 82 | return; |
104 | 83 | } |
105 | 84 |
|
106 | | - var sandboxDirectory = CreateSandboxDirectory(); |
107 | | - try |
108 | | - { |
109 | | - var executablePath = Path.Combine(sandboxDirectory, "fake-codex.sh"); |
110 | | - WriteExecutableScript(executablePath, BuildFailureScript()); |
| 85 | + var exec = new CodexExec(); |
| 86 | + using var cancellation = new CancellationTokenSource(TimeSpan.FromMinutes(2)); |
111 | 87 |
|
112 | | - await using var client = new CodexClient(new CodexOptions |
113 | | - { |
114 | | - CodexPathOverride = executablePath, |
115 | | - }); |
116 | | - |
117 | | - var thread = client.StartThread(); |
118 | | - var action = async () => await thread.RunAsync("trigger failure"); |
119 | | - |
120 | | - var exception = await Assert.That(action).ThrowsException(); |
121 | | - await Assert.That(exception).IsTypeOf<InvalidOperationException>(); |
122 | | - await Assert.That(exception!.Message).Contains("exited with code 9"); |
123 | | - } |
124 | | - finally |
| 88 | + var action = async () => await DrainAsync(exec.RunAsync(new CodexExecArgs |
125 | 89 | { |
126 | | - CleanupSandboxDirectory(sandboxDirectory); |
127 | | - } |
| 90 | + Input = FirstPrompt, |
| 91 | + Model = InvalidModel, |
| 92 | + SandboxMode = SandboxMode.WorkspaceWrite, |
| 93 | + NetworkAccessEnabled = true, |
| 94 | + ApiKey = settings.ApiKey, |
| 95 | + CancellationToken = cancellation.Token, |
| 96 | + })); |
| 97 | + |
| 98 | + var exception = await Assert.That(action).ThrowsException(); |
| 99 | + await Assert.That(exception).IsTypeOf<InvalidOperationException>(); |
| 100 | + await Assert.That(exception!.Message).Contains("exited with code"); |
128 | 101 | } |
129 | 102 |
|
130 | | - private static string CreateSandboxDirectory() |
| 103 | + private static async Task DrainAsync(IAsyncEnumerable<string> lines) |
131 | 104 | { |
132 | | - var testsDirectory = Environment.CurrentDirectory; |
133 | | - var sandboxRootDirectory = Path.Combine(testsDirectory, SandboxRootDirectoryName); |
134 | | - Directory.CreateDirectory(sandboxRootDirectory); |
135 | | - |
136 | | - var directory = Path.Combine(sandboxRootDirectory, $"{SandboxDirectoryPrefix}{Guid.NewGuid():N}"); |
137 | | - Directory.CreateDirectory(directory); |
138 | | - return directory; |
139 | | - } |
140 | | - |
141 | | - private static void CleanupSandboxDirectory(string directory) |
142 | | - { |
143 | | - try |
| 105 | + await foreach (var _ in lines) |
144 | 106 | { |
145 | | - if (Directory.Exists(directory)) |
146 | | - { |
147 | | - Directory.Delete(directory, recursive: true); |
148 | | - } |
| 107 | + // Intentionally empty. |
149 | 108 | } |
150 | | - catch |
151 | | - { |
152 | | - // Suppress cleanup errors. |
153 | | - } |
154 | | - } |
155 | | - |
156 | | - private static string BuildSuccessScript(string argsLog, string inputLog, string threadId, string response) |
157 | | - { |
158 | | - var escapedThreadId = EscapeJsonString(threadId); |
159 | | - var escapedResponse = EscapeJsonString(response); |
160 | | - |
161 | | - return string.Join('\n', |
162 | | - [ |
163 | | - "#!/usr/bin/env bash", |
164 | | - "set -euo pipefail", |
165 | | - $"args_log={ToBashLiteral(argsLog)}", |
166 | | - $"input_log={ToBashLiteral(inputLog)}", |
167 | | - "printf '%s\\n' \"$@\" > \"$args_log\"", |
168 | | - "cat > \"$input_log\"", |
169 | | - $"echo '{{\"type\":\"thread.started\",\"thread_id\":\"{escapedThreadId}\"}}'", |
170 | | - $"echo '{{\"type\":\"item.completed\",\"item\":{{\"id\":\"item_1\",\"type\":\"agent_message\",\"text\":\"{escapedResponse}\"}}}}'", |
171 | | - "echo '{\"type\":\"turn.completed\",\"usage\":{\"input_tokens\":2,\"cached_input_tokens\":0,\"output_tokens\":3}}'", |
172 | | - ]) + "\n"; |
173 | 109 | } |
174 | 110 |
|
175 | | - private static string BuildFailureScript() |
| 111 | + private static async Task<List<string>> DrainToListAsync(IAsyncEnumerable<string> lines) |
176 | 112 | { |
177 | | - return """ |
178 | | -#!/usr/bin/env bash |
179 | | -set -euo pipefail |
180 | | -cat > /dev/null |
181 | | -echo "forced integration failure" >&2 |
182 | | -exit 9 |
183 | | -"""; |
184 | | - } |
| 113 | + var result = new List<string>(); |
185 | 114 |
|
186 | | - private static string ToBashLiteral(string value) |
187 | | - { |
188 | | - return $"'{value.Replace("'", "'\"'\"'")}'"; |
189 | | - } |
190 | | - |
191 | | - private static string EscapeJsonString(string value) |
192 | | - { |
193 | | - return value.Replace("\\", "\\\\").Replace("\"", "\\\""); |
194 | | - } |
195 | | - |
196 | | - private static void WriteExecutableScript(string path, string scriptContent) |
197 | | - { |
198 | | - File.WriteAllText(path, scriptContent); |
199 | | - if (!OperatingSystem.IsWindows()) |
| 115 | + await foreach (var line in lines) |
200 | 116 | { |
201 | | - File.SetUnixFileMode(path, UnixFileMode.UserRead | UnixFileMode.UserWrite | UnixFileMode.UserExecute); |
| 117 | + result.Add(line); |
202 | 118 | } |
| 119 | + |
| 120 | + return result; |
203 | 121 | } |
204 | 122 | } |
0 commit comments