Skip to content

Commit 562d86b

Browse files
AMaini503Aayush Maini
andauthored
Fix Docker scan timeout cancellation not working (#1729)
* Fix Docker scan timeout cancellation not working The cancellation token passed to CreateAndRunContainerAsync was not being honored when blocked on ReadOutputToEndAsync, causing scans to hang indefinitely despite the 10-minute timeout. Changes: - Add ReadContainerOutputAsync with Task.WhenAny pattern to race read against cancellation - Kill container when cancelled to unblock socket-level read - Add DockerServiceStepTelemetryRecord for per-step telemetry to identify hung operations * Address PR review feedback - Observe readTask without blocking using ContinueWith - Make RemoveContainerAsync idempotent (ignore DockerContainerNotFoundException) - Capture all exceptions in step telemetry records * Use stream.Dispose() instead of ContinueWith per review feedback * Observe readTask after stream disposal to prevent unobserved task exceptions * Simplify task observation to empty continuation with OnlyOnFaulted * Use finally block for container cleanup to prevent leaks * Capture exception in ReadOutput step telemetry * Fix XML doc comment --------- Co-authored-by: Aayush Maini <aamaini@microsoft.com>
1 parent cd64b9b commit 562d86b

2 files changed

Lines changed: 212 additions & 39 deletions

File tree

src/Microsoft.ComponentDetection.Common/DockerService.cs

Lines changed: 171 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -184,78 +184,210 @@ public async Task<ContainerDetails> InspectImageAsync(string image, Cancellation
184184

185185
public async Task<(string Stdout, string Stderr)> CreateAndRunContainerAsync(string image, IList<string> command, CancellationToken cancellationToken = default)
186186
{
187+
var commandJson = JsonSerializer.Serialize(command);
188+
189+
// Summary record captures overall operation including stdout/stderr
187190
using var record = new DockerServiceTelemetryRecord
188191
{
189192
Image = image,
190-
Command = JsonSerializer.Serialize(command),
193+
Command = commandJson,
191194
};
195+
192196
await this.TryPullImageAsync(image, cancellationToken);
193197
var container = await CreateContainerAsync(image, command, cancellationToken);
194198
record.Container = JsonSerializer.Serialize(container);
195-
var stream = await AttachContainerAsync(container.ID, cancellationToken);
196-
await StartContainerAsync(container.ID, cancellationToken);
197-
var (stdout, stderr) = await stream.ReadOutputToEndAsync(cancellationToken);
198-
record.Stdout = stdout;
199-
record.Stderr = stderr;
200-
await RemoveContainerAsync(container.ID, cancellationToken);
201-
return (stdout, stderr);
199+
200+
try
201+
{
202+
var stream = await AttachContainerAsync(container.ID, cancellationToken);
203+
await StartContainerAsync(container.ID, cancellationToken);
204+
205+
var (stdout, stderr) = await ReadContainerOutputAsync(stream, container.ID, image, cancellationToken);
206+
207+
record.Stdout = stdout;
208+
record.Stderr = stderr;
209+
210+
return (stdout, stderr);
211+
}
212+
finally
213+
{
214+
// Best-effort container cleanup; RemoveContainerAsync already handles not-found.
215+
await RemoveContainerAsync(container.ID, CancellationToken.None);
216+
}
217+
}
218+
219+
/// <summary>
220+
/// Reads container output with proper cancellation support.
221+
/// ReadOutputToEndAsync doesn't properly honor cancellation when blocked on socket read,
222+
/// so we race it against a cancellation-aware delay and dispose the stream if cancelled.
223+
/// </summary>
224+
private static async Task<(string Stdout, string Stderr)> ReadContainerOutputAsync(
225+
MultiplexedStream stream,
226+
string containerId,
227+
string image,
228+
CancellationToken cancellationToken)
229+
{
230+
using var record = new DockerServiceStepTelemetryRecord
231+
{
232+
Step = "ReadOutput",
233+
ContainerId = containerId,
234+
Image = image,
235+
};
236+
237+
try
238+
{
239+
var readTask = stream.ReadOutputToEndAsync(CancellationToken.None);
240+
var delayTask = Task.Delay(Timeout.Infinite, cancellationToken);
241+
242+
var completedTask = await Task.WhenAny(readTask, delayTask);
243+
244+
if (completedTask == delayTask)
245+
{
246+
record.WasCancelled = true;
247+
248+
// Dispose the stream to unblock any pending read operation
249+
stream.Dispose();
250+
251+
// Observe the readTask to prevent unobserved task exceptions.
252+
// Running any continuation automatically marks the exception as observed.
253+
_ = readTask.ContinueWith(
254+
static _ => { },
255+
CancellationToken.None,
256+
TaskContinuationOptions.OnlyOnFaulted,
257+
TaskScheduler.Default);
258+
259+
// Caller is responsible for container cleanup via finally block
260+
cancellationToken.ThrowIfCancellationRequested();
261+
}
262+
263+
return await readTask;
264+
}
265+
catch (Exception ex)
266+
{
267+
record.ExceptionMessage = ex.Message;
268+
throw;
269+
}
202270
}
203271

204272
private static async Task<CreateContainerResponse> CreateContainerAsync(
205273
string image,
206274
IList<string> command,
207275
CancellationToken cancellationToken = default)
208276
{
209-
var parameters = new CreateContainerParameters
277+
using var record = new DockerServiceStepTelemetryRecord
210278
{
279+
Step = "CreateContainer",
211280
Image = image,
212-
Cmd = command,
213-
NetworkDisabled = true,
214-
HostConfig = new HostConfig
215-
{
216-
CapDrop =
217-
[
218-
"all",
219-
],
220-
SecurityOpt =
221-
[
222-
"no-new-privileges",
223-
],
224-
Binds =
225-
[
226-
$"{Path.GetTempPath()}:/tmp",
227-
"/var/run/docker.sock:/var/run/docker.sock",
228-
],
229-
},
281+
Command = JsonSerializer.Serialize(command),
230282
};
231-
return await Client.Containers.CreateContainerAsync(parameters, cancellationToken);
283+
284+
try
285+
{
286+
var parameters = new CreateContainerParameters
287+
{
288+
Image = image,
289+
Cmd = command,
290+
NetworkDisabled = true,
291+
HostConfig = new HostConfig
292+
{
293+
CapDrop =
294+
[
295+
"all",
296+
],
297+
SecurityOpt =
298+
[
299+
"no-new-privileges",
300+
],
301+
Binds =
302+
[
303+
$"{Path.GetTempPath()}:/tmp",
304+
"/var/run/docker.sock:/var/run/docker.sock",
305+
],
306+
},
307+
};
308+
309+
var response = await Client.Containers.CreateContainerAsync(parameters, cancellationToken);
310+
record.ContainerId = response.ID;
311+
return response;
312+
}
313+
catch (Exception ex)
314+
{
315+
record.ExceptionMessage = ex.Message;
316+
throw;
317+
}
232318
}
233319

234320
private static async Task<MultiplexedStream> AttachContainerAsync(string containerId, CancellationToken cancellationToken = default)
235321
{
236-
var parameters = new ContainerAttachParameters
322+
using var record = new DockerServiceStepTelemetryRecord
237323
{
238-
Stdout = true,
239-
Stderr = true,
240-
Stream = true,
324+
Step = "AttachContainer",
325+
ContainerId = containerId,
241326
};
242-
return await Client.Containers.AttachContainerAsync(containerId, false, parameters, cancellationToken);
327+
328+
try
329+
{
330+
var parameters = new ContainerAttachParameters
331+
{
332+
Stdout = true,
333+
Stderr = true,
334+
Stream = true,
335+
};
336+
return await Client.Containers.AttachContainerAsync(containerId, false, parameters, cancellationToken);
337+
}
338+
catch (Exception ex)
339+
{
340+
record.ExceptionMessage = ex.Message;
341+
throw;
342+
}
243343
}
244344

245345
private static async Task StartContainerAsync(string containerId, CancellationToken cancellationToken = default)
246346
{
247-
var parameters = new ContainerStartParameters();
248-
await Client.Containers.StartContainerAsync(containerId, parameters, cancellationToken);
347+
using var record = new DockerServiceStepTelemetryRecord
348+
{
349+
Step = "StartContainer",
350+
ContainerId = containerId,
351+
};
352+
353+
try
354+
{
355+
var parameters = new ContainerStartParameters();
356+
await Client.Containers.StartContainerAsync(containerId, parameters, cancellationToken);
357+
}
358+
catch (Exception ex)
359+
{
360+
record.ExceptionMessage = ex.Message;
361+
throw;
362+
}
249363
}
250364

251365
private static async Task RemoveContainerAsync(string containerId, CancellationToken cancellationToken = default)
252366
{
253-
var parameters = new ContainerRemoveParameters
367+
using var record = new DockerServiceStepTelemetryRecord
254368
{
255-
Force = true,
256-
RemoveVolumes = true,
369+
Step = "RemoveContainer",
370+
ContainerId = containerId,
257371
};
258-
await Client.Containers.RemoveContainerAsync(containerId, parameters, cancellationToken);
372+
373+
try
374+
{
375+
var parameters = new ContainerRemoveParameters
376+
{
377+
Force = true,
378+
RemoveVolumes = true,
379+
};
380+
await Client.Containers.RemoveContainerAsync(containerId, parameters, cancellationToken);
381+
}
382+
catch (DockerContainerNotFoundException)
383+
{
384+
// Container already removed - this is expected during cleanup
385+
}
386+
catch (Exception ex)
387+
{
388+
record.ExceptionMessage = ex.Message;
389+
throw;
390+
}
259391
}
260392

261393
private static int GetContainerId()
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
namespace Microsoft.ComponentDetection.Common.Telemetry.Records;
2+
3+
/// <summary>
4+
/// Telemetry record for individual Docker service operations.
5+
/// Each step emits its own record, allowing identification of hung operations
6+
/// by observing which step's record is missing.
7+
/// </summary>
8+
internal class DockerServiceStepTelemetryRecord : BaseDetectionTelemetryRecord
9+
{
10+
public override string RecordName => "DockerServiceStep";
11+
12+
/// <summary>
13+
/// The step being performed (CreateContainer, AttachContainer, StartContainer, ReadOutput, RemoveContainer).
14+
/// </summary>
15+
public string? Step { get; set; }
16+
17+
/// <summary>
18+
/// The container ID (for correlation across steps).
19+
/// </summary>
20+
public string? ContainerId { get; set; }
21+
22+
/// <summary>
23+
/// The image being scanned.
24+
/// </summary>
25+
public string? Image { get; set; }
26+
27+
/// <summary>
28+
/// The command passed to the container.
29+
/// </summary>
30+
public string? Command { get; set; }
31+
32+
/// <summary>
33+
/// Whether this step was cancelled due to timeout.
34+
/// </summary>
35+
public bool WasCancelled { get; set; }
36+
37+
/// <summary>
38+
/// Exception message if the step failed.
39+
/// </summary>
40+
public string? ExceptionMessage { get; set; }
41+
}

0 commit comments

Comments
 (0)