diff --git a/src/Microsoft.ComponentDetection.Common/DockerService.cs b/src/Microsoft.ComponentDetection.Common/DockerService.cs index d55540bd8..538a56013 100644 --- a/src/Microsoft.ComponentDetection.Common/DockerService.cs +++ b/src/Microsoft.ComponentDetection.Common/DockerService.cs @@ -216,8 +216,22 @@ public async Task InspectImageAsync(string image, Cancellation } finally { - // Best-effort container cleanup; RemoveContainerAsync already handles not-found. - await RemoveContainerAsync(container.ID, CancellationToken.None); + // Best-effort container cleanup with a bounded timeout. + // RemoveContainerAsync already handles not-found, but we must guard against + // the Docker daemon hanging on container removal (e.g. when the container + // process is stuck), which would block the detector indefinitely. + using var removeCts = new CancellationTokenSource(TimeSpan.FromSeconds(30)); + try + { + await RemoveContainerAsync(container.ID, removeCts.Token); + } + catch (Exception ex) + { + this.logger.LogWarning( + ex, + "Failed to remove container {ContainerId}; abandoning cleanup", + container.ID); + } } } diff --git a/src/Microsoft.ComponentDetection.Detectors/linux/LinuxContainerDetector.cs b/src/Microsoft.ComponentDetection.Detectors/linux/LinuxContainerDetector.cs index 8b3fff13e..e4aa56a74 100644 --- a/src/Microsoft.ComponentDetection.Detectors/linux/LinuxContainerDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/linux/LinuxContainerDetector.cs @@ -127,6 +127,10 @@ public async Task ExecuteDetectorAsync( GetTimeout(request.DetectorArgs) ); } + catch (Exception e) + { + this.logger.LogError(e, "Unexpected error during Linux container image scanning"); + } return new IndividualDetectorScanResult { @@ -285,6 +289,10 @@ private async Task ResolveImageAsync( ); } } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + throw; + } catch (Exception e) { this.logger.LogWarning(e, "Processing of image {ContainerImage} (kind {ImageType}) failed", imageRef.OriginalInput, imageRef.Kind); @@ -388,6 +396,10 @@ private async Task ScanDockerImageAsync( return this.RecordComponents(containerDetails, layers, componentRecorder); } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + throw; + } catch (Exception e) { this.logger.LogWarning(e, "Scanning of image {ImageId} failed", containerDetails.ImageId); @@ -525,6 +537,10 @@ private async Task ScanLocalImageAsync( return this.RecordComponents(containerDetails, layers, componentRecorder); } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + throw; + } catch (Exception e) { this.logger.LogWarning( diff --git a/src/Microsoft.ComponentDetection.Detectors/linux/LinuxScanner.cs b/src/Microsoft.ComponentDetection.Detectors/linux/LinuxScanner.cs index 6482958b9..0010604d6 100644 --- a/src/Microsoft.ComponentDetection.Detectors/linux/LinuxScanner.cs +++ b/src/Microsoft.ComponentDetection.Detectors/linux/LinuxScanner.cs @@ -296,7 +296,7 @@ private async Task RunSyftAsync( } catch (Exception e) { - syftTelemetryRecord.Exception = JsonSerializer.Serialize(e); + syftTelemetryRecord.Exception = e.ToString(); this.logger.LogError(e, "Failed to run syft"); throw; } diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxContainerDetectorTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxContainerDetectorTests.cs index 12308a383..eb6780186 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxContainerDetectorTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxContainerDetectorTests.cs @@ -1290,4 +1290,209 @@ public async Task TestLinuxContainerDetector_ImageParseFailure_ContinuesScanning Times.Once ); } + + [TestMethod] + public async Task ExecuteDetectorAsync_ScannerThrowsOce_ReturnsSuccessWithEmptyResults() + { + // Verifies Fix 2 + OCE catch: when the scanning timeout fires, + // the OCE propagates out of the scan methods and is caught by + // ExecuteDetectorAsync, which returns Success with no components. + // We pass a pre-cancelled token so the linked timeoutCts is also + // cancelled, making the `when (cancellationToken.IsCancellationRequested)` + // guard match and re-throw the OCE to ExecuteDetectorAsync. + var componentRecorder = new ComponentRecorder(); + var detectorArgs = new Dictionary { { "Linux.ScanningTimeoutSec", "600" } }; + + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + detectorArgs, + [NodeLatestImage], + componentRecorder + ); + + using var cts = new CancellationTokenSource(); + await cts.CancelAsync(); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ScanLinuxAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ThrowsAsync(new OperationCanceledException()); + + var detector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + var result = await detector.ExecuteDetectorAsync(scanRequest, cts.Token); + + result.ResultCode.Should().Be(ProcessingResultCode.Success); + result.ContainerDetails.Should().BeEmpty(); + componentRecorder.GetDetectedComponents().Should().BeEmpty(); + } + + [TestMethod] + public async Task ExecuteDetectorAsync_ScannerThrowsUnexpectedException_ReturnsSuccessDoesNotCrash() + { + // Verifies Fix 4 safety net: an unexpected exception from the + // scanner must never escape the detector. It should be caught, + // and the detector should return Success with empty results. + var componentRecorder = new ComponentRecorder(); + + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + [NodeLatestImage], + componentRecorder + ); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ScanLinuxAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ThrowsAsync(new InvalidOperationException("Unexpected Docker failure")); + + var detector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + // The critical assertion: the detector must NOT throw. + var result = await detector.ExecuteDetectorAsync(scanRequest); + + result.ResultCode.Should().Be(ProcessingResultCode.Success); + result.ContainerDetails.Should().BeEmpty(); + componentRecorder.GetDetectedComponents().Should().BeEmpty(); + } + + [TestMethod] + public async Task ExecuteDetectorAsync_ImageResolveThrowsOce_ReturnsSuccessWithEmptyResults() + { + // Verifies Fix 2 in the resolve phase: if the timeout fires + // during image pull/inspect, the OCE is re-thrown from + // ResolveImageAsync and caught by ExecuteDetectorAsync. + // We pass a pre-cancelled token so the linked timeoutCts is also + // cancelled, making the `when` guard match. + var componentRecorder = new ComponentRecorder(); + var detectorArgs = new Dictionary { { "Linux.ScanningTimeoutSec", "600" } }; + + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + detectorArgs, + [NodeLatestImage], + componentRecorder + ); + + using var cts = new CancellationTokenSource(); + await cts.CancelAsync(); + + this.mockDockerService.Setup(service => + service.ImageExistsLocallyAsync(It.IsAny(), It.IsAny()) + ) + .ThrowsAsync(new OperationCanceledException()); + + var detector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + var result = await detector.ExecuteDetectorAsync(scanRequest, cts.Token); + + result.ResultCode.Should().Be(ProcessingResultCode.Success); + result.ContainerDetails.Should().BeEmpty(); + componentRecorder.GetDetectedComponents().Should().BeEmpty(); + } + + [TestMethod] + public async Task ExecuteDetectorAsync_ScanThrowsOce_OtherImageStillScanned() + { + // When scanning multiple images and one fails with an uncancelled OCE, + // the generic catch swallows it, so the other image still scans. + // Token is NOT cancelled here — this tests multi-image resilience + // via the generic catch path, not the timeout re-throw path. + var componentRecorder = new ComponentRecorder(); + const string secondImage = "alpine:latest"; + const string secondImageId = "alpine123"; + + this.mockDockerService.Setup(service => + service.InspectImageAsync(secondImage, It.IsAny()) + ) + .ReturnsAsync( + new ContainerDetails + { + Id = 2, + ImageId = secondImageId, + Layers = [], + } + ); + + // First image (NodeLatestImage) scans fine via default mock. + // Second image throws OCE (not tied to cancellation, so generic catch handles it). + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ScanLinuxAsync( + secondImageId, + It.IsAny>(), + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ThrowsAsync(new OperationCanceledException()); + + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + [NodeLatestImage, secondImage], + componentRecorder + ); + + var detector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + var result = await detector.ExecuteDetectorAsync(scanRequest); + + // Must not crash, must return Success + result.ResultCode.Should().Be(ProcessingResultCode.Success); + + // The first image should have been scanned successfully + this.mockSyftLinuxScanner.Verify( + scanner => scanner.ScanLinuxAsync( + NodeLatestDigest, + It.IsAny>(), + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny()), + Times.Once); + + // The first image should have produced components + componentRecorder.GetDetectedComponents().Should().NotBeEmpty(); + } }