Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 53 additions & 3 deletions src/Microsoft.ComponentDetection.Common/DockerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
namespace Microsoft.ComponentDetection.Common;

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.Json;
using System.Threading;
Expand All @@ -23,6 +23,13 @@ internal class DockerService : IDockerService
private const string BaseImageDigestAnnotation = "image.base.digest";

private static readonly DockerClient Client = new DockerClientConfiguration().CreateClient();

/// <summary>
/// Tracks in-flight image pulls so each image is pulled at most once concurrently.
/// Concurrent callers for the same image await the same task.
/// </summary>
private static readonly ConcurrentDictionary<string, Task<bool>> PullCache = new();
Comment thread
pauld-msft marked this conversation as resolved.

Comment thread
pauld-msft marked this conversation as resolved.
private static int incrementingContainerId;

private readonly ILogger logger;
Expand Down Expand Up @@ -77,11 +84,13 @@ public async Task<bool> ImageExistsLocallyAsync(string image, CancellationToken
{
var imageInspectResponse = await this.InspectImageAndSanitizeVarsAsync(image, cancellationToken);
record.ImageInspectResponse = JsonSerializer.Serialize(imageInspectResponse);
this.logger.LogDebug("Image {Image} found locally", image);
return true;
}
catch (Exception e)
{
record.ExceptionMessage = e.Message;
this.logger.LogDebug("Image {Image} not found locally", image);
cancellationToken.ThrowIfCancellationRequested();
return false;
}
Expand All @@ -95,6 +104,48 @@ private async Task<ImageInspectResponse> InspectImageAndSanitizeVarsAsync(string
}

public async Task<bool> TryPullImageAsync(string image, CancellationToken cancellationToken = default)
{
// Check if already available locally before attempting a pull
if (await this.ImageExistsLocallyAsync(image, cancellationToken))
{
return true;
}

var tcs = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
var existingTask = PullCache.GetOrAdd(image, tcs.Task);

if (existingTask != tcs.Task)
{
// Another caller is already pulling this image — await their result
this.logger.LogDebug("Image {Image} is already being pulled by another caller, waiting", image);
return await existingTask.WaitAsync(cancellationToken);
}
Comment thread
pauld-msft marked this conversation as resolved.
Comment thread
pauld-msft marked this conversation as resolved.

// We own this cache entry — perform the actual pull.
try
{
this.logger.LogDebug("Pulling image {Image}...", image);
var result = await this.PullImageCoreAsync(image, cancellationToken);
this.logger.LogDebug("Pull of image {Image} completed (success={Success})", image, result);
tcs.SetResult(result);
return result;
Comment thread
pauld-msft marked this conversation as resolved.
Comment thread
pauld-msft marked this conversation as resolved.
}
Comment thread
pauld-msft marked this conversation as resolved.
catch (Exception ex)
{
this.logger.LogDebug(ex, "Pull of image {Image} failed", image);
tcs.SetException(ex);
throw;
}
finally
{
// Remove the entry once complete. The cache only deduplicates concurrent
// in-flight pulls — subsequent callers will hit ImageExistsLocallyAsync
// for images that were already pulled successfully.
PullCache.TryRemove(image, out _);
}
}

private async Task<bool> PullImageCoreAsync(string image, CancellationToken cancellationToken)
{
using var record = new DockerServiceTryPullImageTelemetryRecord
{
Expand Down Expand Up @@ -213,7 +264,7 @@ public async Task<ContainerDetails> InspectImageAsync(string image, Cancellation
var stream = await AttachContainerAsync(container.ID, cancellationToken);
await StartContainerAsync(container.ID, cancellationToken);

this.logger.LogInformation("Container {ContainerId} started for image {Image}, reading output...", container.ID, image);
this.logger.LogInformation("Container {ContainerId} started with image {Image} to execute {Command}, reading output...", container.ID, image, commandJson);

// Flush telemetry before the long-running ReadOutput so we get mid-scan
// data in App Insights even if the process hangs during the read.
Expand Down Expand Up @@ -353,7 +404,6 @@ private static async Task<CreateContainerResponse> CreateContainerAsync(
{
var binds = new List<string>
{
$"{Path.GetTempPath()}:/tmp",
Comment thread
pauld-msft marked this conversation as resolved.
"/var/run/docker.sock:/var/run/docker.sock",
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,24 @@ await this.dockerService.ImageExistsLocallyAsync(refWithDigest, cancellationToke
refWithDigest,
cancellationToken
);

if (baseImageDetails == null)
{
record.BaseImageLayerMessage = "Failed to inspect base image after pull";
this.logger.LogInformation(
"Failed to inspect base image {BaseImage} after pull. Results will not be mapped to base image layers",
refWithDigest
);
return 0;
}

this.logger.LogDebug(
"Base image {BaseImage} resolved for {ContainerImage} with {LayerCount} layers",
refWithDigest,
image,
baseImageDetails.Layers.Count()
);

if (!ValidateBaseImageLayers(scannedImageDetails, baseImageDetails))
{
record.BaseImageLayerMessage =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
namespace Microsoft.ComponentDetection.Detectors.Linux;

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
Expand Down Expand Up @@ -31,6 +32,13 @@ internal class LinuxScanner : ILinuxScanner

private static readonly SemaphoreSlim ContainerSemaphore = new SemaphoreSlim(2);

/// <summary>
/// Caches in-flight syft runs.
/// When multiple detectors scan the same image concurrently, the second
/// caller awaits the already-running task instead of launching a new container.
/// </summary>
Comment thread
pauld-msft marked this conversation as resolved.
private static readonly ConcurrentDictionary<(string Source, LinuxScannerScope Scope, string Binds), Task<string>> SyftRunCache = new();
Comment thread
pauld-msft marked this conversation as resolved.

private static readonly int SemaphoreTimeout = Convert.ToInt32(
TimeSpan.FromHours(1).TotalMilliseconds
);
Expand Down Expand Up @@ -253,6 +261,7 @@ private IEnumerable<LayerMappedLinuxComponents> ProcessSyftOutputWithTelemetry(

/// <summary>
/// Runs the Syft scanner container and returns the stdout output.
/// Results are cached so that callers with identical parameters share a single container run.
/// </summary>
private async Task<string> RunSyftAsync(
string syftSource,
Expand All @@ -261,6 +270,51 @@ private async Task<string> RunSyftAsync(
LinuxScannerTelemetryRecord record,
LinuxScannerSyftTelemetryRecord syftTelemetryRecord,
CancellationToken cancellationToken)
{
var bindsKey = string.Join(";", (additionalBinds ?? []).OrderBy(b => b, StringComparer.Ordinal));
var cacheKey = (syftSource, scope, bindsKey);
var tcs = new TaskCompletionSource<string>(TaskCreationOptions.RunContinuationsAsynchronously);
var existingTask = SyftRunCache.GetOrAdd(cacheKey, tcs.Task);

Comment thread
pauld-msft marked this conversation as resolved.
if (existingTask != tcs.Task)
{
// Another caller is already running syft for this image+scope — await their result,
// but allow this caller's cancellation token to abort the wait.
this.logger.LogDebug("Syft run for {SyftSource} (scope={Scope}) is already in-flight, reusing existing result", syftSource, scope);
return await existingTask.WaitAsync(cancellationToken);
}

// We own this cache entry — run syft and propagate the result.
try
{
var result = await this.RunSyftCoreAsync(syftSource, scope, additionalBinds ?? [], record, syftTelemetryRecord, cancellationToken);
tcs.SetResult(result);
Comment thread
pauld-msft marked this conversation as resolved.
return result;
Comment thread
pauld-msft marked this conversation as resolved.
}
catch (Exception ex)
{
tcs.SetException(ex);
throw;
}
finally
{
// Remove the entry once complete. The cache only deduplicates concurrent
// in-flight calls — keeping completed entries would leak memory for the
// lifetime of the process.
SyftRunCache.TryRemove(cacheKey, out _);
}
}

/// <summary>
/// Executes the Syft scanner container and returns the stdout output.
/// </summary>
private async Task<string> RunSyftCoreAsync(
string syftSource,
LinuxScannerScope scope,
IList<string> additionalBinds,
LinuxScannerTelemetryRecord record,
LinuxScannerSyftTelemetryRecord syftTelemetryRecord,
CancellationToken cancellationToken)
{
var acquired = false;
var stdout = string.Empty;
Expand Down Expand Up @@ -357,4 +411,9 @@ HashSet<IArtifactComponentFactory> enabledFactories
var layerIds = artifact.Locations?.Select(location => location.LayerId).Distinct() ?? [];
return (component, layerIds);
}

/// <summary>
/// Clears the syft run cache. Intended for test isolation only.
/// </summary>
internal static void ResetCache() => SyftRunCache.Clear();
}
Original file line number Diff line number Diff line change
Expand Up @@ -1495,4 +1495,50 @@ public async Task ExecuteDetectorAsync_ScanThrowsOce_OtherImageStillScanned()
// The first image should have produced components
componentRecorder.GetDetectedComponents().Should().NotBeEmpty();
}

[TestMethod]
public async Task TestLinuxContainerDetector_DuplicateImageReferences_ScansOnlyOnceAsync()
{
var componentRecorder = new ComponentRecorder();

// Pass the exact same image reference twice.
var scanRequest = new ScanRequest(
new DirectoryInfo(Path.GetTempPath()),
(_, __) => false,
this.mockLogger.Object,
null,
[NodeLatestImage, NodeLatestImage],
componentRecorder
);

var linuxContainerDetector = new LinuxContainerDetector(
this.mockSyftLinuxScanner.Object,
this.mockDockerService.Object,
this.mockLinuxContainerDetectorLogger.Object
);

var scanResult = await linuxContainerDetector.ExecuteDetectorAsync(scanRequest);

scanResult.ResultCode.Should().Be(ProcessingResultCode.Success);
scanResult.ContainerDetails.Should().ContainSingle();

var detectedComponents = componentRecorder.GetDetectedComponents().ToList();
detectedComponents.Should().ContainSingle();
detectedComponents.First().Component.Id.Should().Be(BashPackageId);

// Both references resolve to the same ImageId via InspectImageAsync,
// so the ConcurrentDictionary in ProcessImagesAsync deduplicates them.
this.mockSyftLinuxScanner.Verify(
scanner =>
scanner.ScanLinuxAsync(
It.IsAny<string>(),
It.IsAny<IEnumerable<DockerLayer>>(),
It.IsAny<int>(),
It.IsAny<ISet<ComponentType>>(),
It.IsAny<LinuxScannerScope>(),
It.IsAny<CancellationToken>()
),
Times.Once
);
}
}
Loading
Loading