Skip to content

Commit e94e276

Browse files
committed
initial attempts at speeding up linux detection
1 parent 79782b8 commit e94e276

3 files changed

Lines changed: 112 additions & 2 deletions

File tree

src/Microsoft.ComponentDetection.Common/DockerService.cs

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
namespace Microsoft.ComponentDetection.Common;
33

44
using System;
5+
using System.Collections.Concurrent;
56
using System.Collections.Generic;
6-
using System.IO;
77
using System.Linq;
88
using System.Text.Json;
99
using System.Threading;
@@ -23,6 +23,15 @@ internal class DockerService : IDockerService
2323
private const string BaseImageDigestAnnotation = "image.base.digest";
2424

2525
private static readonly DockerClient Client = new DockerClientConfiguration().CreateClient();
26+
27+
/// <summary>
28+
/// Serializes image pull operations so only one pull runs at a time.
29+
/// Images that have already been pulled are tracked separately in <see cref="PulledImages"/> to avoid redundant work.
30+
/// </summary>
31+
private static readonly SemaphoreSlim PullSemaphore = new(1, 1);
32+
33+
private static readonly ConcurrentDictionary<string, bool> PulledImages = new();
34+
2635
private static int incrementingContainerId;
2736

2837
private readonly ILogger logger;
@@ -95,6 +104,46 @@ private async Task<ImageInspectResponse> InspectImageAndSanitizeVarsAsync(string
95104
}
96105

97106
/// <summary>
/// Makes <paramref name="image"/> available, pulling it only when it is neither recorded in
/// <c>PulledImages</c> nor reported as present by <c>ImageExistsLocallyAsync</c>.
/// </summary>
/// <param name="image">The image reference to make available.</param>
/// <param name="cancellationToken">Token forwarded to the local-existence check, the semaphore wait, and the pull.</param>
/// <returns>
/// <c>true</c> when the image is already recorded as pulled, exists locally, or
/// <c>PullImageCoreAsync</c> reports success; otherwise <c>false</c>.
/// </returns>
public async Task<bool> TryPullImageAsync(string image, CancellationToken cancellationToken = default)
{
    // Cheap checks first: the in-process record, then the local lookup. Neither needs the semaphore.
    var alreadyAvailable = PulledImages.ContainsKey(image)
        || await this.ImageExistsLocallyAsync(image, cancellationToken);
    if (alreadyAvailable)
    {
        // Recording an image that is already recorded is a no-op.
        PulledImages.TryAdd(image, true);
        return true;
    }

    await PullSemaphore.WaitAsync(cancellationToken);
    try
    {
        // Re-check the record once we hold the semaphore: a caller that held it
        // before us may have pulled this image while we were waiting.
        var available = PulledImages.ContainsKey(image)
            || await this.PullImageCoreAsync(image, cancellationToken);
        if (available)
        {
            PulledImages.TryAdd(image, true);
        }

        return available;
    }
    finally
    {
        PullSemaphore.Release();
    }
}
145+
146+
private async Task<bool> PullImageCoreAsync(string image, CancellationToken cancellationToken)
98147
{
99148
using var record = new DockerServiceTryPullImageTelemetryRecord
100149
{
@@ -353,7 +402,6 @@ private static async Task<CreateContainerResponse> CreateContainerAsync(
353402
{
354403
var binds = new List<string>
355404
{
356-
$"{Path.GetTempPath()}:/tmp",
357405
"/var/run/docker.sock:/var/run/docker.sock",
358406
};
359407

src/Microsoft.ComponentDetection.Detectors/linux/LinuxScanner.cs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
namespace Microsoft.ComponentDetection.Detectors.Linux;
22

33
using System;
4+
using System.Collections.Concurrent;
45
using System.Collections.Generic;
56
using System.Linq;
67
using System.Text.Json;
@@ -31,6 +32,13 @@ internal class LinuxScanner : ILinuxScanner
3132

3233
private static readonly SemaphoreSlim ContainerSemaphore = new SemaphoreSlim(2);
3334

35+
/// <summary>
36+
/// Caches in-flight and completed syft runs keyed by (source, scope).
37+
/// When multiple detectors scan the same image concurrently, the second
38+
/// caller awaits the already-running task instead of launching a new container.
39+
/// </summary>
40+
private static readonly ConcurrentDictionary<(string Source, LinuxScannerScope Scope), Task<string>> SyftRunCache = new();
41+
3442
private static readonly int SemaphoreTimeout = Convert.ToInt32(
3543
TimeSpan.FromHours(1).TotalMilliseconds
3644
);
@@ -253,6 +261,8 @@ private IEnumerable<LayerMappedLinuxComponents> ProcessSyftOutputWithTelemetry(
253261

254262
/// <summary>
/// Runs the Syft scanner container and returns the stdout output.
/// For Docker image scans (no additional binds), results are cached so that
/// concurrent callers scanning the same image+scope share a single container run.
/// </summary>
/// <remarks>
/// The first caller for a given (source, scope) key owns the run; later callers await the
/// owner's task and therefore observe the owner's result or exception. A waiting caller can
/// still abandon the wait through its own <paramref name="cancellationToken"/>.
/// NOTE(review): a waiter also observes a cancellation triggered by the owner's token —
/// confirm whether callers can ever pass different tokens.
/// </remarks>
/// <param name="syftSource">The source passed to syft; also part of the cache key.</param>
/// <param name="scope">The scan scope; also part of the cache key.</param>
/// <param name="additionalBinds">Extra container binds. When non-empty the run is never cached or shared.</param>
/// <param name="record">Telemetry record forwarded to the core run.</param>
/// <param name="syftTelemetryRecord">Syft telemetry record forwarded to the core run.</param>
/// <param name="cancellationToken">Token for the core run, or for the wait on another caller's run.</param>
/// <returns>The stdout output of the syft run.</returns>
private async Task<string> RunSyftAsync(
    string syftSource,
    LinuxScannerScope scope,
    IList<string> additionalBinds,
    LinuxScannerTelemetryRecord record,
    LinuxScannerSyftTelemetryRecord syftTelemetryRecord,
    CancellationToken cancellationToken)
{
    // Local image scans use additional binds specific to each call, so they
    // cannot be deduplicated safely — run them directly.
    if (additionalBinds is { Count: > 0 })
    {
        return await this.RunSyftCoreAsync(syftSource, scope, additionalBinds, record, syftTelemetryRecord, cancellationToken);
    }

    (string Source, LinuxScannerScope Scope) cacheKey = (syftSource, scope);
    var tcs = new TaskCompletionSource<string>(TaskCreationOptions.RunContinuationsAsynchronously);
    var existingTask = SyftRunCache.GetOrAdd(cacheKey, tcs.Task);

    if (existingTask != tcs.Task)
    {
        // Another caller is already running syft for this image+scope — await their result,
        // but keep honoring this caller's own token so the wait itself stays cancellable.
        return await existingTask.WaitAsync(cancellationToken);
    }

    // We own this cache entry — run syft and propagate the result.
    try
    {
        var result = await this.RunSyftCoreAsync(syftSource, scope, additionalBinds, record, syftTelemetryRecord, cancellationToken);
        tcs.SetResult(result);
        return result;
    }
    catch (Exception ex)
    {
        // Remove the failed entry so a retry can start fresh. Match on key AND value so that
        // only our own entry is evicted, never a newer one registered after ResetCache().
        var ownEntry = new KeyValuePair<(string Source, LinuxScannerScope Scope), Task<string>>(cacheKey, tcs.Task);
        SyftRunCache.TryRemove(ownEntry);
        tcs.SetException(ex);
        throw;
    }
}
307+
308+
/// <summary>
309+
/// Executes the Syft scanner container and returns the stdout output.
310+
/// </summary>
311+
private async Task<string> RunSyftCoreAsync(
312+
string syftSource,
313+
LinuxScannerScope scope,
314+
IList<string> additionalBinds,
315+
LinuxScannerTelemetryRecord record,
316+
LinuxScannerSyftTelemetryRecord syftTelemetryRecord,
317+
CancellationToken cancellationToken)
264318
{
265319
var acquired = false;
266320
var stdout = string.Empty;
@@ -357,4 +411,9 @@ HashSet<IArtifactComponentFactory> enabledFactories
357411
var layerIds = artifact.Locations?.Select(location => location.LayerId).Distinct() ?? [];
358412
return (component, layerIds);
359413
}
414+
415+
/// <summary>
/// Empties the static syft run cache. Intended for test isolation only.
/// </summary>
internal static void ResetCache()
{
    SyftRunCache.Clear();
}
360419
}

test/Microsoft.ComponentDetection.Detectors.Tests/LinuxScannerTests.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,9 @@ public class LinuxScannerTests
228228

229229
public LinuxScannerTests()
230230
{
231+
// Clear the static syft run cache to prevent cross-test interference.
232+
LinuxScanner.ResetCache();
233+
231234
this.mockDockerService = new Mock<IDockerService>();
232235
this.mockDockerService.Setup(service =>
233236
service.CanPingDockerAsync(It.IsAny<CancellationToken>())

0 commit comments

Comments
 (0)