Skip to content

Commit 97f3d73

Browse files
authored
Reapply "Add OCI image support to Linux scanner (#1708)" (#1716) (#1717)
* Reapply "Add OCI image support to Linux scanner (#1708)" (#1716) This reverts commit db58407.
* Reapply "Add Docker archive support to Linux scanner (#1711)" (#1715) (#1718) This reverts commit 69a2057.
* Handle parse failures gracefully
* Fix merge error
1 parent af0cff9 commit 97f3d73

13 files changed

Lines changed: 2120 additions & 184 deletions

File tree

docs/detectors/linux.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,31 @@ Linux detection depends on the following:
1111
Linux package detection is performed by running [Syft](https://github.com/anchore/syft) and parsing the output.
1212
The output contains the package name, version, and the layer of the container in which it was found.
1313

14+
### Supported Input Types
15+
16+
The Linux detector runs on container images passed under the `--DockerImagesToScan` flag.
17+
18+
Supported image reference formats are:
19+
20+
#### Name and Tag/Digest
21+
22+
Images in the local Docker daemon or a remote registry can be referenced by name and tag or digest. For example, `ubuntu:16.04`. Remote images will be pulled if they are not present locally.
23+
24+
#### Digest Only
25+
26+
Images already present in the local Docker daemon can be referenced by just a digest. For example, `sha256:56bab49eef2ef07505f6a1b0d5bd3a601dfc3c76ad4460f24c91d6fa298369ab`.
27+
28+
#### OCI Images
29+
30+
Images present on the filesystem as either an [OCI layout directory](https://specs.opencontainers.org/image-spec/image-layout/) or an OCI image archive (tarball) can be referenced by file path.
31+
32+
- For OCI image layout directories, use the prefix `oci-dir:` followed by the path to the directory, e.g. `oci-dir:/path/to/image`
33+
- For OCI image archives (tarballs), use the prefix `oci-archive:` followed by the path to the archive file, e.g. `oci-archive:/path/to/image.tar`
34+
35+
#### Docker Archives
36+
37+
Images saved to disk via `docker save` can be referenced using the `docker-archive:` prefix followed by the path to the tarball, e.g. `docker-archive:/path/to/image.tar`.
38+
1439
### Scanner Scope
1540

1641
By default, this detector invokes Syft with the `all-layers` scanning scope (i.e. the Syft argument `--scope all-layers`).
@@ -28,3 +53,4 @@ For example:
2853
## Known limitations
2954

3055
- Windows container scanning is not supported
56+
- Multi-platform images are not supported

src/Microsoft.ComponentDetection.Common/DockerService.cs

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,11 @@ public async Task<ContainerDetails> InspectImageAsync(string image, Cancellation
183183
}
184184

185185
// Convenience overload: forwards to the overload that accepts bind mounts,
// passing null so that no additional binds are requested.
public async Task<(string Stdout, string Stderr)> CreateAndRunContainerAsync(string image, IList<string> command, CancellationToken cancellationToken = default) =>
    await this.CreateAndRunContainerAsync(image, command, additionalBinds: null, cancellationToken: cancellationToken);
189+
190+
public async Task<(string Stdout, string Stderr)> CreateAndRunContainerAsync(string image, IList<string> command, IList<string> additionalBinds, CancellationToken cancellationToken = default)
186191
{
187192
var commandJson = JsonSerializer.Serialize(command);
188193

@@ -194,7 +199,7 @@ public async Task<ContainerDetails> InspectImageAsync(string image, Cancellation
194199
};
195200

196201
await this.TryPullImageAsync(image, cancellationToken);
197-
var container = await CreateContainerAsync(image, command, cancellationToken);
202+
var container = await CreateContainerAsync(image, command, additionalBinds, cancellationToken);
198203
record.Container = JsonSerializer.Serialize(container);
199204

200205
try
@@ -272,6 +277,7 @@ public async Task<ContainerDetails> InspectImageAsync(string image, Cancellation
272277
private static async Task<CreateContainerResponse> CreateContainerAsync(
273278
string image,
274279
IList<string> command,
280+
IList<string> additionalBinds,
275281
CancellationToken cancellationToken = default)
276282
{
277283
using var record = new DockerServiceStepTelemetryRecord
@@ -283,6 +289,17 @@ private static async Task<CreateContainerResponse> CreateContainerAsync(
283289

284290
try
285291
{
292+
var binds = new List<string>
293+
{
294+
$"{Path.GetTempPath()}:/tmp",
295+
"/var/run/docker.sock:/var/run/docker.sock",
296+
};
297+
298+
if (additionalBinds != null)
299+
{
300+
binds.AddRange(additionalBinds);
301+
}
302+
286303
var parameters = new CreateContainerParameters
287304
{
288305
Image = image,
@@ -298,11 +315,7 @@ private static async Task<CreateContainerResponse> CreateContainerAsync(
298315
[
299316
"no-new-privileges",
300317
],
301-
Binds =
302-
[
303-
$"{Path.GetTempPath()}:/tmp",
304-
"/var/run/docker.sock:/var/run/docker.sock",
305-
],
318+
Binds = binds,
306319
},
307320
};
308321

@@ -394,4 +407,10 @@ private static int GetContainerId()
394407
{
395408
return Interlocked.Increment(ref incrementingContainerId);
396409
}
410+
411+
/// <inheritdoc/>
public ContainerDetails GetEmptyContainerDetails() => new() { Id = GetContainerId() };
397416
}

src/Microsoft.ComponentDetection.Contracts/IDockerService.cs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,26 @@ public interface IDockerService
5252
/// <summary>
5353
/// Creates and runs a container with the given image and command.
5454
/// </summary>
55-
/// <param name="image">The image to inspect.</param>
55+
/// <param name="image">The image to run.</param>
5656
/// <param name="command">The command to run in the container.</param>
5757
/// <param name="cancellationToken">The cancellation token.</param>
5858
/// <returns>A tuple of stdout and stderr from the container.</returns>
5959
Task<(string Stdout, string Stderr)> CreateAndRunContainerAsync(string image, IList<string> command, CancellationToken cancellationToken = default);
60+
61+
/// <summary>
62+
/// Creates and runs a container with the given image, command, and additional volume binds.
63+
/// </summary>
64+
/// <param name="image">The image to run.</param>
65+
/// <param name="command">The command to run in the container.</param>
66+
/// <param name="additionalBinds">Additional volume bind mounts to add to the container (e.g., "/host/path:/container/path:ro").</param>
67+
/// <param name="cancellationToken">The cancellation token.</param>
68+
/// <returns>A tuple of stdout and stderr from the container.</returns>
69+
Task<(string Stdout, string Stderr)> CreateAndRunContainerAsync(string image, IList<string> command, IList<string> additionalBinds, CancellationToken cancellationToken = default);
70+
71+
/// <summary>
72+
/// Creates an empty <see cref="ContainerDetails"/> with a unique ID assigned.
73+
/// Used for image types where details are not obtained from Docker inspect (e.g., OCI layout images).
74+
/// </summary>
75+
/// <returns>A <see cref="ContainerDetails"/> with only the <see cref="ContainerDetails.Id"/> populated.</returns>
76+
ContainerDetails GetEmptyContainerDetails();
6077
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
namespace Microsoft.ComponentDetection.Detectors.Linux.Contracts;
2+
3+
using System.Text.Json;
4+
5+
/// <summary>
/// Extends the auto-generated <see cref="SourceClass"/> with a method to
/// deserialize its untyped <see cref="Metadata"/> into a
/// strongly-typed <see cref="SyftSourceMetadata"/>.
/// </summary>
public partial class SourceClass
{
    /// <summary>
    /// Deserializes the <see cref="Metadata"/> property into a <see cref="SyftSourceMetadata"/>.
    /// </summary>
    /// <returns>
    /// A deserialized <see cref="SyftSourceMetadata"/> instance, or null when
    /// <see cref="Metadata"/> is null, is not a <see cref="JsonElement"/>, or is a
    /// <see cref="JsonElement"/> that does not hold a JSON object.
    /// </returns>
    /// <exception cref="JsonException">
    /// The metadata is a JSON object whose members cannot be converted to the
    /// property types declared on <see cref="SyftSourceMetadata"/>.
    /// </exception>
    internal SyftSourceMetadata? GetSyftSourceMetadata()
    {
        // Only a JSON object can map onto SyftSourceMetadata; any other payload
        // (string, array, number, ...) is reported as "no metadata" instead of throwing.
        if (this.Metadata is JsonElement { ValueKind: JsonValueKind.Object } element)
        {
            // Deserialize straight from the element rather than materializing its
            // raw text as a string and parsing that string a second time.
            return element.Deserialize<SyftSourceMetadata>();
        }

        return null;
    }
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
namespace Microsoft.ComponentDetection.Detectors.Linux.Contracts;
2+
3+
using System.Text.Json.Serialization;
4+
5+
/// <summary>
/// A single layer entry from the image source metadata in Syft output.
/// Every member is optional and stays null when its JSON property is absent.
/// </summary>
internal class SyftSourceLayer
{
    /// <summary>
    /// Gets or sets the value of the <c>mediaType</c> JSON property.
    /// </summary>
    [JsonPropertyName("mediaType")]
    public string? MediaType { get; set; }

    /// <summary>
    /// Gets or sets the value of the <c>digest</c> JSON property.
    /// </summary>
    [JsonPropertyName("digest")]
    public string? Digest { get; set; }

    /// <summary>
    /// Gets or sets the value of the <c>size</c> JSON property.
    /// </summary>
    [JsonPropertyName("size")]
    public long? Size { get; set; }
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
namespace Microsoft.ComponentDetection.Detectors.Linux.Contracts;
2+
3+
using System.Collections.Generic;
4+
using System.Text.Json.Serialization;
5+
6+
/// <summary>
/// Strongly-typed view of the metadata for a Syft scan source of type "image":
/// image ID, tags, layers, labels and related details.
/// Populated from the <c>source.metadata</c> field of Syft JSON output, which the
/// auto-generated <see cref="SourceClass"/> exposes only as <c>object</c>.
/// Every member is optional and stays null when its JSON property is absent.
/// </summary>
internal class SyftSourceMetadata
{
    /// <summary>
    /// Gets or sets the value of the <c>userInput</c> JSON property.
    /// </summary>
    [JsonPropertyName("userInput")]
    public string? UserInput { get; set; }

    /// <summary>
    /// Gets or sets the value of the <c>imageID</c> JSON property.
    /// </summary>
    [JsonPropertyName("imageID")]
    public string? ImageId { get; set; }

    /// <summary>
    /// Gets or sets the value of the <c>manifestDigest</c> JSON property.
    /// </summary>
    [JsonPropertyName("manifestDigest")]
    public string? ManifestDigest { get; set; }

    /// <summary>
    /// Gets or sets the value of the <c>mediaType</c> JSON property.
    /// </summary>
    [JsonPropertyName("mediaType")]
    public string? MediaType { get; set; }

    /// <summary>
    /// Gets or sets the entries of the <c>tags</c> JSON array.
    /// </summary>
    [JsonPropertyName("tags")]
    public string[]? Tags { get; set; }

    /// <summary>
    /// Gets or sets the value of the <c>imageSize</c> JSON property.
    /// </summary>
    [JsonPropertyName("imageSize")]
    public long? ImageSize { get; set; }

    /// <summary>
    /// Gets or sets the entries of the <c>layers</c> JSON array.
    /// </summary>
    [JsonPropertyName("layers")]
    public SyftSourceLayer[]? Layers { get; set; }

    /// <summary>
    /// Gets or sets the entries of the <c>repoDigests</c> JSON array.
    /// </summary>
    [JsonPropertyName("repoDigests")]
    public string[]? RepoDigests { get; set; }

    /// <summary>
    /// Gets or sets the value of the <c>architecture</c> JSON property.
    /// </summary>
    [JsonPropertyName("architecture")]
    public string? Architecture { get; set; }

    /// <summary>
    /// Gets or sets the value of the <c>os</c> JSON property.
    /// </summary>
    [JsonPropertyName("os")]
    public string? Os { get; set; }

    /// <summary>
    /// Gets or sets the key/value pairs of the <c>labels</c> JSON object.
    /// </summary>
    [JsonPropertyName("labels")]
    public Dictionary<string, string>? Labels { get; set; }
}

src/Microsoft.ComponentDetection.Detectors/linux/ILinuxScanner.cs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ namespace Microsoft.ComponentDetection.Detectors.Linux;
55
using System.Threading.Tasks;
66
using Microsoft.ComponentDetection.Contracts.BcdeModels;
77
using Microsoft.ComponentDetection.Contracts.TypedComponent;
8+
using Microsoft.ComponentDetection.Detectors.Linux.Contracts;
89

910
/// <summary>
1011
/// Interface for scanning Linux container layers to identify components.
@@ -13,6 +14,7 @@ public interface ILinuxScanner
1314
{
1415
/// <summary>
1516
/// Scans a Linux container image for components and maps them to their respective layers.
17+
/// Runs Syft and processes the output in a single step.
1618
/// </summary>
1719
/// <param name="imageHash">The hash identifier of the container image to scan.</param>
1820
/// <param name="containerLayers">The collection of Docker layers that make up the container image.</param>
@@ -29,4 +31,33 @@ public Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(
2931
LinuxScannerScope scope,
3032
CancellationToken cancellationToken = default
3133
);
34+
35+
/// <summary>
36+
/// Runs the Syft scanner and returns the raw parsed output without processing components.
37+
/// Use this when the caller needs access to the full Syft output (e.g., to extract source metadata for OCI images).
38+
/// </summary>
39+
/// <param name="syftSource">The source argument passed to Syft (e.g., an image hash or "oci-dir:/oci-image").</param>
40+
/// <param name="additionalBinds">Additional volume bind mounts for the Syft container (e.g., for mounting OCI directories).</param>
41+
/// <param name="scope">The scope for scanning the image.</param>
42+
/// <param name="cancellationToken">A token to monitor for cancellation requests.</param>
43+
/// <returns>A task that represents the asynchronous operation. The task result contains the parsed <see cref="SyftOutput"/>.</returns>
44+
public Task<SyftOutput> GetSyftOutputAsync(
45+
string syftSource,
46+
IList<string> additionalBinds,
47+
LinuxScannerScope scope,
48+
CancellationToken cancellationToken = default
49+
);
50+
51+
/// <summary>
52+
/// Processes parsed Syft output into layer-mapped components.
53+
/// </summary>
54+
/// <param name="syftOutput">The parsed Syft output.</param>
55+
/// <param name="containerLayers">The layers to map components to.</param>
56+
/// <param name="enabledComponentTypes">The set of component types to include in the results.</param>
57+
/// <returns>A collection of <see cref="LayerMappedLinuxComponents"/> representing the components found and their associated layers.</returns>
58+
public IEnumerable<LayerMappedLinuxComponents> ProcessSyftOutput(
59+
SyftOutput syftOutput,
60+
IEnumerable<DockerLayer> containerLayers,
61+
ISet<ComponentType> enabledComponentTypes
62+
);
3263
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
namespace Microsoft.ComponentDetection.Detectors.Linux;
2+
3+
using System;
4+
5+
/// <summary>
/// Specifies the type of image reference.
/// </summary>
internal enum ImageReferenceKind
{
    /// <summary>
    /// A Docker image reference (e.g., "node:latest", "sha256:abc123").
    /// </summary>
    DockerImage,

    /// <summary>
    /// An OCI Image Layout directory on disk (e.g., "oci-dir:/path/to/image").
    /// </summary>
    OciLayout,

    /// <summary>
    /// An OCI archive (tarball) file on disk (e.g., "oci-archive:/path/to/image.tar").
    /// </summary>
    OciArchive,

    /// <summary>
    /// A Docker archive (tarball) file on disk created by "docker save" (e.g., "docker-archive:/path/to/image.tar").
    /// </summary>
    DockerArchive,
}

/// <summary>
/// Represents a parsed image reference from the scan input, with its type and cleaned reference string.
/// </summary>
internal class ImageReference
{
    private const string OciDirPrefix = "oci-dir:";
    private const string OciArchivePrefix = "oci-archive:";
    private const string DockerArchivePrefix = "docker-archive:";

    /// <summary>
    /// Scheme prefixes that mark the input as a filesystem path, each paired with the
    /// <see cref="ImageReferenceKind"/> it selects. No prefix is a prefix of another,
    /// so the order of the entries does not affect which one matches.
    /// </summary>
    private static readonly (string Prefix, ImageReferenceKind Kind)[] PathPrefixes =
    {
        (OciDirPrefix, ImageReferenceKind.OciLayout),
        (OciArchivePrefix, ImageReferenceKind.OciArchive),
        (DockerArchivePrefix, ImageReferenceKind.DockerArchive),
    };

    /// <summary>
    /// Gets the original input string as provided by the user.
    /// </summary>
    public required string OriginalInput { get; init; }

    /// <summary>
    /// Gets the cleaned reference string with any scheme prefix removed.
    /// For Docker images, this is lowercased. For file paths, case is preserved.
    /// </summary>
    public required string Reference { get; init; }

    /// <summary>
    /// Gets the kind of image reference.
    /// </summary>
    public required ImageReferenceKind Kind { get; init; }

    /// <summary>
    /// Parses an input image string into an <see cref="ImageReference"/>.
    /// Inputs starting with <c>oci-dir:</c>, <c>oci-archive:</c> or <c>docker-archive:</c>
    /// (matched case-insensitively) are treated as filesystem paths; anything else is
    /// treated as a Docker image reference.
    /// </summary>
    /// <param name="input">The raw image input string.</param>
    /// <returns>A parsed <see cref="ImageReference"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="input"/> consists of a path prefix followed by nothing but whitespace.
    /// </exception>
    public static ImageReference Parse(string input)
    {
        // Fail with a descriptive argument exception instead of a NullReferenceException.
        ArgumentNullException.ThrowIfNull(input);

        foreach (var (prefix, kind) in PathPrefixes)
        {
            if (!input.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // Everything after the scheme prefix is the path; its case is preserved.
            var path = input[prefix.Length..];
            if (string.IsNullOrWhiteSpace(path))
            {
                throw new ArgumentException($"Input with '{prefix}' prefix must include a path.", nameof(input));
            }

            return new ImageReference
            {
                OriginalInput = input,
                Reference = path,
                Kind = kind,
            };
        }

        // No path prefix matched: treat the input as a Docker image reference and lowercase it.
#pragma warning disable CA1308
        return new ImageReference
        {
            OriginalInput = input,
            Reference = input.ToLowerInvariant(),
            Kind = ImageReferenceKind.DockerImage,
        };
#pragma warning restore CA1308
    }
}

0 commit comments

Comments
 (0)