diff --git a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs
index fe3122647..83eba3413 100644
--- a/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs
+++ b/src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs
@@ -28,6 +28,7 @@ namespace Microsoft.ComponentDetection.Common;
using System;
using System.Diagnostics.CodeAnalysis;
+using System.Text.RegularExpressions;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.Extensions.Logging;
@@ -39,14 +40,29 @@ public static class DockerReferenceUtility
private const string LEGACYDEFAULTDOMAIN = "index.docker.io";
private const string OFFICIALREPOSITORYNAME = "library";
+ // Placeholder delimiters. These characters show up in an image reference only when a
+ // templating token was left unexpanded: '$', '{' and '}' belong to shell / Helm /
+ // Go-template syntax (e.g. ${VAR}, {{ .Values.tag }}), while '#' belongs to Azure DevOps
+ // style token replacement (e.g. #imageTag#) and is never valid in a resolved docker reference.
+ private static readonly char[] TemplateDelimiters = ['#', '$', '{', '}'];
+
+ // Matches token-replacement placeholders that wrap an identifier in double underscores,
+ // e.g. __IMAGE_TAG__ or __MCR_ENDPOINT__. Without this they parse as an uppercase repository
+ // name and surface as a noisy parse failure instead of being skipped as a templated value.
+ // The lookarounds require that the token does not touch an ASCII letter or digit on either
+ // side: "__" is itself a legal separator inside a repository path component, so a resolved
+ // name such as team__app__api must not be mistaken for a placeholder.
+ private static readonly Regex DoubleUnderscoreTokenRegex = new(
+     @"(?<![A-Za-z0-9])__\w+__(?![A-Za-z0-9])",
+     RegexOptions.Compiled);
+
/// <summary>
- /// Returns true if the reference contains unresolved variable placeholders (e.g., ${VAR}, {{ .Values.tag }}).
- /// Such references should be skipped before calling or .
+ /// Returns true if the reference contains unresolved variable or templating placeholders,
+ /// e.g. <c>${VAR}</c>, <c>{{ .Values.tag }}</c>, <c>#imageTag#</c>, or <c>__IMAGE_TAG__</c>.
+ /// Such references are not real, resolvable images, so they should be skipped before
+ /// any attempt to parse them and treated as
+ /// unresolved values rather than reported as parse failures.
/// </summary>
/// <param name="reference">The image reference string to check.</param>
/// <returns><c>true</c> if the reference contains a placeholder delimiter character or a double-underscore token; otherwise <c>false</c>.</returns>
public static bool HasUnresolvedVariables(string reference) =>
- reference.IndexOfAny(['$', '{', '}']) >= 0;
+ reference.IndexOfAny(TemplateDelimiters) >= 0 ||
+ DoubleUnderscoreTokenRegex.IsMatch(reference);
///
/// Attempts to parse an image reference string into a .
diff --git a/src/Microsoft.ComponentDetection.Detectors/dockercompose/DockerComposeComponentDetector.cs b/src/Microsoft.ComponentDetection.Detectors/dockercompose/DockerComposeComponentDetector.cs
index 8f2c857d1..ce2669795 100644
--- a/src/Microsoft.ComponentDetection.Detectors/dockercompose/DockerComposeComponentDetector.cs
+++ b/src/Microsoft.ComponentDetection.Detectors/dockercompose/DockerComposeComponentDetector.cs
@@ -40,7 +40,15 @@ public DockerComposeComponentDetector(
public override IEnumerable Categories => [nameof(DetectorClass.DockerCompose)];
- protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary detectorArgs, CancellationToken cancellationToken = default)
+ /// <summary>
+ /// Gets or sets a value indicating whether compose files are processed concurrently.
+ /// Each file is parsed independently into its own <see cref="YamlStream"/>
+ /// and the image-reference parsing is stateless, so parsing is thread-safe and
+ /// scales across cores for repositories containing many compose files.
+ /// </summary>
+ protected override bool EnableParallelism { get; set; } = true;
+
+ protected override Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary detectorArgs, CancellationToken cancellationToken = default)
{
var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder;
var file = processRequest.ComponentStream;
@@ -49,18 +57,18 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
{
this.Logger.LogInformation("Discovered Docker Compose file: {Location}", file.Location);
- string contents;
+ // Parse directly from the stream; the content is already buffered in memory by
+ // LazyComponentStream, so reading it into an intermediate string only adds an
+ // extra full-file allocation and GC pressure under parallel processing.
+ var yaml = new YamlStream();
using (var reader = new StreamReader(file.Stream))
{
- contents = await reader.ReadToEndAsync(cancellationToken);
+ yaml.Load(reader);
}
- var yaml = new YamlStream();
- yaml.Load(new StringReader(contents));
-
if (yaml.Documents.Count == 0)
{
- return;
+ return Task.CompletedTask;
}
foreach (var document in yaml.Documents)
@@ -75,6 +83,8 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
{
this.Logger.LogError(e, "Failed to parse Docker Compose file: {Location}", file.Location);
}
+
+ return Task.CompletedTask;
}
private static YamlMappingNode? GetMappingChild(YamlMappingNode parent, string key)
diff --git a/src/Microsoft.ComponentDetection.Detectors/helm/HelmComponentDetector.cs b/src/Microsoft.ComponentDetection.Detectors/helm/HelmComponentDetector.cs
index 9da078303..030f30069 100644
--- a/src/Microsoft.ComponentDetection.Detectors/helm/HelmComponentDetector.cs
+++ b/src/Microsoft.ComponentDetection.Detectors/helm/HelmComponentDetector.cs
@@ -17,6 +17,14 @@ namespace Microsoft.ComponentDetection.Detectors.Helm;
public class HelmComponentDetector : FileComponentDetector, IExperimentalDetector
{
+ /// <summary>
+ /// Upper bound, in bytes, on the size of a values file this detector will parse. Because the
+ /// "*values*" globs can also match large YAML files that are not Helm values files, and a
+ /// full-DOM parse of such a file dominates worst-case runtime, anything larger than this is
+ /// skipped so that one pathological file cannot exhaust the detector's time budget.
+ /// </summary>
+ private const long MaxValuesFileSizeBytes = 20L * 1024L * 1024L; // 20 MB
+
public HelmComponentDetector(
IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
IObservableDirectoryWalkerFactory walkerFactory,
@@ -41,6 +49,14 @@ public HelmComponentDetector(
public override IEnumerable Categories => [nameof(DetectorClass.Helm)];
+ /// <summary>
+ /// Gets or sets a value indicating whether values files are processed concurrently.
+ /// Each file is parsed independently into its own <see cref="YamlStream"/>
+ /// and the image-reference parsing is stateless, so parsing is thread-safe and
+ /// scales across cores for repositories containing many charts.
+ /// </summary>
+ protected override bool EnableParallelism { get; set; } = true;
+
///
/// Pre-filters scan work to only values files co-located with a Chart.yaml/Chart.yml.
/// Materializes all matched files, identifies Helm chart directories, then filters.
@@ -65,7 +81,7 @@ protected override async Task> OnPrepareDetectionAsy
.ToObservable();
}
- protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary detectorArgs, CancellationToken cancellationToken = default)
+ protected override Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary detectorArgs, CancellationToken cancellationToken = default)
{
var file = processRequest.ComponentStream;
@@ -74,20 +90,34 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
// filename/directory checks are needed.
try
{
+ // Check the size before touching ComponentStream so an oversized file is never
+ // buffered into memory. The "*values*" globs can match large, non-Helm YAML files
+ // whose full-DOM parse is the main driver of worst-case (timeout) runtime.
+ var fileInfo = new FileInfo(file.Location);
+ if (fileInfo.Exists && fileInfo.Length > MaxValuesFileSizeBytes)
+ {
+ this.Logger.LogWarning(
+ "Skipping Helm values file exceeding size limit ({Length} bytes > {Limit} bytes): {Location}",
+ fileInfo.Length,
+ MaxValuesFileSizeBytes,
+ file.Location);
+ return Task.CompletedTask;
+ }
+
this.Logger.LogInformation("Discovered Helm values file: {Location}", file.Location);
- string contents;
+ // Parse directly from the stream; the content is already buffered in memory by
+ // LazyComponentStream, so reading it into an intermediate string only adds an
+ // extra full-file allocation and GC pressure under parallel processing.
+ var yaml = new YamlStream();
using (var reader = new StreamReader(file.Stream))
{
- contents = await reader.ReadToEndAsync(cancellationToken);
+ yaml.Load(reader);
}
- var yaml = new YamlStream();
- yaml.Load(new StringReader(contents));
-
if (yaml.Documents.Count == 0)
{
- return;
+ return Task.CompletedTask;
}
this.ExtractImageReferencesFromValues(yaml, processRequest.SingleFileComponentRecorder);
@@ -96,6 +126,8 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
{
this.Logger.LogError(e, "Failed to parse Helm file: {Location}", file.Location);
}
+
+ return Task.CompletedTask;
}
///
diff --git a/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs b/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs
index d50c65be0..902e80b9d 100644
--- a/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs
+++ b/test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs
@@ -284,18 +284,66 @@ public void HasUnresolvedVariables_ReturnsTrueForBraces()
DockerReferenceUtility.HasUnresolvedVariables("{{ .Values.image }}").Should().BeTrue();
}
+ [TestMethod]
+ public void HasUnresolvedVariables_ReturnsTrueForDoubleUnderscoreTokens()
+ {
+     // Registry host and tag are both __TOKEN__ placeholders.
+     const string reference = "__MCR_ENDPOINT__/aks/devinfra/helm3sample:__IMAGE_TAG__";
+
+     var hasPlaceholders = DockerReferenceUtility.HasUnresolvedVariables(reference);
+
+     hasPlaceholders.Should().BeTrue();
+ }
+
+ [TestMethod]
+ public void HasUnresolvedVariables_ReturnsTrueForHashDelimitedTokens()
+ {
+     // Every segment of the reference carries at least one #token# placeholder.
+     const string reference = "#cs_containerRegistryLoginServerUrl#/coreservicesaksservice_#cs_aks_workloadName#_#cs_aks_serviceTrackIdentifier#/#serviceName#:#imageTag#";
+
+     var hasPlaceholders = DockerReferenceUtility.HasUnresolvedVariables(reference);
+
+     hasPlaceholders.Should().BeTrue();
+ }
+
[TestMethod]
public void HasUnresolvedVariables_ReturnsFalseForPlainReference()
{
DockerReferenceUtility.HasUnresolvedVariables("docker.io/library/nginx:latest").Should().BeFalse();
}
+ [TestMethod]
+ public void HasUnresolvedVariables_ReturnsFalseForReferenceWithUnderscores()
+ {
+     // Single underscores inside path components are ordinary name characters.
+     const string reference = "mcr.microsoft.com/some_repo/my_image:1.0";
+
+     var hasPlaceholders = DockerReferenceUtility.HasUnresolvedVariables(reference);
+
+     hasPlaceholders.Should().BeFalse();
+ }
+
[TestMethod]
public void TryParseImageReference_ReturnsNullForUnresolvedVariables()
{
DockerReferenceUtility.TryParseImageReference("${IMAGE}:latest").Should().BeNull();
}
+ [TestMethod]
+ public void TryParseImageReference_ReturnsNullForDoubleUnderscoreTokens()
+ {
+     // A reference that still contains __TOKEN__ placeholders must not yield a parsed result.
+     const string reference = "__MCR_ENDPOINT__/aks/devinfra/helm3sample:__IMAGE_TAG__";
+
+     var parsed = DockerReferenceUtility.TryParseImageReference(reference);
+
+     parsed.Should().BeNull();
+ }
+
+ [TestMethod]
+ public void TryParseImageReference_ReturnsNullForHashDelimitedTokens()
+ {
+     // A reference that still contains #token# placeholders must not yield a parsed result.
+     const string reference = "#cs_containerRegistryLoginServerUrl#/svc/#serviceName#:#imageTag#";
+
+     var parsed = DockerReferenceUtility.TryParseImageReference(reference);
+
+     parsed.Should().BeNull();
+ }
+
+ [TestMethod]
+ public void TryParseImageReference_DoesNotLogWarningForTemplatedReference()
+ {
+     // Arrange: a mock logger so that any log call made while handling the reference is observable.
+     var logger = new Mock<ILogger>();
+
+     // Act: the reference still contains __TOKEN__ placeholders.
+     var result = DockerReferenceUtility.TryParseImageReference("__MCR_ENDPOINT__/aks/devinfra/helm3sample:__IMAGE_TAG__", logger.Object);
+
+     // Assert: no parsed result, and nothing was logged at any level. The matchers mirror the
+     // five parameters of ILogger.Log<TState>; It.IsAnyType stands in for the generic TState.
+     result.Should().BeNull();
+     logger.Verify(
+         l => l.Log(
+             It.IsAny<LogLevel>(),
+             It.IsAny<EventId>(),
+             It.IsAny<It.IsAnyType>(),
+             It.IsAny<Exception>(),
+             It.IsAny<Func<It.IsAnyType, Exception?, string>>()),
+         Times.Never);
+ }
+
[TestMethod]
public void TryParseImageReference_ReturnsNullForInvalidReference()
{