Skip to content

Commit b06a7d2

Browse files
Aayush MainiCopilot
andcommitted
Reconcile bare/rich component identities in ComponentsFound
Within a single detector, components registered under bare Ids (no DownloadUrl/SourceUrl) are now merged into their rich counterparts sharing the same BaseId. Changes: - ComponentRecorder.GetDetectedComponents(): group by BaseId, merge bare metadata (licenses, suppliers, containers) into all rich entries - DefaultGraphTranslationService.GatherSetOfDetectedComponentsUnmerged(): extend graph lookup to match on BaseId so rich components absorb graph data (roots, ancestors, devDep, scope, file paths) from bare-Id graphs - Tests for both reconciliation levels Addresses work item #2372676. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent af0cff9 commit b06a7d2

File tree

4 files changed

+400
-42
lines changed

4 files changed

+400
-42
lines changed

src/Microsoft.ComponentDetection.Common/DependencyGraph/ComponentRecorder.cs

Lines changed: 119 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -34,56 +34,142 @@ public TypedComponent GetComponent(string componentId)
3434

3535
public IEnumerable<DetectedComponent> GetDetectedComponents()
3636
{
37-
IEnumerable<DetectedComponent> detectedComponents;
3837
if (this.singleFileRecorders == null)
3938
{
4039
return [];
4140
}
4241

43-
detectedComponents = this.singleFileRecorders.Values
44-
.SelectMany(singleFileRecorder => singleFileRecorder.GetDetectedComponents().Values)
45-
.GroupBy(x => x.Component.Id)
46-
.Select(grouping =>
47-
{
48-
// We pick a winner here -- any stateful props could get lost at this point.
49-
var winningDetectedComponent = grouping.First();
42+
var allComponents = this.singleFileRecorders.Values
43+
.SelectMany(singleFileRecorder => singleFileRecorder.GetDetectedComponents().Values);
5044

51-
HashSet<string> mergedLicenses = null;
52-
HashSet<ActorInfo> mergedSuppliers = null;
45+
// When both rich and bare entries exist for the same BaseId, rich entries are used as merge targets for bare entries.
46+
var reconciledComponents = new List<DetectedComponent>();
5347

54-
foreach (var component in grouping.Skip(1))
55-
{
56-
winningDetectedComponent.ContainerDetailIds.UnionWith(component.ContainerDetailIds);
48+
foreach (var baseIdGroup in allComponents.GroupBy(x => x.Component.BaseId))
49+
{
50+
var richEntries = new List<DetectedComponent>();
51+
var bareEntries = new List<DetectedComponent>();
5752

58-
// Defensive: merge in case different file recorders set different values for the same component.
59-
if (component.LicensesConcluded != null)
60-
{
61-
mergedLicenses ??= new HashSet<string>(winningDetectedComponent.LicensesConcluded ?? [], StringComparer.OrdinalIgnoreCase);
62-
mergedLicenses.UnionWith(component.LicensesConcluded);
63-
}
53+
// Sub-group by full Id first: merge duplicates of the same Id (existing behavior).
54+
foreach (var idGroup in baseIdGroup.GroupBy(x => x.Component.Id))
55+
{
56+
var merged = MergeDetectedComponentGroup(idGroup);
6457

65-
if (component.Suppliers != null)
66-
{
67-
mergedSuppliers ??= new HashSet<ActorInfo>(winningDetectedComponent.Suppliers ?? []);
68-
mergedSuppliers.UnionWith(component.Suppliers);
69-
}
58+
if (merged.Component.Id == merged.Component.BaseId)
59+
{
60+
bareEntries.Add(merged);
7061
}
71-
72-
if (mergedLicenses != null)
62+
else
7363
{
74-
winningDetectedComponent.LicensesConcluded = mergedLicenses.Where(x => x != null).OrderBy(x => x, StringComparer.OrdinalIgnoreCase).ToList();
64+
richEntries.Add(merged);
7565
}
66+
}
7667

77-
if (mergedSuppliers != null)
68+
if (richEntries.Count > 0 && bareEntries.Count > 0)
69+
{
70+
// Merge each bare entry's metadata into every rich entry, then drop the bare.
71+
foreach (var bare in bareEntries)
7872
{
79-
winningDetectedComponent.Suppliers = mergedSuppliers.Where(s => s != null).OrderBy(s => s.Name).ThenBy(s => s.Type).ToList();
73+
foreach (var rich in richEntries)
74+
{
75+
MergeComponentMetadata(source: bare, target: rich);
76+
}
8077
}
8178

82-
return winningDetectedComponent;
83-
})
84-
.ToArray();
79+
reconciledComponents.AddRange(richEntries);
80+
}
81+
else
82+
{
83+
// No conflict: either all rich (different Ids kept separate) or all bare.
84+
reconciledComponents.AddRange(richEntries);
85+
reconciledComponents.AddRange(bareEntries);
86+
}
87+
}
88+
89+
return reconciledComponents.ToArray();
90+
}
91+
92+
/// <summary>
93+
/// Merges component-level metadata from <paramref name="source"/> into <paramref name="target"/>.
94+
/// </summary>
95+
private static void MergeComponentMetadata(DetectedComponent source, DetectedComponent target)
96+
{
97+
target.ContainerDetailIds.UnionWith(source.ContainerDetailIds);
98+
99+
foreach (var kvp in source.ContainerLayerIds)
100+
{
101+
if (target.ContainerLayerIds.TryGetValue(kvp.Key, out var existingLayers))
102+
{
103+
target.ContainerLayerIds[kvp.Key] = existingLayers.Union(kvp.Value).Distinct().ToList();
104+
}
105+
else
106+
{
107+
target.ContainerLayerIds[kvp.Key] = kvp.Value.ToList();
108+
}
109+
}
110+
111+
target.LicensesConcluded = MergeAndNormalizeLicenses(target.LicensesConcluded, source.LicensesConcluded);
112+
target.Suppliers = MergeAndNormalizeSuppliers(target.Suppliers, source.Suppliers);
113+
}
114+
115+
private static IList<string> MergeAndNormalizeLicenses(IList<string> target, IList<string> source)
116+
{
117+
if (target == null && source == null)
118+
{
119+
return null;
120+
}
121+
122+
var merged = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
123+
124+
if (target != null)
125+
{
126+
merged.UnionWith(target.Where(x => x != null));
127+
}
128+
129+
if (source != null)
130+
{
131+
merged.UnionWith(source.Where(x => x != null));
132+
}
133+
134+
return merged.OrderBy(x => x, StringComparer.OrdinalIgnoreCase).ToList();
135+
}
136+
137+
private static IList<ActorInfo> MergeAndNormalizeSuppliers(IList<ActorInfo> target, IList<ActorInfo> source)
138+
{
139+
if (target == null && source == null)
140+
{
141+
return null;
142+
}
143+
144+
var merged = new HashSet<ActorInfo>();
145+
146+
if (target != null)
147+
{
148+
merged.UnionWith(target.Where(s => s != null));
149+
}
150+
151+
if (source != null)
152+
{
153+
merged.UnionWith(source.Where(s => s != null));
154+
}
155+
156+
return merged.OrderBy(s => s.Name).ThenBy(s => s.Type).ToList();
157+
}
158+
159+
/// <summary>
160+
/// Merges a group of <see cref="DetectedComponent"/>s that share the same <see cref="TypedComponent.Id"/>
161+
/// into a single entry.
162+
/// </summary>
163+
private static DetectedComponent MergeDetectedComponentGroup(IEnumerable<DetectedComponent> grouping)
164+
{
165+
var winner = grouping.First();
166+
167+
foreach (var component in grouping.Skip(1))
168+
{
169+
MergeComponentMetadata(source: component, target: winner);
170+
}
85171

86-
return detectedComponents;
172+
return winner;
87173
}
88174

89175
public IEnumerable<string> GetSkippedComponents()

src/Microsoft.ComponentDetection.Orchestrator/Services/GraphTranslation/DefaultGraphTranslationService.cs

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,16 @@ private static ConcurrentHashSet<string> MergeTargetFrameworks(ConcurrentHashSet
7171
return left;
7272
}
7373

74+
/// <summary>
75+
/// Checks whether the graph contains the component by its full Id, or by its BaseId
76+
/// when the component is a rich entry (Id != BaseId) whose bare counterpart was registered in the graph.
77+
/// </summary>
78+
private static bool GraphContainsComponent(IDependencyGraph graph, TypedComponent component)
79+
{
80+
return graph.Contains(component.Id) ||
81+
(component.Id != component.BaseId && graph.Contains(component.BaseId));
82+
}
83+
7484
private void LogComponentScopeTelemetry(List<DetectedComponent> components)
7585
{
7686
using var record = new DetectedComponentScopeRecord();
@@ -126,25 +136,30 @@ private IEnumerable<DetectedComponent> GatherSetOfDetectedComponentsUnmerged(IEn
126136
}
127137

128138
// Information about each component is relative to all of the graphs it is present in, so we take all graphs containing a given component and apply the graph data.
129-
foreach (var graphKvp in dependencyGraphsByLocation.Where(x => x.Value.Contains(component.Component.Id)))
139+
foreach (var graphKvp in dependencyGraphsByLocation.Where(x => GraphContainsComponent(x.Value, component.Component)))
130140
{
131141
var location = graphKvp.Key;
132142
var dependencyGraph = graphKvp.Value;
133143

144+
// Determine the Id stored in this graph — may be the rich Id or the bare BaseId.
145+
var graphComponentId = dependencyGraph.Contains(component.Component.Id)
146+
? component.Component.Id
147+
: component.Component.BaseId;
148+
134149
// Calculate roots of the component
135150
var rootStartTime = DateTime.UtcNow;
136-
this.AddRootsToDetectedComponent(component, dependencyGraph, componentRecorder);
151+
this.AddRootsToDetectedComponent(component, graphComponentId, dependencyGraph, componentRecorder);
137152
var rootEndTime = DateTime.UtcNow;
138153
totalTimeToAddRoots += rootEndTime - rootStartTime;
139154

140155
// Calculate Ancestors of the component
141156
var ancestorStartTime = DateTime.UtcNow;
142-
this.AddAncestorsToDetectedComponent(component, dependencyGraph, componentRecorder);
157+
this.AddAncestorsToDetectedComponent(component, graphComponentId, dependencyGraph, componentRecorder);
143158
var ancestorEndTime = DateTime.UtcNow;
144159
totalTimeToAddAncestors += ancestorEndTime - ancestorStartTime;
145160

146-
component.DevelopmentDependency = this.MergeDevDependency(component.DevelopmentDependency, dependencyGraph.IsDevelopmentDependency(component.Component.Id));
147-
component.DependencyScope = DependencyScopeComparer.GetMergedDependencyScope(component.DependencyScope, dependencyGraph.GetDependencyScope(component.Component.Id));
161+
component.DevelopmentDependency = this.MergeDevDependency(component.DevelopmentDependency, dependencyGraph.IsDevelopmentDependency(graphComponentId));
162+
component.DependencyScope = DependencyScopeComparer.GetMergedDependencyScope(component.DependencyScope, dependencyGraph.GetDependencyScope(graphComponentId));
148163
component.DetectedBy = detector;
149164

150165
// Experiments uses this service to build the dependency graph for analysis. In this case, we do not want to update the locations of the component.
@@ -269,26 +284,26 @@ private DetectedComponent MergeComponents(IEnumerable<DetectedComponent> enumera
269284
return firstComponent;
270285
}
271286

272-
private void AddRootsToDetectedComponent(DetectedComponent detectedComponent, IDependencyGraph dependencyGraph, IComponentRecorder componentRecorder)
287+
private void AddRootsToDetectedComponent(DetectedComponent detectedComponent, string graphComponentId, IDependencyGraph dependencyGraph, IComponentRecorder componentRecorder)
273288
{
274289
detectedComponent.DependencyRoots ??= new HashSet<TypedComponent>(new ComponentComparer());
275290
if (dependencyGraph == null)
276291
{
277292
return;
278293
}
279294

280-
detectedComponent.DependencyRoots.UnionWith(dependencyGraph.GetRootsAsTypedComponents(detectedComponent.Component.Id, componentRecorder.GetComponent));
295+
detectedComponent.DependencyRoots.UnionWith(dependencyGraph.GetRootsAsTypedComponents(graphComponentId, componentRecorder.GetComponent));
281296
}
282297

283-
private void AddAncestorsToDetectedComponent(DetectedComponent detectedComponent, IDependencyGraph dependencyGraph, IComponentRecorder componentRecorder)
298+
private void AddAncestorsToDetectedComponent(DetectedComponent detectedComponent, string graphComponentId, IDependencyGraph dependencyGraph, IComponentRecorder componentRecorder)
284299
{
285300
detectedComponent.AncestralDependencyRoots ??= new HashSet<TypedComponent>(new ComponentComparer());
286301
if (dependencyGraph == null)
287302
{
288303
return;
289304
}
290305

291-
detectedComponent.AncestralDependencyRoots.UnionWith(dependencyGraph.GetAncestorsAsTypedComponents(detectedComponent.Component.Id, componentRecorder.GetComponent));
306+
detectedComponent.AncestralDependencyRoots.UnionWith(dependencyGraph.GetAncestorsAsTypedComponents(graphComponentId, componentRecorder.GetComponent));
292307
}
293308

294309
private HashSet<string> MakeFilePathsRelative(ILogger logger, DirectoryInfo rootDirectory, HashSet<string> filePaths)

test/Microsoft.ComponentDetection.Common.Tests/ComponentRecorderTests.cs

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,4 +428,136 @@ public void GetAllDependencyGraphs_ReturnedGraphsAreImmutable()
428428
Action attemptedAdd = () => asCollection.Add("should't work");
429429
attemptedAdd.Should().Throw<NotSupportedException>();
430430
}
431+
432+
[TestMethod]
433+
public void GetDetectedComponents_BareAndRichAcrossFiles_BareSubsumedIntoRich()
434+
{
435+
var recorder1 = this.componentRecorder.CreateSingleFileComponentRecorder("package.json");
436+
var recorder2 = this.componentRecorder.CreateSingleFileComponentRecorder("package-lock.json");
437+
438+
var bareComponent = new DetectedComponent(new NpmComponent("lodash", "4.17.23"));
439+
var richComponent = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz") });
440+
441+
recorder1.RegisterUsage(bareComponent);
442+
recorder2.RegisterUsage(richComponent);
443+
444+
var results = this.componentRecorder.GetDetectedComponents().ToList();
445+
446+
// Only the rich entry should remain
447+
results.Should().ContainSingle();
448+
results[0].Component.Id.Should().Be(richComponent.Component.Id);
449+
results[0].Component.Id.Should().NotBe(bareComponent.Component.Id);
450+
}
451+
452+
[TestMethod]
453+
public void GetDetectedComponents_BareAndMultipleRichAcrossFiles_BareMergesIntoAllRich()
454+
{
455+
var recorder1 = this.componentRecorder.CreateSingleFileComponentRecorder("package.json");
456+
var recorder2 = this.componentRecorder.CreateSingleFileComponentRecorder("lockfile-a.json");
457+
var recorder3 = this.componentRecorder.CreateSingleFileComponentRecorder("lockfile-b.json");
458+
459+
var bareComponent = new DetectedComponent(new NpmComponent("lodash", "4.17.23"))
460+
{
461+
LicensesConcluded = ["MIT"],
462+
};
463+
464+
var richA = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry-a.example.com/lodash-4.17.23.tgz") });
465+
var richB = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry-b.example.com/lodash-4.17.23.tgz") });
466+
467+
recorder1.RegisterUsage(bareComponent);
468+
recorder2.RegisterUsage(richA);
469+
recorder3.RegisterUsage(richB);
470+
471+
var results = this.componentRecorder.GetDetectedComponents().ToList();
472+
473+
// Two rich entries, bare dropped
474+
results.Should().HaveCount(2);
475+
results.Should().NotContain(c => c.Component.Id == bareComponent.Component.Id);
476+
477+
// Bare's license merged into both rich entries
478+
results.Should().OnlyContain(c => c.LicensesConcluded != null && c.LicensesConcluded.Contains("MIT"));
479+
}
480+
481+
[TestMethod]
482+
public void GetDetectedComponents_TwoRichDifferentUrls_BothKeptSeparate()
483+
{
484+
var recorder1 = this.componentRecorder.CreateSingleFileComponentRecorder("lockfile-a.json");
485+
var recorder2 = this.componentRecorder.CreateSingleFileComponentRecorder("lockfile-b.json");
486+
487+
var richA = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry-a.example.com/lodash-4.17.23.tgz") });
488+
var richB = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry-b.example.com/lodash-4.17.23.tgz") });
489+
490+
recorder1.RegisterUsage(richA);
491+
recorder2.RegisterUsage(richB);
492+
493+
var results = this.componentRecorder.GetDetectedComponents().ToList();
494+
495+
results.Should().HaveCount(2);
496+
results.Should().Contain(c => c.Component.Id == richA.Component.Id);
497+
results.Should().Contain(c => c.Component.Id == richB.Component.Id);
498+
}
499+
500+
[TestMethod]
501+
public void GetDetectedComponents_BareOnlyAcrossFiles_MergesIntoSingleBare()
502+
{
503+
var recorder1 = this.componentRecorder.CreateSingleFileComponentRecorder("package.json");
504+
var recorder2 = this.componentRecorder.CreateSingleFileComponentRecorder("other-package.json");
505+
506+
var bare1 = new DetectedComponent(new NpmComponent("lodash", "4.17.23"))
507+
{
508+
LicensesConcluded = ["MIT"],
509+
};
510+
511+
var bare2 = new DetectedComponent(new NpmComponent("lodash", "4.17.23"))
512+
{
513+
LicensesConcluded = ["Apache-2.0"],
514+
};
515+
516+
recorder1.RegisterUsage(bare1);
517+
recorder2.RegisterUsage(bare2);
518+
519+
var results = this.componentRecorder.GetDetectedComponents().ToList();
520+
521+
// Same Id → merged into one
522+
results.Should().ContainSingle();
523+
results[0].LicensesConcluded.Should().Contain("MIT");
524+
results[0].LicensesConcluded.Should().Contain("Apache-2.0");
525+
}
526+
527+
[TestMethod]
528+
public void GetDetectedComponents_BareAndRich_MetadataMergedCorrectly()
529+
{
530+
var recorder1 = this.componentRecorder.CreateSingleFileComponentRecorder("package.json");
531+
var recorder2 = this.componentRecorder.CreateSingleFileComponentRecorder("package-lock.json");
532+
533+
var bareComponent = new DetectedComponent(new NpmComponent("lodash", "4.17.23"))
534+
{
535+
LicensesConcluded = ["MIT"],
536+
Suppliers = [new ActorInfo { Name = "Lodash Team", Type = "Organization" }],
537+
};
538+
539+
var richComponent = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz") })
540+
{
541+
LicensesConcluded = ["Apache-2.0"],
542+
Suppliers = [new ActorInfo { Name = "Contoso", Type = "Organization" }],
543+
};
544+
545+
recorder1.RegisterUsage(bareComponent);
546+
recorder2.RegisterUsage(richComponent);
547+
548+
var results = this.componentRecorder.GetDetectedComponents().ToList();
549+
550+
results.Should().ContainSingle();
551+
var result = results[0];
552+
553+
// Licenses from both bare and rich should be merged
554+
result.LicensesConcluded.Should().HaveCount(2);
555+
result.LicensesConcluded.Should().Contain("Apache-2.0");
556+
result.LicensesConcluded.Should().Contain("MIT");
557+
558+
// Suppliers from both should be merged
559+
result.Suppliers.Should().HaveCount(2);
560+
result.Suppliers.Should().Contain(s => s.Name == "Lodash Team");
561+
result.Suppliers.Should().Contain(s => s.Name == "Contoso");
562+
}
431563
}

0 commit comments

Comments
 (0)