-
Notifications
You must be signed in to change notification settings - Fork 130
Expand file tree
/
Copy pathMavenWithFallbackDetector.cs
More file actions
1381 lines (1205 loc) · 60.6 KB
/
Copy pathMavenWithFallbackDetector.cs
File metadata and controls
1381 lines (1205 loc) · 60.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#nullable disable
namespace Microsoft.ComponentDetection.Detectors.Maven;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reactive.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Threading.Tasks.Dataflow;
using System.Xml;
using Microsoft.ComponentDetection.Common;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Contracts.Internal;
using Microsoft.ComponentDetection.Contracts.TypedComponent;
using Microsoft.Extensions.Logging;
/// <summary>
/// Identifies which detection strategy ended up being used for a scan.
/// </summary>
internal enum MavenDetectionMethod
{
    /// <summary>
    /// No detection performed.
    /// </summary>
    None = 0,

    /// <summary>
    /// MvnCli was used successfully for all files.
    /// </summary>
    MvnCliOnly = 1,

    /// <summary>
    /// Static parser was used for all files (MvnCli not available or failed completely).
    /// </summary>
    StaticParserOnly = 2,

    /// <summary>
    /// MvnCli succeeded for some files, static parser used for failed files.
    /// </summary>
    Mixed = 3,
}
/// <summary>
/// Identifies why the detector fell back from the Maven CLI to static parsing.
/// </summary>
internal enum MavenFallbackReason
{
    /// <summary>
    /// No fallback was needed.
    /// </summary>
    None = 0,

    /// <summary>
    /// Maven CLI was explicitly disabled via the CD_MAVEN_DISABLE_CLI environment variable.
    /// </summary>
    MvnCliDisabledByUser = 1,

    /// <summary>
    /// Maven CLI was not available in PATH.
    /// </summary>
    MavenCliNotAvailable = 2,

    /// <summary>
    /// MvnCli failed due to authentication error (401/403).
    /// </summary>
    AuthenticationFailure = 3,

    /// <summary>
    /// MvnCli failed due to other reasons.
    /// </summary>
    OtherMvnCliFailure = 4,
}
/// <summary>
/// Maven detector that combines MvnCli detection with static pom.xml parsing fallback.
/// Runs MvnCli detection first, then checks if detection produced any results.
/// If MvnCli fails for any pom.xml, falls back to static parsing for failed files.
/// </summary>
public class MvnCliComponentDetector : FileComponentDetector
{
/// <summary>
/// Environment variable to disable MvnCli and use only static pom.xml parsing.
/// Set to "true" to disable MvnCli detection.
/// Usage: Set CD_MAVEN_DISABLE_CLI=true as a pipeline/environment variable.
/// </summary>
internal const string DisableMvnCliEnvVar = "CD_MAVEN_DISABLE_CLI";
// File name of the Maven manifest. Used as the detector's search pattern and to name the
// per-file recorder that CLI-generated dependency files are attributed to.
private const string MavenManifest = "pom.xml";
// XML namespace URI of the POM schema; registered under the ProjNamespace prefix for XPath queries.
private const string MavenXmlNamespace = "http://maven.apache.org/POM/4.0.0";
// Prefix bound to MavenXmlNamespace in the XmlNamespaceManager.
private const string ProjNamespace = "proj";
// XPath that selects every <dependency> element anywhere in the document ("//" is not scoped
// to a particular parent section).
private const string DependencyNode = "//proj:dependency";
// Child element names read from each <dependency> node.
private const string GroupIdSelector = "groupId";
private const string ArtifactIdSelector = "artifactId";
private const string VersionSelector = "version";
// Matches a string that consists entirely of a "${...}" placeholder and captures the text
// between the braces. NOTE(review): its call sites are outside the visible part of this file.
private static readonly Regex VersionRegex = new(
@"^\$\{(.*)\}$",
RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Auth error patterns to detect in Maven error output.
// IsAuthenticationError matches these as case-insensitive substrings, so the bare numbers
// "401"/"403" also match when they appear inside unrelated text (e.g. a longer number).
private static readonly string[] AuthErrorPatterns =
[
"401",
"403",
"Unauthorized",
"Access denied",
];
// Pattern to initially extract URLs from Maven error messages.
// Matched values are subsequently normalized (scheme+host+port only) before
// being stored in logs or telemetry to avoid leaking credentials or tokens.
// A match runs from "http://" or "https://" up to the first whitespace, ']', ')' or '>'.
private static readonly Regex EndpointRegex = new(
@"https?://[^\s\]\)>]+",
RegexOptions.Compiled | RegexOptions.IgnoreCase);
/// <summary>
/// Maximum time allowed for the OnPrepareDetectionAsync phase.
/// This is a safety guardrail to prevent hangs.
/// Most repos should complete the full Maven CLI scan within this window.
/// </summary>
private static readonly TimeSpan PrepareDetectionTimeout = TimeSpan.FromMinutes(5);
// Wraps the Maven CLI: availability check, dependency-file generation and dependency-file parsing.
private readonly IMavenCommandService mavenCommandService;
// Used to read the DisableMvnCliEnvVar switch.
private readonly IEnvironmentVariableService envVarService;
// Used to check for, and optionally delete, generated dependency files.
private readonly IFileUtilityService fileUtilityService;
// Two-pass static parsing: collect variables first, then resolve components.
// Keys have the form "filePath::variableName".
private readonly ConcurrentDictionary<string, string> collectedVariables = new();
// Dependencies whose "${...}" version could not be resolved from the declaring file's own variables.
private readonly ConcurrentQueue<PendingComponent> pendingComponents = new();
// Track Maven parent-child relationships for proper variable resolution
private readonly ConcurrentDictionary<string, string> mavenParentChildRelationships = new();
// Track processed Maven projects by coordinates (groupId:artifactId -> file path)
private readonly ConcurrentDictionary<string, string> processedMavenProjects = new();
// Track files that couldn't establish parent relationships during first pass (for second pass re-evaluation)
private readonly ConcurrentQueue<(string FilePath, string ParentGroupId, string ParentArtifactId)> unresolvedParentRelationships = new();
// Track original pom.xml files for potential fallback.
// In the visible code only the count is read (for the completion log line and telemetry).
private readonly ConcurrentQueue<ProcessRequest> originalPomFiles = [];
// Track Maven CLI errors for analysis (raw error output of each failed CLI run)
private readonly ConcurrentQueue<string> mavenCliErrors = [];
// Endpoints extracted from the CLI errors when an authentication failure is detected; reported in telemetry.
private readonly ConcurrentQueue<string> failedEndpoints = [];
/// <summary>
/// Cache for parent POM lookups to avoid repeated file system operations.
/// Key: current file path, Value: parent POM path or empty string if not found.
/// </summary>
private readonly ConcurrentDictionary<string, string> parentPomCache = new();
// Telemetry tracking. All of these are reset by ResetScanState and published by OnDetectionFinishedAsync.
private MavenDetectionMethod usedDetectionMethod = MavenDetectionMethod.None;
private MavenFallbackReason fallbackReason = MavenFallbackReason.None;
// Updated with Interlocked because files may be processed concurrently.
private int mvnCliComponentCount;
private int staticParserComponentCount;
private int unresolvedVariableCount;
private int pendingComponentCountBeforeResolution;
// Result of the most recent Maven CLI availability probe; false when the CLI is disabled by the user.
private bool mavenCliAvailable;
/// <summary>
/// Initializes a new instance of the <see cref="MvnCliComponentDetector"/> class.
/// </summary>
/// <param name="componentStreamEnumerableFactory">Factory used to enumerate files matching a pattern under a directory.</param>
/// <param name="walkerFactory">Directory walker factory assigned to <c>Scanner</c>.</param>
/// <param name="mavenCommandService">Service that wraps the Maven CLI.</param>
/// <param name="envVarService">Service used to read environment variables.</param>
/// <param name="fileUtilityService">Service used for file existence checks and deletion.</param>
/// <param name="logger">Logger for this detector.</param>
public MvnCliComponentDetector(
    IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
    IObservableDirectoryWalkerFactory walkerFactory,
    IMavenCommandService mavenCommandService,
    IEnvironmentVariableService envVarService,
    IFileUtilityService fileUtilityService,
    ILogger<MvnCliComponentDetector> logger)
{
    // Collaborators stored in this class's own fields.
    this.fileUtilityService = fileUtilityService;
    this.envVarService = envVarService;
    this.mavenCommandService = mavenCommandService;

    // Members that are not declared in the visible part of this class.
    this.Logger = logger;
    this.Scanner = walkerFactory;
    this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory;
}
/// <summary>Gets the detector identifier.</summary>
public override string Id
{
    get { return MavenConstants.MvnCliDetectorId; }
}

/// <summary>Gets the file-name patterns this detector is interested in (only the Maven manifest).</summary>
public override IList<string> SearchPatterns
{
    get { return [MavenManifest]; }
}

/// <summary>Gets the component types this detector can report.</summary>
public override IEnumerable<ComponentType> SupportedComponentTypes
{
    get { return [ComponentType.Maven]; }
}

/// <summary>Gets the detector version.</summary>
public override int Version
{
    get { return 5; }
}

/// <summary>Gets the detector categories: the name of <see cref="DetectorClass.Maven"/>.</summary>
public override IEnumerable<string> Categories
{
    get
    {
        var mavenCategory = Enum.GetName(typeof(DetectorClass), DetectorClass.Maven);
        return [mavenCategory];
    }
}
/// <summary>
/// Returns <paramref name="path"/> with a trailing directory separator appended when it does
/// not already end with one. A terminated path lets prefix comparisons distinguish
/// "C:\foo\" from "C:\foobar".
/// </summary>
/// <param name="path">The directory path; may be null or empty.</param>
/// <returns>
/// The input unchanged when it is null, empty, or already ends with either the primary or the
/// alternate directory separator; otherwise the input followed by the primary separator.
/// </returns>
private static string NormalizeDirectoryPath(string path)
{
    if (path is null || path.Length == 0)
    {
        return path;
    }

    var trailing = path[path.Length - 1];
    var alreadyTerminated =
        trailing == Path.DirectorySeparatorChar ||
        trailing == Path.AltDirectorySeparatorChar;

    if (alreadyTerminated)
    {
        return path;
    }

    return path + Path.DirectorySeparatorChar;
}
/// <summary>
/// Reports whether <paramref name="errorMessage"/> contains any of the
/// <c>AuthErrorPatterns</c> entries, compared case-insensitively as plain substrings.
/// </summary>
/// <param name="errorMessage">Maven CLI error output; may be null or blank.</param>
/// <returns>True when at least one pattern occurs in the message; false otherwise, including for null or blank input.</returns>
private static bool IsAuthenticationError(string errorMessage)
{
    if (!string.IsNullOrWhiteSpace(errorMessage))
    {
        // Search over a span of the message, one pattern at a time.
        var haystack = errorMessage.AsSpan();
        for (var i = 0; i < AuthErrorPatterns.Length; i++)
        {
            if (haystack.Contains(AuthErrorPatterns[i], StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Reduces a raw URL to <c>scheme://host[:port]</c>. User info (credentials), path, query
/// string and fragment are dropped because URLs in Maven error messages may carry tokens.
/// </summary>
/// <param name="rawUrl">The URL text as extracted from an error message.</param>
/// <returns>
/// The reduced URL, or <see langword="null"/> when the input is not an absolute URI with an
/// http or https scheme. The port is only included when it is not the scheme's default.
/// </returns>
private static string NormalizeEndpointUrl(string rawUrl)
{
    var parsed = Uri.TryCreate(rawUrl, UriKind.Absolute, out var endpoint);

    // The extraction regex already restricts matches to http/https; this check is explicit on purpose.
    var isWebScheme = parsed && (endpoint.Scheme == "http" || endpoint.Scheme == "https");
    if (!isWebScheme)
    {
        return null;
    }

    // Built by hand instead of Uri.GetLeftPart(UriPartial.Authority), which keeps the user info.
    var authority = endpoint.Scheme + "://" + endpoint.Host;
    if (!endpoint.IsDefaultPort)
    {
        authority += ":" + endpoint.Port;
    }

    return authority;
}
/// <summary>
/// Writes a debug-level log entry prefixed with this detector's <see cref="Id"/>.
/// </summary>
/// <param name="message">The text to log.</param>
private void LogDebugWithId(string message)
{
    this.Logger.LogDebug("{DetectorId}: {Message}", this.Id, message);
}
/// <summary>
/// Writes a warning-level log entry prefixed with this detector's <see cref="Id"/>.
/// </summary>
/// <param name="message">The text to log.</param>
private void LogWarning(string message)
{
    this.Logger.LogWarning("{DetectorId}: {Message}", this.Id, message);
}
/// <summary>
/// Resets all per-scan state to prevent stale data from leaking between scans.
/// This is critical because detectors are registered as singletons.
/// </summary>
/// <remarks>
/// Every collection and telemetry field of this class is returned to its initial value.
/// Queues are emptied with <c>ConcurrentQueue&lt;T&gt;.Clear()</c> rather than hand-written
/// dequeue loops.
/// </remarks>
private void ResetScanState()
{
    // Static-parsing lookup tables.
    this.collectedVariables.Clear();
    this.mavenParentChildRelationships.Clear();
    this.processedMavenProjects.Clear();
    this.parentPomCache.Clear();

    // Work and error queues.
    this.pendingComponents.Clear();
    this.unresolvedParentRelationships.Clear();
    this.originalPomFiles.Clear();
    this.mavenCliErrors.Clear();
    this.failedEndpoints.Clear();

    // Telemetry counters and flags.
    this.usedDetectionMethod = MavenDetectionMethod.None;
    this.fallbackReason = MavenFallbackReason.None;
    this.mvnCliComponentCount = 0;
    this.staticParserComponentCount = 0;
    this.unresolvedVariableCount = 0;
    this.pendingComponentCountBeforeResolution = 0;
    this.mavenCliAvailable = false;
}
/// <summary>
/// Prepares detection by resetting per-scan state and running the Maven CLI phase under a
/// timeout of <c>PrepareDetectionTimeout</c>.
/// </summary>
/// <param name="processRequests">The incoming pom.xml process requests.</param>
/// <param name="detectorArgs">Detector arguments; not used by this method.</param>
/// <param name="cancellationToken">Token supplied by the caller to cancel the scan.</param>
/// <returns>
/// The observable produced by the CLI phase, or the original <paramref name="processRequests"/>
/// when the phase times out or fails, so that every pom.xml is parsed statically.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Thrown when <paramref name="cancellationToken"/> is cancelled by the caller. Cancellation
/// requested by the caller is propagated rather than being treated as a CLI failure.
/// </exception>
protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(
    IObservable<ProcessRequest> processRequests,
    IDictionary<string, string> detectorArgs,
    CancellationToken cancellationToken = default)
{
    // Reset all per-scan state to prevent stale data from previous scans.
    // This is critical because detectors are registered as singletons.
    this.ResetScanState();

    // OnPrepareDetectionAsync doesn't have the same guardrails as OnFileFoundAsync,
    // so the whole CLI phase runs under a timeout that is linked to the caller's token.
    try
    {
        using var timeoutCts = new CancellationTokenSource(PrepareDetectionTimeout);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCts.Token);
        return await this.OnPrepareDetectionCoreAsync(processRequests, linkedCts.Token);
    }
    catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
    {
        // Timeout occurred (not user cancellation)
        this.LogWarning($"OnPrepareDetectionAsync timed out after {PrepareDetectionTimeout.TotalMinutes} minutes. Falling back to static pom.xml parsing.");
        this.Telemetry["TimedOut"] = "true";
        this.fallbackReason = MavenFallbackReason.OtherMvnCliFailure;
        this.usedDetectionMethod = MavenDetectionMethod.Mixed;
        return processRequests;
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Unexpected error - log and fall back to static parsing.
        // OperationCanceledException is excluded so that a cancellation requested by the
        // caller is not swallowed and misreported as an unexpected error.
        this.LogWarning($"OnPrepareDetectionAsync failed with unexpected error: {ex.Message}. Falling back to static pom.xml parsing.");
        this.Telemetry["PrepareDetectionError"] = ex.GetType().Name;
        this.fallbackReason = MavenFallbackReason.OtherMvnCliFailure;
        this.usedDetectionMethod = MavenDetectionMethod.Mixed;
        return processRequests;
    }
}
/// <summary>
/// Body of the prepare phase; the caller wraps it in the timeout and error handling.
/// </summary>
/// <param name="processRequests">The incoming pom.xml process requests.</param>
/// <param name="cancellationToken">Token that is cancelled on timeout or caller cancellation.</param>
/// <returns>
/// <paramref name="processRequests"/> unchanged when the Maven CLI is disabled or unavailable;
/// otherwise the observable produced by the CLI detection run.
/// </returns>
private async Task<IObservable<ProcessRequest>> OnPrepareDetectionCoreAsync(
    IObservable<ProcessRequest> processRequests,
    CancellationToken cancellationToken)
{
    // The availability probe only runs when the CLI has not been disabled by configuration.
    var cliUsable = !this.ShouldSkipMavenCli() && await this.TryInitializeMavenCliAsync();
    if (!cliUsable)
    {
        return processRequests;
    }

    // A fresh dictionary per scan keeps nested pom.xml mappings from accumulating across scans,
    // since detectors are singletons.
    var nestedPomsByRoot = new ConcurrentDictionary<string, IList<ProcessRequest>>(StringComparer.OrdinalIgnoreCase);

    // Yields deps files where the CLI succeeded and pom.xml files where it failed.
    var detectionOutput = await this.RunMavenCliDetectionAsync(processRequests, nestedPomsByRoot, cancellationToken);
    return detectionOutput;
}
/// <summary>
/// Decides whether the Maven CLI phase is bypassed because the
/// <c>CD_MAVEN_DISABLE_CLI</c> environment variable is set to a true value.
/// When it is, the telemetry fields are updated to record a user-requested static-only scan.
/// </summary>
/// <returns>True if Maven CLI should be skipped; otherwise, false.</returns>
private bool ShouldSkipMavenCli()
{
    var disabledByUser = this.envVarService.IsEnvironmentVariableValueTrue(DisableMvnCliEnvVar);
    if (!disabledByUser)
    {
        return false;
    }

    this.LogDebugWithId($"MvnCli detection disabled via {DisableMvnCliEnvVar} environment variable. Using static pom.xml parsing only.");
    this.usedDetectionMethod = MavenDetectionMethod.StaticParserOnly;
    this.fallbackReason = MavenFallbackReason.MvnCliDisabledByUser;
    this.mavenCliAvailable = false;
    return true;
}
/// <summary>
/// Probes for the Maven CLI and records the outcome in <c>mavenCliAvailable</c>.
/// When the CLI is missing, the telemetry fields are set to a static-only scan.
/// </summary>
/// <returns>True if Maven CLI is available; otherwise, false.</returns>
private async Task<bool> TryInitializeMavenCliAsync()
{
    var cliFound = await this.mavenCommandService.MavenCLIExistsAsync();
    this.mavenCliAvailable = cliFound;

    if (cliFound)
    {
        this.LogDebugWithId("Maven CLI is available. Running MvnCli detection.");
        return true;
    }

    this.LogDebugWithId("Maven CLI not found in PATH. Will use static pom.xml parsing only.");
    this.usedDetectionMethod = MavenDetectionMethod.StaticParserOnly;
    this.fallbackReason = MavenFallbackReason.MavenCliNotAvailable;
    return false;
}
/// <summary>
/// Runs Maven CLI detection on all root pom.xml files.
/// For each pom.xml, if CLI succeeds, the deps file is added to results.
/// If CLI fails, all pom.xml files under that directory are added for static parsing fallback.
/// </summary>
/// <remarks>
/// Generated dependency files are not collected per pom.xml; they are discovered afterwards by
/// scanning the whole source directory for the dependency file name. A CLI run that reports
/// success but leaves no dependency file next to its pom.xml is counted as a failure.
/// </remarks>
/// <param name="processRequests">The incoming process requests.</param>
/// <param name="parentPomDictionary">Dictionary to track nested pom.xml mappings for fallback scenarios.</param>
/// <param name="cancellationToken">Cancellation token for the operation.</param>
/// <returns>An observable of process requests (deps files for CLI success, pom.xml for CLI failure).</returns>
private async Task<IObservable<ProcessRequest>> RunMavenCliDetectionAsync(
IObservable<ProcessRequest> processRequests,
ConcurrentDictionary<string, IList<ProcessRequest>> parentPomDictionary,
CancellationToken cancellationToken)
{
// pom.xml requests queued for static parsing because the CLI failed for their root.
var results = new ConcurrentQueue<ProcessRequest>();
// Directories of the root pom.xml files for which the CLI run counted as a failure.
var failedDirectories = new ConcurrentQueue<string>();
var cliSuccessCount = 0;
var cliFailureCount = 0;
// Process pom.xml files sequentially to avoid Maven local repository lock contention and
// to reduce memory pressure from concurrent Maven JVM processes.
// No MaxDegreeOfParallelism is set below, so the block relies on the Dataflow default
// (documented as one message at a time).
var processPomFile = new ActionBlock<ProcessRequest>(
async processRequest =>
{
// Check for cancellation before processing each pom.xml
cancellationToken.ThrowIfCancellationRequested();
// Store original pom.xml for telemetry
this.originalPomFiles.Enqueue(processRequest);
var pomFile = processRequest.ComponentStream;
var pomDir = Path.GetDirectoryName(pomFile.Location);
var depsFileName = this.mavenCommandService.BcdeMvnDependencyFileName;
// Expected location of the generated file: next to the pom.xml.
var depsFilePath = Path.Combine(pomDir, depsFileName);
// Generate dependency file using Maven CLI.
var result = await this.mavenCommandService.GenerateDependenciesFileAsync(
processRequest,
cancellationToken);
if (result.Success)
{
// CLI succeeded - verify deps file was generated
// Use existence check to avoid redundant I/O (file will be read during directory scan)
if (this.fileUtilityService.Exists(depsFilePath))
{
Interlocked.Increment(ref cliSuccessCount);
}
else
{
// CLI reported success but deps file is missing - treat as failure
Interlocked.Increment(ref cliFailureCount);
failedDirectories.Enqueue(pomDir);
this.LogWarning($"Maven CLI succeeded but deps file not found: {depsFilePath}");
}
}
else
{
// CLI failed - track directory for nested pom.xml scanning
Interlocked.Increment(ref cliFailureCount);
failedDirectories.Enqueue(pomDir);
// Capture error output for later analysis
if (!string.IsNullOrWhiteSpace(result.ErrorOutput))
{
this.mavenCliErrors.Enqueue(result.ErrorOutput);
}
}
},
new ExecutionDataflowBlockOptions
{
CancellationToken = cancellationToken,
});
// Feed only the pom.xml files that survive RemoveNestedPomXmls into the block.
// NOTE(review): the boolean returned by Post is ignored; a rejected item would only surface
// through the awaited Completion task below.
await this.RemoveNestedPomXmls(processRequests, parentPomDictionary, cancellationToken).ForEachAsync(
processRequest =>
{
processPomFile.Post(processRequest);
},
cancellationToken);
// Signal that no more items will arrive, then wait for the queued CLI runs to finish.
processPomFile.Complete();
await processPomFile.Completion;
// For failed directories, scan and add all pom.xml files for static parsing
if (!failedDirectories.IsEmpty)
{
foreach (var failedDir in failedDirectories)
{
cancellationToken.ThrowIfCancellationRequested();
// The dictionary is looked up with the separator-terminated form of the directory.
var normalizedFailedDir = NormalizeDirectoryPath(failedDir);
if (parentPomDictionary.TryGetValue(normalizedFailedDir, out var staticParsingRequests))
{
// Note: staticParsingRequests is already in parent-first order due to the sorted processing
// during dictionary building in RemoveNestedPomXmls
foreach (var request in staticParsingRequests)
{
cancellationToken.ThrowIfCancellationRequested();
results.Enqueue(request);
}
}
}
}
// Determine detection method based on results
this.DetermineDetectionMethod(cliSuccessCount, cliFailureCount);
this.LogDebugWithId($"Maven CLI processing complete: {cliSuccessCount} succeeded, {cliFailureCount} failed out of {this.originalPomFiles.Count} root pom.xml files. Retrieving generated dependency graphs.");
// Use comprehensive directory scanning after Maven CLI execution to find all generated dependency files
// This ensures we find dependency files from submodules even if Maven CLI was only run on parent pom.xml
// NOTE(review): this looks like a deferred LINQ projection, in which case the files are read
// when the returned sequence is enumerated, not here - confirm against GetComponentStreams.
var allGeneratedDependencyFiles = this.ComponentStreamEnumerableFactory
.GetComponentStreams(
this.CurrentScanRequest.SourceDirectory,
[this.mavenCommandService.BcdeMvnDependencyFileName],
this.CurrentScanRequest.DirectoryExclusionPredicate)
.Select(componentStream =>
{
// Read and store content to avoid stream disposal issues
using var reader = new StreamReader(componentStream.Stream);
var content = reader.ReadToEnd();
return new ProcessRequest
{
ComponentStream = new ComponentStream
{
// Re-encode the text as UTF-8 into an in-memory stream that outlives the reader.
Stream = new MemoryStream(Encoding.UTF8.GetBytes(content)),
Location = componentStream.Location,
Pattern = componentStream.Pattern,
},
// Components are recorded against the pom.xml path in the deps file's directory.
SingleFileComponentRecorder = this.ComponentRecorder.CreateSingleFileComponentRecorder(
Path.Combine(Path.GetDirectoryName(componentStream.Location), MavenManifest)),
};
});
// Combine dependency files from CLI success with pom.xml files from CLI failures
// (fallback pom.xml requests come first in the resulting sequence).
return results.Concat(allGeneratedDependencyFiles).ToObservable();
}
/// <summary>
/// Sets <c>usedDetectionMethod</c> from the CLI outcome counts and, when any run failed,
/// triggers failure analysis. When both counts are zero nothing is changed.
/// </summary>
/// <param name="cliSuccessCount">Number of successful CLI executions.</param>
/// <param name="cliFailureCount">Number of failed CLI executions.</param>
private void DetermineDetectionMethod(int cliSuccessCount, int cliFailureCount)
{
    if (cliFailureCount > 0)
    {
        // At least one root pom.xml has to be parsed statically.
        this.usedDetectionMethod = MavenDetectionMethod.Mixed;
        this.LogWarning($"Maven CLI failed for {cliFailureCount} pom.xml files. Using mixed detection.");
        this.AnalyzeMvnCliFailure();
        return;
    }

    if (cliFailureCount == 0 && cliSuccessCount > 0)
    {
        this.usedDetectionMethod = MavenDetectionMethod.MvnCliOnly;
        this.LogDebugWithId("All pom.xml files processed successfully with Maven CLI.");
    }
}
/// <summary>
/// Handles one file from the prepare phase: a CLI-generated dependency file is parsed through
/// the Maven command service, anything else is treated as a pom.xml and parsed statically.
/// </summary>
/// <param name="processRequest">The file to process.</param>
/// <param name="detectorArgs">Detector arguments; not used by this method.</param>
/// <param name="cancellationToken">Cancellation token; not used by this method.</param>
/// <returns>A completed task; all work is done synchronously.</returns>
protected override Task OnFileFoundAsync(
    ProcessRequest processRequest,
    IDictionary<string, string> detectorArgs,
    CancellationToken cancellationToken = default)
{
    var matchedPattern = processRequest.ComponentStream.Pattern;
    var isCliDepsFile = matchedPattern == this.mavenCommandService.BcdeMvnDependencyFileName;

    if (!isCliDepsFile)
    {
        // Process via static XML parsing
        this.ProcessPomFileStatically(processRequest);
        return Task.CompletedTask;
    }

    // Process MvnCli result
    this.ProcessMvnCliResult(processRequest);

    // The deps file content was already copied into memory during the prepare phase,
    // so the file on disk can be removed when cleanup is requested.
    if (this.CurrentScanRequest?.CleanupCreatedFiles != true)
    {
        return Task.CompletedTask;
    }

    var depsFileLocation = processRequest.ComponentStream.Location;
    try
    {
        this.fileUtilityService.Delete(depsFileLocation);
        this.Logger.LogDebug("Cleaned up Maven deps file {File}", depsFileLocation);
    }
    catch (Exception deleteError)
    {
        // Cleanup is best-effort: a failed delete is logged and otherwise ignored.
        this.Logger.LogDebug(deleteError, "Failed to delete Maven deps file {File}", depsFileLocation);
    }

    return Task.CompletedTask;
}
/// <summary>
/// Completes detection: resolves parent relationships and pending components that were
/// deferred during file processing, then publishes telemetry and a summary log line.
/// </summary>
/// <returns>A completed task; all work is done synchronously.</returns>
protected override Task OnDetectionFinishedAsync()
{
    // Second pass: parent relationships that could not be resolved while files were processed
    // (for example when a child POM was seen before its parent).
    this.ResolveUnresolvedParentRelationships();

    // Third pass: resolve all pending components with the collected variables and complete hierarchy.
    this.ResolvePendingComponents();

    // Snapshot the values once so telemetry and the log line report the same numbers.
    var cliComponents = this.mvnCliComponentCount;
    var staticComponents = this.staticParserComponentCount;
    var methodText = this.usedDetectionMethod.ToString();
    var reasonText = this.fallbackReason.ToString();
    var cliComponentsText = cliComponents.ToString();
    var staticComponentsText = staticComponents.ToString();

    this.Telemetry["DetectionMethod"] = methodText;
    this.Telemetry["FallbackReason"] = reasonText;
    this.Telemetry["MvnCliComponentCount"] = cliComponentsText;
    this.Telemetry["StaticParserComponentCount"] = staticComponentsText;
    this.Telemetry["TotalComponentCount"] = (cliComponents + staticComponents).ToString();
    this.Telemetry["MavenCliAvailable"] = this.mavenCliAvailable.ToString();
    this.Telemetry["OriginalPomFileCount"] = this.originalPomFiles.Count.ToString();
    this.Telemetry["CollectedVariableCount"] = this.collectedVariables.Count.ToString();
    this.Telemetry["PendingComponentCount"] = this.pendingComponentCountBeforeResolution.ToString();
    this.Telemetry["UnresolvedVariableCount"] = this.unresolvedVariableCount.ToString();

    if (!this.failedEndpoints.IsEmpty)
    {
        // Report at most ten distinct endpoints.
        var reportedEndpoints = this.failedEndpoints.Distinct().Take(10);
        this.Telemetry["FailedEndpoints"] = string.Join(";", reportedEndpoints);
    }

    this.LogDebugWithId(
        $"Detection completed. Method: {methodText}, FallbackReason: {reasonText}, MvnCli components: {cliComponentsText}, Static parser components: {staticComponentsText}");

    return Task.CompletedTask;
}
/// <summary>
/// Classifies a Maven CLI failure from the captured error output. When any captured error
/// looks like an authentication problem, the failing endpoints are extracted and guidance is
/// logged; otherwise the failure is recorded as a generic CLI failure.
/// </summary>
private void AnalyzeMvnCliFailure()
{
    var authFailureSeen = this.mavenCliErrors.Any(IsAuthenticationError);
    if (!authFailureSeen)
    {
        this.fallbackReason = MavenFallbackReason.OtherMvnCliFailure;
        this.LogWarning("Maven CLI failed. Check Maven logs for details.");
        return;
    }

    this.fallbackReason = MavenFallbackReason.AuthenticationFailure;

    // Collect the endpoints mentioned in every captured error message.
    foreach (var errorText in this.mavenCliErrors)
    {
        foreach (var endpoint in this.ExtractFailedEndpoints(errorText))
        {
            this.failedEndpoints.Enqueue(endpoint);
        }
    }

    this.LogAuthErrorGuidance();
}
/// <summary>
/// Parses a CLI-generated dependency file and adds the number of components recorded for it
/// to the MvnCli component counter.
/// </summary>
/// <param name="processRequest">The dependency file request to parse.</param>
private void ProcessMvnCliResult(ProcessRequest processRequest)
{
    this.mavenCommandService.ParseDependenciesFile(processRequest);

    // Count from this file's own recorder; OnFileFoundAsync may run concurrently for several
    // files, so the shared counter is updated atomically.
    var recordedForFile = processRequest.SingleFileComponentRecorder.GetDetectedComponents();
    Interlocked.Add(ref this.mvnCliComponentCount, recordedForFile.Count);
}
/// <summary>
/// Parses a pom.xml without the Maven CLI. Variables defined in the file are published to the
/// shared variable table, and every <c>dependency</c> element is either registered
/// immediately or queued for later resolution.
/// </summary>
/// <remarks>
/// <para>
/// A dependency is registered immediately when its version is a literal, or a
/// <c>${...}</c> placeholder that resolves from this file's own variables. A placeholder that
/// cannot be resolved locally is queued in <c>pendingComponents</c>. Dependencies without a
/// version, or with a version containing a comma, are logged and skipped.
/// </para>
/// <para>
/// Placeholder detection uses an ordinal comparison so the result does not depend on the
/// current culture. Any exception is logged and swallowed so that one unreadable file does
/// not abort the scan.
/// </para>
/// </remarks>
/// <param name="processRequest">The pom.xml request to parse.</param>
private void ProcessPomFileStatically(ProcessRequest processRequest)
{
    var file = processRequest.ComponentStream;
    var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder;
    var filePath = file.Location;

    // Registers one resolved component against this file and counts it for telemetry.
    void RegisterComponent(string groupId, string artifactId, string resolvedVersion)
    {
        var component = new MavenComponent(groupId, artifactId, resolvedVersion);
        singleFileComponentRecorder.RegisterUsage(new DetectedComponent(component));
        Interlocked.Increment(ref this.staticParserComponentCount);
    }

    try
    {
        var document = new XmlDocument();
        document.Load(file.Stream);

        // One namespace manager is shared by every XPath query on this document.
        var namespaceManager = new XmlNamespaceManager(document.NameTable);
        namespaceManager.AddNamespace(ProjNamespace, MavenXmlNamespace);

        // Collect variables from this document into a local dictionary first.
        var localVariables = new Dictionary<string, string>();
        this.CollectVariablesFromDocument(document, namespaceManager, filePath, localVariables);

        // Publish them under "filePath::variableName" so later lookups can follow the Maven hierarchy.
        if (localVariables.Count > 0)
        {
            foreach (var (variableName, variableValue) in localVariables)
            {
                this.collectedVariables[$"{filePath}::{variableName}"] = variableValue;
            }

            this.Logger.LogDebug("{DetectorId}: Collected {Count} variables from {File}", this.Id, localVariables.Count, Path.GetFileName(filePath));
        }

        // First pass: collect dependencies (may have unresolved variables).
        var dependencyList = document.SelectNodes(DependencyNode, namespaceManager);
        foreach (XmlNode dependency in dependencyList)
        {
            var groupId = dependency[GroupIdSelector]?.InnerText;
            var artifactId = dependency[ArtifactIdSelector]?.InnerText;
            if (groupId == null || artifactId == null)
            {
                continue;
            }

            var version = dependency[VersionSelector];
            if (version == null || version.InnerText.Contains(','))
            {
                this.Logger.LogDebug(
                    "Version string for component {Group}/{Artifact} is invalid or unsupported and a component will not be recorded.",
                    groupId,
                    artifactId);
                continue;
            }

            // Strip the brackets of a single-version range such as "[1.0]".
            var versionRef = version.InnerText.Trim('[', ']');
            if (!versionRef.StartsWith("${", StringComparison.Ordinal))
            {
                // Direct version - register immediately.
                RegisterComponent(groupId, artifactId, versionRef);
                continue;
            }

            // Only resolve immediately if a local variable exists (highest priority).
            var resolvedVersion = this.ResolveVersionFromLocalOnly(versionRef, localVariables);
            if (!resolvedVersion.StartsWith("${", StringComparison.Ordinal))
            {
                RegisterComponent(groupId, artifactId, resolvedVersion);
            }
            else
            {
                // No local variable - defer so that every variable definition is known
                // before hierarchy-aware resolution runs.
                this.pendingComponents.Enqueue(new PendingComponent(
                    groupId,
                    artifactId,
                    versionRef,
                    singleFileComponentRecorder,
                    filePath));
            }
        }
    }
    catch (Exception e)
    {
        this.Logger.LogError(e, "Failed to read file {Path}", filePath);
    }
}
/// <summary>
/// Reads one POM document and copies every variable it defines into <paramref name="localVariables"/>.
/// Before collecting, the project's coordinates are tracked and its parent relationship is parsed.
/// </summary>
/// <param name="document">The POM document to read.</param>
/// <param name="namespaceManager">Namespace manager passed to every XPath query (the queries use the <c>proj</c> prefix).</param>
/// <param name="filePath">Path of the POM; forwarded to the tracking helpers and used in log messages.</param>
/// <param name="localVariables">Receives the collected variables; an existing key is overwritten.</param>
private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath, Dictionary<string, string> localVariables)
{
    // Stores a project coordinate under both of its variable names; a missing or blank node is ignored.
    void AddCoordinate(XmlNode coordinateNode, string shortKey, string qualifiedKey)
    {
        if (coordinateNode == null || string.IsNullOrWhiteSpace(coordinateNode.InnerText))
        {
            return;
        }

        localVariables[shortKey] = coordinateNode.InnerText;
        localVariables[qualifiedKey] = coordinateNode.InnerText;
    }

    try
    {
        // The coordinate nodes are queried a single time and shared by tracking and variable collection.
        var groupNode = document.SelectSingleNode("/proj:project/proj:groupId", namespaceManager);
        var artifactNode = document.SelectSingleNode("/proj:project/proj:artifactId", namespaceManager);
        var versionNode = document.SelectSingleNode("/proj:project/proj:version", namespaceManager);

        // Register this project so other POMs can find it by coordinates, then record its own parent link.
        this.TrackMavenProjectCoordinates(document, namespaceManager, filePath, groupNode, artifactNode);
        this.ParseMavenParentRelationship(document, namespaceManager, filePath);

        // Every <properties> element anywhere in the document contributes variables.
        var propertySections = document.SelectNodes("//proj:properties", namespaceManager);
        if (propertySections != null && propertySections.Count > 0)
        {
            if (propertySections.Count > 1)
            {
                this.Logger.LogDebug("{DetectorId}: Found {Count} properties sections in {File}", this.Id, propertySections.Count, Path.GetFileName(filePath));
            }

            foreach (XmlNode section in propertySections)
            {
                foreach (XmlNode entry in section.ChildNodes)
                {
                    // Only elements with non-blank text define a variable (comments, whitespace, etc. are skipped).
                    if (entry.NodeType != XmlNodeType.Element || string.IsNullOrWhiteSpace(entry.InnerText))
                    {
                        continue;
                    }

                    // A later definition replaces an earlier one with the same name (last wins).
                    localVariables[entry.Name] = entry.InnerText;
                }
            }
        }

        // Project-level variables are written after the properties, so they take precedence on a name clash.
        AddCoordinate(versionNode, "version", "project.version");
        AddCoordinate(groupNode, "groupId", "project.groupId");
        AddCoordinate(artifactNode, "artifactId", "project.artifactId");
    }
    catch (Exception e)
    {
        this.Logger.LogError(e, "Failed to collect variables from file {Path}", filePath);
    }
}
/// <summary>
/// Reads the <c>&lt;parent&gt;</c> element of a POM and records which pom.xml file is this project's parent.
/// When the parent file cannot be located yet, the relationship is queued so it can be resolved later.
/// </summary>
/// <param name="document">The POM document to inspect.</param>
/// <param name="namespaceManager">Namespace manager used for the XPath query (the query uses the <c>proj</c> prefix).</param>
/// <param name="currentFilePath">Path of the POM being processed (the child side of the relationship).</param>
private void ParseMavenParentRelationship(XmlDocument document, XmlNamespaceManager namespaceManager, string currentFilePath)
{
    try
    {
        var parentElement = document.SelectSingleNode("/proj:project/proj:parent", namespaceManager);
        if (parentElement == null)
        {
            // No <parent> declared - nothing to record.
            return;
        }

        // Children are read straight off the parent element instead of issuing further XPath queries.
        var declaredGroupId = parentElement["groupId"]?.InnerText;
        var declaredArtifactId = parentElement["artifactId"]?.InnerText;
        if (string.IsNullOrWhiteSpace(declaredArtifactId))
        {
            // Without an artifactId the parent cannot be identified.
            return;
        }

        var locatedParentPath = this.FindParentPomByArtifactId(declaredGroupId, declaredArtifactId, currentFilePath);
        if (string.IsNullOrEmpty(locatedParentPath))
        {
            // Parent is not known at this point - remember the request for the later resolution pass.
            this.unresolvedParentRelationships.Enqueue((currentFilePath, declaredGroupId, declaredArtifactId));
            this.Logger.LogDebug(
                "{DetectorId}: Queued unresolved parent relationship for {Child} → {ParentArtifactId}",
                this.Id,
                Path.GetFileName(currentFilePath),
                declaredArtifactId);
            return;
        }

        this.mavenParentChildRelationships[currentFilePath] = locatedParentPath;
        this.Logger.LogDebug(
            "{DetectorId}: Parsed parent relationship: {Child} → {Parent}",
            this.Id,
            Path.GetFileName(currentFilePath),
            Path.GetFileName(locatedParentPath));
    }
    catch (Exception e)
    {
        this.Logger.LogError(e, "Failed to parse parent relationship from {FilePath}", currentFilePath);
    }
}
/// <summary>
/// Finds the pom.xml file that acts as parent of <paramref name="currentFilePath"/>.
/// Projects already tracked by Maven coordinates are consulted first; if no match is found, the directories
/// above the current file are walked upward and the first pom.xml encountered is returned.
/// </summary>
/// <remarks>
/// The result is obtained through <c>parentPomCache.GetOrAdd</c> keyed by <paramref name="currentFilePath"/>.
/// NOTE(review): this presumably means a "not found" answer is cached as well - confirm that any later
/// resolution pass does not rely on calling this method again for the same file.
/// A file is never returned as its own parent, neither from the coordinate lookup nor from the directory walk.
/// </remarks>
/// <param name="parentGroupId">Parent groupId to match; may be null or blank, in which case only the artifactId is used as lookup key.</param>
/// <param name="parentArtifactId">Parent artifactId to match.</param>
/// <param name="currentFilePath">Path of the child pom.xml; also the starting point of the directory walk.</param>
/// <returns>Parent pom.xml file path, or an empty string if none was found or an error occurred.</returns>
private string FindParentPomByArtifactId(string parentGroupId, string parentArtifactId, string currentFilePath)
{
    // Use cache to avoid repeated operations for the same file
    return this.parentPomCache.GetOrAdd(currentFilePath, filePath =>
    {
        try
        {
            // First, try to find by Maven coordinates (handles sibling projects)
            if (!string.IsNullOrWhiteSpace(parentArtifactId))
            {
                var coordinateKey = string.IsNullOrWhiteSpace(parentGroupId)
                    ? parentArtifactId
                    : $"{parentGroupId}:{parentArtifactId}";

                if (this.processedMavenProjects.TryGetValue(coordinateKey, out var coordinateBasedPath))
                {
                    if (!string.Equals(coordinateBasedPath, filePath, StringComparison.OrdinalIgnoreCase))
                    {
                        this.Logger.LogDebug(
                            "{DetectorId}: Found parent {ParentCoordinate} at {Path} for {Child}",
                            this.Id,
                            coordinateKey,
                            Path.GetFileName(coordinateBasedPath),
                            Path.GetFileName(filePath));
                        return coordinateBasedPath;
                    }

                    // The current file is itself registered under these coordinates, so a POM that declares
                    // its own coordinates as <parent> would resolve to itself and form a self-cycle.
                    // Ignore the match and continue with the directory search, mirroring the guard used there.
                    this.Logger.LogDebug(
                        "{DetectorId}: Ignoring self-referencing parent {ParentCoordinate} for {Child}",
                        this.Id,
                        coordinateKey,
                        Path.GetFileName(filePath));
                }
            }

            // Fallback: Maven convention parent directory search, starting one level above the file's directory
            var currentDir = Path.GetDirectoryName(filePath);
            var parentDir = Path.GetDirectoryName(currentDir);

            // Track visited directories to prevent infinite loops from circular directory structures
            var visitedDirectories = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            while (!string.IsNullOrEmpty(parentDir))
            {
                // Prevent infinite loops from circular directory references or file system anomalies
                if (!visitedDirectories.Add(parentDir))
                {
                    this.Logger.LogDebug(
                        "{DetectorId}: Circular directory reference detected while searching for parent POM, breaking at {Directory}",
                        this.Id,
                        parentDir);
                    break;
                }

                var parentPomPath = Path.Combine(parentDir, "pom.xml");
                if (this.fileUtilityService.Exists(parentPomPath) &&
                    !string.Equals(parentPomPath, filePath, StringComparison.OrdinalIgnoreCase))
                {
                    return parentPomPath;
                }

                var nextParentDir = Path.GetDirectoryName(parentDir);
                if (string.Equals(nextParentDir, parentDir, StringComparison.OrdinalIgnoreCase))
                {
                    break; // Reached file system root
                }

                parentDir = nextParentDir;
            }

            return string.Empty; // Not found
        }
        catch (Exception ex)
        {
            // Best effort: a lookup failure is reported as "not found" rather than propagated.
            this.Logger.LogDebug(ex, "Error finding parent POM for {FilePath}", Path.GetFileName(filePath));
            return string.Empty;
        }
    });
}
/// <summary>
/// Tracks a Maven project by its coordinates to enable coordinate-based parent resolution.
/// </summary>
/// <param name="document">The XML document to parse.</param>
/// <param name="namespaceManager">XML namespace manager for Maven POM.</param>
/// <param name="filePath">Current pom.xml file path.</param>
/// <param name="groupIdNode">Pre-queried groupId node (can be null).</param>
/// <param name="artifactIdNode">Pre-queried artifactId node (can be null).</param>
private void TrackMavenProjectCoordinates(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath, XmlNode groupIdNode, XmlNode artifactIdNode)
{
try
{
// If project doesn't have its own groupId, try to get it from parent
groupIdNode ??= document.SelectSingleNode("/proj:project/proj:parent/proj:groupId", namespaceManager);
if (artifactIdNode != null && !string.IsNullOrWhiteSpace(artifactIdNode.InnerText))
{
var groupId = groupIdNode?.InnerText;
var artifactId = artifactIdNode.InnerText;
// Store with both artifactId-only and groupId:artifactId keys for flexible lookup
this.processedMavenProjects.TryAdd(artifactId, filePath);
if (!string.IsNullOrWhiteSpace(groupId))
{
this.processedMavenProjects.TryAdd($"{groupId}:{artifactId}", filePath);
}
this.Logger.LogDebug(