-
Notifications
You must be signed in to change notification settings - Fork 129
Expand file tree
/
Copy pathRubyComponentDetector.cs
More file actions
290 lines (250 loc) · 12.7 KB
/
Copy pathRubyComponentDetector.cs
File metadata and controls
290 lines (250 loc) · 12.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
#nullable disable
// Ruby detection highlights and todos:
//
// Dependencies are "fuzzy versions":
// this in and of itself could be solved by deferring dependency resolution alone until after all components are registered.
// Different sections of Ruby's lockfile can point into other sections, and the authoritative version is not replicated across
// sections-- it's only stored in, say, the Gems section.
//
// Git components are even stranger in Ruby land:
// they have an annotation for a git component that is a "name" that has no relationship to how we normally think of
// a GitComponent (remote / version). The mapping from git component name to a GitComponent can't really be handled
// in ComponentRecorder today, because "component name" for a Git component is a Ruby specific concept.
// This could be pointing to a sideloaded storage in ComponentRecorder (e.g. a <TContext> style storage that detectors
// could use to track related state as their execution goes on).
//
// The basic approach in ruby is to do two passes:
// first, make sure you have all authoritative components, then, resolve and register all dependency relationships.
//
// If we had sideloaded state for nodes in the graph, I could see us at least being able to remove the "name" mapping from ruby.
// Deferred dependencies is a lot more complicated, you would basically need a way to set up a pointer to a component based on a mapped value
// (in this case, just component name sans version) that would be resolved in an arbitrary way after the graph writing was "done".
// I don't think this is impossible (having a custom delegate for a detector to identify and map nodes to one another seems pretty easy),
// but seems complicated.
//
// It may be possible to use manual root detection instead of automatic:
// Gemfile.lock comes with a section called "Dependencies" that lists the dependencies the user specified in the Gemfile.
// It still needs to be investigated whether this section is a new addition or has always been there.
namespace Microsoft.ComponentDetection.Detectors.Ruby;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Contracts.Internal;
using Microsoft.ComponentDetection.Contracts.TypedComponent;
using Microsoft.Extensions.Logging;
public class RubyComponentDetector : FileComponentDetector
{
// Matches a section heading: a line made up solely of upper-case letters and spaces (e.g. "GEM", "BUNDLED WITH").
private static readonly Regex HeadingRegex = new("^[A-Z ]+$", RegexOptions.Compiled);

// Matches a component definition inside "specs:": exactly four leading spaces followed by a letter or hyphen.
private static readonly Regex DependencyDefinitionRegex = new("^ {4}[A-Za-z-]+", RegexOptions.Compiled);

// Matches a sub-dependency of the preceding definition: six leading spaces followed by a letter or hyphen.
private static readonly Regex SubDependencyRegex = new("^ {6}[A-Za-z-]+", RegexOptions.Compiled);
/// <summary>
/// Initializes a new instance of the <see cref="RubyComponentDetector"/> class.
/// </summary>
/// <param name="componentStreamEnumerableFactory">Factory stored in <c>ComponentStreamEnumerableFactory</c>.</param>
/// <param name="walkerFactory">Directory walker factory stored in <c>Scanner</c>.</param>
/// <param name="logger">Logger stored in <c>Logger</c> and used for all diagnostics of this detector.</param>
public RubyComponentDetector(
    IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
    IObservableDirectoryWalkerFactory walkerFactory,
    ILogger<RubyComponentDetector> logger)
{
    // Same three assignments as before, in the same order, expressed as one tuple assignment.
    (this.ComponentStreamEnumerableFactory, this.Scanner, this.Logger) =
        (componentStreamEnumerableFactory, walkerFactory, logger);
}
/// <summary>
/// The lockfile sections whose "specs:" entries are turned into components.
/// </summary>
private enum SectionType
{
    /// <summary>The "GEM" section; entries are recorded as RubyGems components.</summary>
    GEM = 0,

    /// <summary>The "GIT" section; entries are recorded as Git components (remote + revision).</summary>
    GIT = 1,

    /// <summary>The "PATH" section; entries are recorded as RubyGems components.</summary>
    PATH = 2,
}
/// <summary>Gets the identifier of this detector.</summary>
public override string Id => "Ruby";

/// <summary>Gets the detector categories: a single entry holding the name of <c>DetectorClass.RubyGems</c>.</summary>
public override IEnumerable<string> Categories => [Enum.GetName(typeof(DetectorClass), DetectorClass.RubyGems)];

/// <summary>Gets the file name patterns this detector declares; only "Gemfile.lock".</summary>
public override IList<string> SearchPatterns { get; } = ["Gemfile.lock"];

/// <summary>Gets the component types this detector declares; only <c>ComponentType.RubyGems</c>.</summary>
public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = [ComponentType.RubyGems];

/// <summary>Gets the version number of this detector.</summary>
public override int Version => 3;

/// <summary>Gets a value indicating whether root dependencies are calculated automatically; always <c>true</c> here.</summary>
public override bool NeedsAutomaticRootDependencyCalculation => true;
/// <summary>
/// Handles one discovered Gemfile.lock: logs its location and parses it synchronously.
/// </summary>
/// <param name="processRequest">Carries the component stream of the file and the recorder to register components with.</param>
/// <param name="detectorArgs">Detector arguments; not used by this detector.</param>
/// <param name="cancellationToken">Cancellation token; not observed because parsing is synchronous.</param>
/// <returns>An already completed task.</returns>
protected override Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
{
    var recorder = processRequest.SingleFileComponentRecorder;
    var lockFile = processRequest.ComponentStream;

    this.Logger.LogDebug("Found Gemfile.lock {FileLocation}", lockFile.Location);

    // All work happens synchronously, so there is nothing to await.
    this.ParseGemLockFile(recorder, lockFile);
    return Task.CompletedTask;
}
/// <summary>
/// Reads a Gemfile.lock, collects the components declared in its GIT, GEM, PATH and BUNDLED WITH sections,
/// and registers them together with their parent/child relationships.
/// </summary>
/// <remarks>
/// Parsing is done in two passes. The first pass walks the file section by section (a section is a heading line
/// followed by lines up to the next blank line) and fills two dictionaries keyed by "name:location": the
/// authoritative components and, per component, the names of its sub-dependencies. The second pass registers
/// every component and then every dependency edge whose child was actually recorded.
/// </remarks>
/// <param name="singleFileComponentRecorder">Recorder that receives the components and dependency edges.</param>
/// <param name="file">The lockfile; its stream is read to the end and its location is used in lookup keys.</param>
private void ParseGemLockFile(ISingleFileComponentRecorder singleFileComponentRecorder, IComponentStream file)
{
    var components = new Dictionary<string, DetectedComponent>();
    var dependencies = new Dictionary<string, List<Dependency>>();

    string text;
    using (var reader = new StreamReader(file.Stream))
    {
        text = reader.ReadToEnd();
    }

    // Walk the lines with a cursor instead of repeatedly removing the first list element,
    // which would shift the whole list on every line.
    var lines = text.Split("\n");
    var index = 0;
    while (index < lines.Length)
    {
        var rawLine = lines[index++];
        var heading = rawLine.Trim();

        if (heading.Length == 0)
        {
            // Blank separator (or the empty entry after the final newline): nothing to report.
            continue;
        }

        if (!HeadingRegex.IsMatch(heading))
        {
            // Throw this line away; we were expecting a heading here.
            this.Logger.LogDebug("Appears to be malformed/is not expected here. Expected heading. {Line}", rawLine);
            continue;
        }

        // A section runs until the next blank line (or the end of the file).
        var sublines = new List<string>();
        while (index < lines.Length && lines[index].Trim().Length > 0)
        {
            sublines.Add(lines[index++]);
        }

        // Step over the blank line that terminated the section, if there is one.
        if (index < lines.Length)
        {
            index++;
        }

        switch (heading)
        {
            case "GIT":
                this.ParseSection(singleFileComponentRecorder, SectionType.GIT, sublines, components, dependencies, file);
                break;
            case "GEM":
                this.ParseSection(singleFileComponentRecorder, SectionType.GEM, sublines, components, dependencies, file);
                break;
            case "PATH":
                this.ParseSection(singleFileComponentRecorder, SectionType.PATH, sublines, components, dependencies, file);
                break;
            case "BUNDLED WITH":
                if (sublines.Count == 0)
                {
                    // The heading is present but no version line follows it; there is nothing to record.
                    this.Logger.LogDebug("BUNDLED WITH section without a version in {FileLocation}", file.Location);
                    break;
                }

                const string bundlerName = "bundler";
                var bundlerVersion = sublines[0].Trim();
                var bundlerKey = $"{bundlerName}:{file.Location}";

                // Nothing in the lockfile tells us where bundler came from
                var bundler = new DetectedComponent(new RubyGemsComponent(bundlerName, bundlerVersion, "unknown"));
                components.TryAdd(bundlerKey, bundler);
                dependencies.TryAdd(bundlerKey, []);
                break;
            default:
                // We ignore other sections
                break;
        }
    }

    foreach (var detectedComponent in components.Values)
    {
        singleFileComponentRecorder.RegisterUsage(detectedComponent);
    }

    foreach (var (parentKey, children) in dependencies)
    {
        if (!components.TryGetValue(parentKey, out var parent))
        {
            // No authoritative component for this parent, so there is nothing to attach children to.
            continue;
        }

        foreach (var dependency in children)
        {
            // Some components are omitted during parsing because their version is not valid
            // (for example a relative version instead of an absolute one). Dependencies that point
            // at such a component have no entry in the dictionary and are skipped.
            if (components.TryGetValue(dependency.Id, out var child))
            {
                singleFileComponentRecorder.RegisterUsage(child, parentComponentId: parent.Component.Id);
            }
        }
    }
}
/// <summary>
/// Parses the body of one GIT, GEM or PATH section and records the components and sub-dependency names it declares.
/// </summary>
/// <remarks>
/// "remote:" and "revision:" lines set the source used for the components that follow. After a "specs:" line,
/// four-space-indented lines define components and six-space-indented lines name sub-dependencies of the most
/// recent definition. Definitions that cannot be turned into a component (relative version, missing version,
/// unusable git remote) are reported as parse failures and their sub-dependencies are skipped.
/// </remarks>
/// <param name="singleFileComponentRecorder">Recorder used to report package parse failures.</param>
/// <param name="sectionType">Section kind; GIT produces Git components, GEM and PATH produce RubyGems components.</param>
/// <param name="lines">The lines of the section, without the heading. The list is only read.</param>
/// <param name="components">Receives the components found, keyed by "name:location" (or "name:location:version" for a repeated name).</param>
/// <param name="dependencies">Receives, per "name:location" key, the sub-dependencies declared for that component.</param>
/// <param name="file">The lockfile being parsed; its location is part of every lookup key.</param>
private void ParseSection(ISingleFileComponentRecorder singleFileComponentRecorder, SectionType sectionType, List<string> lines, Dictionary<string, DetectedComponent> components, Dictionary<string, List<Dependency>> dependencies, IComponentStream file)
{
    const string remotePrefix = "remote:";
    const string revisionPrefix = "revision:";
    const string specsPrefix = "specs:";

    var name = string.Empty;
    var remote = string.Empty;
    var revision = string.Empty;
    var wasParentDependencyExcluded = false;

    var index = 0;
    while (index < lines.Count)
    {
        var line = lines[index++].Trim();

        if (line.StartsWith(remotePrefix, StringComparison.Ordinal))
        {
            // Slice by the prefix length and trim, so a bare "remote:" yields an empty value instead of throwing.
            remote = line[remotePrefix.Length..].Trim();

            // revision is only used for Git components.
            revision = string.Empty;
        }
        else if (line.StartsWith(revisionPrefix, StringComparison.Ordinal))
        {
            revision = line[revisionPrefix.Length..].Trim();
        }
        else if (line.StartsWith(specsPrefix, StringComparison.Ordinal))
        {
            while (index < lines.Count)
            {
                var specLine = lines[index++].TrimEnd();
                if (string.IsNullOrWhiteSpace(specLine))
                {
                    break;
                }

                if (SubDependencyRegex.IsMatch(specLine))
                {
                    // Sub-dependency: only stored for parents that were not excluded.
                    if (wasParentDependencyExcluded)
                    {
                        continue;
                    }

                    var depName = specLine.Trim().Split(' ')[0];
                    if (dependencies.TryGetValue($"{name}:{file.Location}", out var parentDependencies))
                    {
                        parentDependencies.Add(new Dependency(depName, file.Location));
                    }
                    else
                    {
                        // No registered parent (e.g. the sub-dependency precedes any definition).
                        this.Logger.LogDebug("Found sub-dependency without a registered parent. {Line}", specLine);
                    }

                    continue;
                }

                if (!DependencyDefinitionRegex.IsMatch(specLine))
                {
                    continue;
                }

                wasParentDependencyExcluded = false;
                var definition = specLine.Trim();
                var splits = definition.Split(" ");
                name = splits[0].Trim();

                // The version token is expected to look like "(1.2.3)". Anything shorter than three
                // characters cannot hold a version between the parentheses.
                if (splits.Length < 2 || splits[1].Length < 3)
                {
                    this.Logger.LogWarning("Found component with missing or malformed version, line = {RubyComponentLine}", definition);
                    singleFileComponentRecorder.RegisterPackageParseFailure(definition);
                    wasParentDependencyExcluded = true;
                    continue;
                }

                var fullVersion = splits[1][1..^1];
                if (this.IsVersionRelative(fullVersion))
                {
                    this.Logger.LogWarning("Found component with invalid version, name = {RubyComponentName} and version = {RubyComponentVersion}", name, fullVersion);
                    singleFileComponentRecorder.RegisterPackageParseFailure($"{name} - {fullVersion}");
                    wasParentDependencyExcluded = true;
                    continue;
                }

                // Ruby version strings are formatted differently from pure semantic versioning.
                // Specifically, everything after the dash is the platform, not the pre-release version. For
                // example: "1.19.4-x86_64-linux-gnu". Ruby represents pre-release versions by placing a letter
                // in the main version string. For example: "2.0.0.rc1".
                // See: https://guides.rubygems.org/patterns/#prerelease-gems
                // Remove the platform from the version string (if it exists).
                var version = fullVersion.Split("-", 2)[0];

                TypedComponent newComponent;
                if (sectionType == SectionType.GEM || sectionType == SectionType.PATH)
                {
                    newComponent = new RubyGemsComponent(name, version, remote);
                }
                else if (Uri.TryCreate(remote, UriKind.Absolute, out var remoteUri))
                {
                    newComponent = new GitComponent(remoteUri, revision);
                }
                else
                {
                    // A remote that is not an absolute URI (e.g. scp-style "git@host:path") cannot be
                    // represented; skip this component instead of aborting the whole file.
                    this.Logger.LogWarning("Found git component with invalid remote, name = {RubyComponentName} and remote = {RubyComponentRemote}", name, remote);
                    singleFileComponentRecorder.RegisterPackageParseFailure($"{name} - {fullVersion}");
                    wasParentDependencyExcluded = true;
                    continue;
                }

                var addComponent = new DetectedComponent(newComponent);
                var lookupKey = $"{name}:{file.Location}";
                if (components.TryAdd(lookupKey, addComponent))
                {
                    dependencies.TryAdd(lookupKey, []);
                }
                else
                {
                    // The name was already recorded; keep this entry under a version-qualified key.
                    components.TryAdd($"{lookupKey}:{version}", addComponent);
                }
            }
        }
    }
}
/// <summary>
/// Tells whether a version string starts with a relative-version operator character ('~' or '=').
/// </summary>
/// <param name="version">The version text to inspect.</param>
/// <returns><c>true</c> when the first character is '~' or '='; otherwise <c>false</c> (including for an empty string).</returns>
private bool IsVersionRelative(string version)
{
    if (version.Length == 0)
    {
        return false;
    }

    return version[0] is '~' or '=';
}
/// <summary>
/// A reference from a component to one of its sub-dependencies, identified by gem name and lockfile location.
/// </summary>
private class Dependency(string name, string location)
{
    /// <summary>Gets the name of the dependency as written in the lockfile.</summary>
    public string Name { get; } = name;

    /// <summary>Gets the location of the lockfile that declared the dependency.</summary>
    public string Location { get; } = location;

    /// <summary>Gets the lookup key in the form "name:location".</summary>
    public string Id => string.Concat(this.Name, ":", this.Location);
}
}