-
-
Notifications
You must be signed in to change notification settings - Fork 231
Expand file tree
/
Copy pathgithub_advisory_sync.rb
More file actions
468 lines (381 loc) · 13.5 KB
/
github_advisory_sync.rb
File metadata and controls
468 lines (381 loc) · 13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
require "active_support"
require "active_support/core_ext/enumerable"
require "date"
require "faraday"
require "json"
require 'fileutils'
require "yaml"
require "open-uri"
module GitHub
# Entry point of the sync script: mirrors GitHub advisories into advisory files.
class GitHubAdvisorySync
  # Sync makes sure there are rubysec advisories for all GitHub advisories.
  # It writes a set of yaml files, one for each GitHub Advisory that
  # is not already present in this repo.
  #
  # min_year - earliest CVE year to sync. It is more important to sync the
  #            newer ones, so this lets the caller control how far back the
  #            sync reaches.
  # gem_name - optional gem name forwarded to the API client.
  #
  # Returns an Array with the names of the files that were written
  # (empty when nothing had to be written).
  def self.sync(min_year: 2000, gem_name: nil)
    gh_advisories = GraphQLAPIClient.new.all_rubygem_advisories(gem_name: gem_name)

    # Filter out advisories with a CVE year that is before the min_year
    gh_advisories.select! { |v| v.cve_after_year?(min_year) }

    # Every advisory reports one entry per package: a filename, or nil when
    # nothing was written. The non-bang #compact is required here because
    # #compact! returns nil when there is no nil to remove, which would make
    # the #empty? call below blow up.
    files_written = gh_advisories.filter_map(&:sync).flatten.compact

    puts "\nSync completed"
    if files_written.empty?
      puts "Nothing to sync today! All CVEs starting from #{min_year} are already present"
    else
      puts "Wrote these files:\n#{files_written.to_yaml}"
    end

    files_written
  end
end
class GraphQLAPIClient
# URL that GraphQL requests are POSTed to.
GITHUB_API_URL = "https://api.github.com/graphql".freeze
# Raised when the GH_API_TOKEN environment variable is not defined.
GitHubApiTokenMissingError = Class.new(StandardError)
# Lazily built, memoized Faraday connection used for GitHub API requests.
#
# adapter - Faraday adapter to install. Only the first call builds the
#           connection; later calls return the cached one unchanged.
#
# Returns the connection object.
def github_api(adapter = :net_http)
  return @github_api if @github_api

  puts "Initializing GitHub API connection to URL: #{GITHUB_API_URL}"
  @github_api = Faraday.new do |faraday|
    faraday.adapter adapter
    faraday.headers = {
      "User-Agent" => "rubysec/ruby-advisory-db rubysec sync script",
      "Content-Type" => "application/json",
      "Authorization" => "token #{github_api_token}"
    }
  end
end
# An error class which gets raised when a GraphQL request fails
GitHubGraphQLAPIError = Class.new(StandardError)

# Single gateway for every GraphQL call, so that request construction and
# error checking live in one place.
#
# graphql_query_name - name of the GraphQLQueries constant holding the query text
# graphql_variables  - variables sent along with the query
#
# Returns the parsed JSON response body.
# Raises GitHubGraphQLAPIError when the status is not 200 or when the parsed
# body carries an "errors" entry.
def github_graphql_query(graphql_query_name, graphql_variables = {})
  payload = JSON.generate(
    query: GraphQLQueries.const_get(graphql_query_name),
    variables: graphql_variables
  )

  puts "Executing GraphQL request: #{graphql_query_name}. Request variables:\n#{graphql_variables.to_yaml}\n"

  response = github_api.post do |request|
    request.url GITHUB_API_URL
    request.body = payload
  end

  puts "Got response code: #{response.status}"

  unless response.status == 200
    raise(
      GitHubGraphQLAPIError,
      "GitHub GraphQL request to #{response.env.url} failed: #{response.body}"
    )
  end

  parsed = JSON.parse(response.body)
  api_errors = parsed["errors"]
  return parsed unless api_errors

  raise(GitHubGraphQLAPIError, api_errors.map { |e| e["message"] }.join(", "))
end
# Groups the retrieved vulnerability nodes by the advisory they belong to.
#
# gem_name - optional gem name forwarded to the vulnerability retrieval
#
# Returns an Array of GitHubAdvisory objects, one per primary id, each
# holding its vulnerability nodes (with the "advisory" key removed).
# Withdrawn advisories are left out.
def all_rubygem_advisories(gem_name: nil)
  grouped = retrieve_all_rubygem_vulnerabilities(gem_name: gem_name).each_with_object({}) do |node, by_id|
    candidate = GitHubAdvisory.new(node["advisory"])
    next if candidate.withdrawn?

    key = candidate.primary_id
    # the first advisory object seen for an id is the one that is kept
    by_id[key] = candidate unless by_id[key]
    by_id[key].vulnerabilities << node.except("advisory")
  end

  grouped.values
end
# Fetches vulnerability nodes page by page until the API reports no next
# page or max_pages requests have been made.
#
# max_pages - upper bound on the number of page requests
# page_size - value sent as the "first" query variable
# gem_name  - value sent as the "gem_name" query variable (nil by default)
#
# Returns an Array with the nodes of every fetched page, in fetch order.
def retrieve_all_rubygem_vulnerabilities(max_pages = 1000, page_size = 100, gem_name: nil)
  collected = []
  query_variables = { "first" => page_size, "gem_name" => gem_name }

  (1..max_pages).each do |page_number|
    puts "Getting page #{page_number} of GitHub Vulnerabilities"

    response = github_graphql_query(:RUBYGEM_VULNERABILITIES_WITH_GITHUB_ADVISORIES, query_variables)
    connection = response["data"]["securityVulnerabilities"]
    collected.concat(connection["nodes"])

    page_info = connection["pageInfo"]
    break unless page_info["hasNextPage"] == true

    # continue after the last node of the page just read
    query_variables["after"] = page_info["endCursor"]
  end

  puts "Retrieved #{collected.length} Vulnerabilities from GitHub API"
  collected
end
# GraphQL query documents; github_graphql_query looks them up by constant name.
module GraphQLQueries
# Lists security vulnerabilities of the RUBYGEMS ecosystem together with the
# advisory each one belongs to. $first and $after drive the pagination and
# $gem_name is passed as the package filter.
RUBYGEM_VULNERABILITIES_WITH_GITHUB_ADVISORIES = <<-GRAPHQL.freeze
query($first: Int, $after: String, $gem_name: String) {
securityVulnerabilities(first: $first, after: $after, ecosystem:RUBYGEMS, package: $gem_name) {
pageInfo {
endCursor
hasNextPage
hasPreviousPage
startCursor
}
nodes {
package {
name
ecosystem
}
vulnerableVersionRange
firstPatchedVersion {
identifier
}
advisory {
identifiers {
type
value
}
summary
description
severity
cvss {
score
vectorString
}
references {
url
}
publishedAt
withdrawnAt
}
}
}
}
GRAPHQL
end
private
# Reads the GitHub API token from the GH_API_TOKEN environment variable.
#
# Returns the token String exactly as stored in the environment.
# Raises GitHubApiTokenMissingError when the variable is unset, empty or
# only whitespace; a blank value cannot authenticate and would only surface
# later as a confusing request failure.
def github_api_token
  token = ENV["GH_API_TOKEN"]
  return token unless token.nil? || token.strip.empty?

  raise(
    GitHubApiTokenMissingError,
    "Unable to make API requests. Must define 'GH_API_TOKEN' environment variable."
  )
end
end
class GitHubAdvisory
# One gem named by an advisory. Knows where the advisory file for that gem
# lives and how to assemble the data stored in it.
class Package
  # Keys of an advisory file, in the order they are written.
  KEYS = %w[
    gem library framework platform cve osvdb ghsa url title date description
    cvss_v2 cvss_v3 cvss_v4 unaffected_versions patched_versions related notes
  ].freeze

  attr_reader :name

  # advisory - object answering #primary_id and #to_h
  # name     - gem name as reported by the API
  def initialize(advisory, name)
    @advisory = advisory
    @name = name
  end

  # True when the advisory file for this gem already exists on disk.
  def updating?
    File.exist?(filename)
  end

  # Relative path of the advisory file: gems/<gem directory>/<primary id>.yml
  def filename
    # These packages appear to have been named differently in the past
    # This 'corrects' them so updates don't affect existing vulnerabilities
    package_name = case name
                   when "arabic-prawn"
                     "Arabic-Prawn"
                   when "redcloth"
                     "RedCloth"
                   else
                     name
                   end

    File.join("gems", package_name, "#{@advisory.primary_id}.yml")
  end

  # "rails" for the gems listed below, nil for every other gem.
  def framework
    case name
    when "actioncable", "actionmailbox", "actionmailer", "actionpack",
         "actiontext", "actionview", "activejob", "activemodel",
         "activerecord", "activestorage", "activesupport", "railties",
         "jquery-rails"
      "rails"
    end
  end

  # Gem-specific fields merged with the advisory's own fields.
  def to_h
    {
      "gem" => name,
      "framework" => framework,
    }.merge(@advisory.to_h)
  end

  # Combines previously saved data with freshly generated data.
  #
  # saved_data - Hash of values already stored for this advisory; a saved
  #              value always wins over the generated one
  #
  # Returns a new Hash containing only the KEYS that have a value, in KEYS
  # order, so the output is always the same for the same data. A Hash is
  # returned even when no key had to be dropped.
  def merge_data(saved_data)
    # build the generated data once instead of once per key
    generated = to_h

    KEYS.each_with_object({}) do |key, data|
      value = saved_data[key] || generated[key]
      data[key] = value unless value.nil?
    end
  end
end
attr_reader :advisory, :vulnerabilities
# advisory - the advisory data; kept as given and exposed through #advisory
#
# The vulnerabilities list starts out empty.
def initialize(advisory)
  @advisory, @vulnerabilities = advisory, []
end
# The "identifiers" entry of the advisory data.
def identifier_list
  advisory.dig("identifiers")
end
# extract the CVE identifier from the GitHub Advisory identifier list;
# nil when the list has no entry of type "CVE"
def cve_id
  identifier_list.each do |identifier|
    return identifier["value"] if identifier["type"] == "CVE"
  end

  nil
end
# Value of the first identifier of type "GHSA".
def ghsa_id
  ghsa_entry = identifier_list.detect { |identifier| identifier["type"] == "GHSA" }
  ghsa_entry["value"]
end
# advisories should be identified by CVE ID if there is one
# but for maintainer submitted advisories there may not be one,
# so a GitHub Security Advisory ID (ghsa_id) is used instead
def primary_id
  cve_id || ghsa_id
end
# The day the advisory was published, as a Date parsed from "publishedAt"
# (not a String); nil when the advisory has no "publishedAt" value.
def published_day
  published_at = advisory["publishedAt"]
  Date.parse(published_at) if published_at
end
# True when the advisory carries a "withdrawnAt" value.
def withdrawn?
  withdrawn_at = advisory["withdrawnAt"]
  !withdrawn_at.nil?
end
# The advisory's CVSS score as a Float; nil when its "cvss" entry has no
# vectorString.
def cvss
  rating = advisory["cvss"]
  rating["vectorString"].nil? ? nil : rating["score"].to_f
end
# URL of the first reference that does not point at an NVD detail page.
#
# Returns the URL String, or nil (rather than raising) when the advisory
# has no references or every reference is an NVD link.
def external_reference
  references = advisory["references"] || []
  non_nvd = references.find do |ref|
    !ref["url"].start_with?("https://nvd.nist.gov/vuln/detail/")
  end

  non_nvd && non_nvd["url"]
end
# One Package per distinct gem name found in the vulnerabilities, in order
# of first appearance.
def packages
  gem_names = vulnerabilities.map { |vulnerability| vulnerability["package"]["name"] }
  gem_names.uniq.map { |gem_name| Package.new(self, gem_name) }
end
# Advisory fields derived from the GitHub data, without nil values.
#
# "cve" and "ghsa" are stored without their "CVE-" / "GHSA-" prefix.
def to_h
  fields = {}

  cve = cve_id
  fields["cve"] = cve[4..20] if cve
  fields["date"] = published_day
  fields["ghsa"] = ghsa_id[5..]
  fields["url"] = external_reference
  fields["title"] = advisory["summary"]
  fields["description"] = advisory["description"]
  fields["cvss_v3"] = cvss

  fields.compact
end
# Updates the advisory file of every package that already has one and
# creates it for the others.
#
# Returns an Array with one entry per package: whatever #update or #create
# returned for it.
def sync
  packages.map { |package| package.updating? ? update(package) : create(package) }
end
# Rewrites an existing advisory file when merging in the generated data
# changes its content.
#
# package - the Package whose file is refreshed
#
# Returns the filename when the file was rewritten, nil when the merged
# data equals what is already stored.
def update(package)
  path = package.filename

  # Date is permitted because stored advisories may contain date values
  on_disk = YAML.safe_load_file(path, permitted_classes: [Date])
  merged = package.merge_data(on_disk)
  return if on_disk == merged

  File.write(path, YAML.dump(merged))
  puts "Updated: #{path}"
  path
end
# Parses the vulnerableVersionRange of every vulnerability of the package.
#
# package - object answering #name
#
# Returns an Array of ranges. Each range is an Array of one or two
# [operator, version] pairs, e.g. [[">=", "1.0"], ["<", "1.2"]]; a range
# string holding a single comparison yields a single pair. Ranges are
# ordered by the version of their first pair, compared as versions rather
# than as plain strings (so "9.1.0" sorts before "10.0.0").
def vulnerable_version_ranges_for(package)
  ranges = vulnerabilities.select { |v|
    v['package']['name'] == package.name
  }.map { |v|
    v['vulnerableVersionRange'].split(', ', 2).map do |version_range|
      version_range.split(' ', 2)
    end
  }

  ranges.sort_by do |range|
    first_version = range.dig(0, 1).to_s
    # strings Gem::Version cannot parse sort first, then by their text
    parsed = Gem::Version.correct?(first_version) ? Gem::Version.new(first_version) : Gem::Version.new("0")
    [parsed, first_version]
  end
end
# Derives an "unaffected versions" requirement from the first vulnerable
# range of the package.
#
# Returns a one-element Array such as ["< 1.0"], or nil when the package has
# no vulnerable range or the first comparison of its first range is not one
# of ">", ">=" or "=".
def unaffected_versions_for(package)
  lowest_range = vulnerable_version_ranges_for(package).first
  return unless lowest_range

  operator, version = lowest_range.first

  case operator
  when '>' then ["<= #{version}"]
  when '>=', '=' then ["< #{version}"]
  end
end
# Collects the firstPatchedVersion identifiers of the package's
# vulnerabilities.
#
# package - object answering #name
#
# Returns an Array of version Strings in ascending version order. They are
# compared as versions rather than as plain strings (so "9.0.1" sorts
# before "10.0.1"). Vulnerabilities without a patched version are skipped.
def first_patched_versions_for(package)
  identifiers = vulnerabilities.filter_map do |v|
    next unless v['package']['name'] == package.name

    v.dig('firstPatchedVersion', 'identifier')
  end

  identifiers.sort_by do |identifier|
    text = identifier.to_s
    # strings Gem::Version cannot parse sort first, then by their text
    parsed = Gem::Version.correct?(text) ? Gem::Version.new(text) : Gem::Version.new("0")
    [parsed, text]
  end
end
# Turns the package's first patched versions into version requirements:
# "~> x" for every version but the last one and ">= x" for the last one.
#
# Returns an Array of requirement Strings (empty when nothing is patched).
def patched_versions_for(package)
  fixes = first_patched_versions_for(package)
  return [] if fixes.empty?

  requirements = fixes[0..-2].map { |version| "~> #{version}" }
  requirements << ">= #{fixes.last}"
  requirements
end
# Writes a new, automatically generated advisory file for the package.
#
# The file is incomplete and should not be committed as is, because GitHub
# data can not be translated directly into the rubysec advisory format:
# - patched_versions is built from GitHub's first patched versions, while a
#   rubysec advisory needs a ruby version spec
# - unaffected_versions is optional and has to be inferred from the
#   vulnerableVersionRange
# To help writing those fields, all the GitHub data is appended below the
# advisory as a second yaml document, which effectively makes it a large
# comment. It should still be removed before the data goes into rubysec.
#
# package - the Package to write the file for
#
# Returns the name of the file that was written.
def create(package)
  target_path = package.filename

  record = package.merge_data(
    "cvss_v3" => ("<FILL IN IF AVAILABLE>" unless cvss),
    "cvss_v4" => "<FILL IN IF AVAILABLE>"
  )

  unaffected = unaffected_versions_for(package)
  record['unaffected_versions'] = unaffected if unaffected

  patched = patched_versions_for(package)
  if patched.empty?
    record['notes'] = "Never patched"
  else
    record['patched_versions'] = patched
  end

  # populate the related information
  reference_urls = advisory["references"].map { |reference| reference['url'] }
  record["related"] = { "url" => reference_urls.reject(&:empty?) }

  FileUtils.mkdir_p(File.dirname(target_path))
  File.open(target_path, "w") do |file|
    # first yaml document: the generated advisory
    file.write record.to_yaml

    # second yaml document: the raw GitHub data, for the maintainer's eyes only
    file.write "# GitHub advisory data below - **Remove this data before committing**\n"
    file.write "# Use this data to write patched_versions (and potentially unaffected_versions) above\n"
    file.write advisory.merge("vulnerabilities" => vulnerabilities).to_yaml
  end

  puts "Wrote: #{target_path}"
  target_path
end
# True when the advisory's CVE year is not earlier than the given year.
# All advisories without a CVE are included too.
def cve_after_year?(year)
  cve = cve_id
  return true unless cve

  # a CVE id that does not match the pattern counts as year 0
  cve[/^CVE-(\d+)-\d+$/, 1].to_i >= year
end
end
end