Skip to content

Commit eecd1a5

Browse files
committed
Refactor Gitlab v2 importer pipeline and live importer to reduce duplicate code #1903
Signed-off-by: Michael Ehab Mikhail <michael.ehab@hotmail.com>
1 parent 79429df commit eecd1a5

3 files changed

Lines changed: 191 additions & 234 deletions

File tree

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import logging
12+
import traceback
13+
14+
import pytz
15+
from dateutil import parser as dateparser
16+
from univers.version_range import RANGE_CLASS_BY_SCHEMES
17+
from univers.version_range import from_gitlab_native
18+
19+
from vulnerabilities.importer import AdvisoryData
20+
from vulnerabilities.importer import AffectedPackageV2
21+
from vulnerabilities.importer import ReferenceV2
22+
from vulnerabilities.utils import build_description
23+
from vulnerabilities.utils import get_cwe_id
24+
25+
26+
def _parse_published_date(pubdate, logger):
    """
    Return ``pubdate`` parsed as a UTC timezone-aware datetime.
    Return None when ``pubdate`` is empty or cannot be parsed; a parse failure is logged
    rather than raised so that one malformed date does not discard the whole advisory.
    """
    if not pubdate:
        return None

    try:
        parsed = dateparser.parse(pubdate)
    except (TypeError, ValueError, OverflowError) as e:
        logger(
            f"advisory_dict_to_advisory_data: pubdate is not parsable: {pubdate!r} error: {e!r}",
            level=logging.ERROR,
        )
        return None

    if parsed.utcoffset() is None:
        # Naive value: no offset was given, so label it as UTC.
        return parsed.replace(tzinfo=pytz.UTC)

    # Aware value: convert to UTC. Using replace() here would keep the wall-clock time
    # and drop the offset, which silently shifts the actual instant.
    return parsed.astimezone(pytz.UTC)


def advisory_dict_to_advisory_data(
    advisory: dict,
    *,
    purl_type_by_gitlab_scheme,
    gitlab_scheme_by_purl_type,
    logger,
    get_purl_fn,
    purl=None,
    advisory_url=None,
):
    """
    Convert a GitLab advisory mapping (already loaded from YAML or JSON) to an
    `AdvisoryData` instance.

    Return an `AdvisoryData` without affected packages when no usable package PURL can be
    derived from ``package_slug`` or when the package type has no known version range class.
    Return None when neither an affected nor a fixed version range can be derived.

    Parameters:
    - advisory: dict per GitLab schema (identifier, package_slug, ...)
    - purl_type_by_gitlab_scheme: mapping of GitLab package type to PackageURL type
    - gitlab_scheme_by_purl_type: inverse mapping of PackageURL type to GitLab type
    - logger: callable like pipeline.log(message, level=logging.LEVEL)
    - get_purl_fn: function to build a version-less PURL from package_slug; called with the
      keyword arguments ``package_slug``, ``purl_type_by_gitlab_scheme`` and ``logger``
    - purl: optional PURL; accepted for interface compatibility and not read by this
      function: ranges use the PURL derived from package_slug via get_purl_fn
    - advisory_url: optional URL; if not provided, a default URL is built from
      package_slug and identifier when both are present
    """
    from urllib.parse import urljoin

    aliases = list(advisory.get("identifiers") or [])
    identifier = advisory.get("identifier") or ""
    package_slug = advisory.get("package_slug")

    advisory_id = f"{package_slug}/{identifier}" if package_slug else identifier
    if advisory_id in aliases:
        aliases.remove(advisory_id)

    summary = build_description(advisory.get("title"), advisory.get("description"))
    references = [ReferenceV2.from_url(u) for u in advisory.get("urls") or []]
    cwe_list = list(map(get_cwe_id, advisory.get("cwe_ids") or []))
    date_published = _parse_published_date(advisory.get("pubdate"), logger)

    # Build the default advisory URL up front so that every return path, including the
    # package-less fallbacks below, carries a URL whenever one can be derived.
    if not advisory_url and package_slug and identifier:
        advisory_url = urljoin(
            "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/",
            package_slug + "/" + identifier + ".yml",
        )

    # Fields shared by the package-less fallback and the complete advisory.
    common_fields = dict(
        advisory_id=advisory_id,
        aliases=aliases,
        summary=summary,
        references_v2=references,
        date_published=date_published,
        weaknesses=cwe_list,
        url=advisory_url,
        original_advisory_text=json.dumps(advisory, indent=2, ensure_ascii=False),
    )

    # Prefer a version-less PURL derived from package_slug for affected/fixed ranges
    purl_for_package = None
    if package_slug:
        purl_for_package = get_purl_fn(
            package_slug=package_slug,
            purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme,
            logger=logger,
        )

    if not purl_for_package:
        logger(
            f"advisory_dict_to_advisory_data: purl is not valid: {package_slug!r}",
            level=logging.ERROR,
        )
        return AdvisoryData(**common_fields)

    vrc = RANGE_CLASS_BY_SCHEMES.get(purl_for_package.type)
    gitlab_scheme = gitlab_scheme_by_purl_type.get(purl_for_package.type)
    if vrc is None or gitlab_scheme is None:
        # An unmapped package type cannot have its ranges parsed; keep the advisory
        # metadata instead of failing with a bare KeyError.
        logger(
            "advisory_dict_to_advisory_data: unsupported package type: "
            f"{purl_for_package.type!r} for: {purl_for_package!s}",
            level=logging.ERROR,
        )
        return AdvisoryData(**common_fields)

    # Compute affected and fixed ranges
    affected_version_range = None
    affected_range = advisory.get("affected_range")
    gitlab_native_schemes = {"pypi", "gem", "npm", "go", "packagist", "conan"}
    try:
        if affected_range:
            if gitlab_scheme in gitlab_native_schemes:
                affected_version_range = from_gitlab_native(
                    gitlab_scheme=gitlab_scheme, string=affected_range
                )
            else:
                affected_version_range = vrc.from_native(affected_range)
    except Exception as e:
        # Best effort: an unparsable affected range is logged and the advisory may still
        # be emitted using its fixed versions only.
        logger(
            (
                "advisory_dict_to_advisory_data: affected_range is not parsable: "
                f"{affected_range!r} for: {purl_for_package!s} error: {e!r}\n {traceback.format_exc()}"
            ),
            level=logging.ERROR,
        )

    parsed_fixed_versions = []
    for fixed_version in advisory.get("fixed_versions") or []:
        try:
            parsed_fixed_versions.append(vrc.version_class(fixed_version).string)
        except Exception as e:
            # Skip only the offending version; the remaining fixed versions are kept.
            logger(
                (
                    "advisory_dict_to_advisory_data: fixed_version is not parsable`: "
                    f"{fixed_version!r} error: {e!r}\n {traceback.format_exc()}"
                ),
                level=logging.ERROR,
            )

    if affected_version_range:
        # Build the fixed range with the same range class as the parsed affected range.
        vrc = affected_version_range.__class__

    fixed_version_range = vrc.from_versions(parsed_fixed_versions)
    if not fixed_version_range and not affected_version_range:
        return None

    affected_package = AffectedPackageV2(
        package=purl_for_package,
        affected_version_range=affected_version_range,
        fixed_version_range=fixed_version_range,
    )

    return AdvisoryData(affected_packages=[affected_package], **common_fields)

vulnerabilities/pipelines/v2_importers/gitlab_importer.py

Lines changed: 10 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -7,28 +7,21 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10-
import json
1110
import logging
12-
import traceback
1311
from pathlib import Path
1412
from typing import Iterable
1513
from typing import Tuple
1614

17-
import pytz
1815
import saneyaml
19-
from dateutil import parser as dateparser
2016
from fetchcode.vcs import fetch_via_vcs
2117
from packageurl import PackageURL
22-
from univers.version_range import RANGE_CLASS_BY_SCHEMES
23-
from univers.version_range import from_gitlab_native
2418

2519
from vulnerabilities.importer import AdvisoryData
26-
from vulnerabilities.importer import AffectedPackageV2
27-
from vulnerabilities.importer import ReferenceV2
2820
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
29-
from vulnerabilities.utils import build_description
21+
from vulnerabilities.pipelines.v2_importers.gitlab_advisory_utils import (
22+
advisory_dict_to_advisory_data as shared_advisory_dict_to_advisory_data,
23+
)
3024
from vulnerabilities.utils import get_advisory_url
31-
from vulnerabilities.utils import get_cwe_id
3225

3326

3427
class GitLabImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
@@ -208,97 +201,18 @@ def parse_gitlab_advisory(
208201
)
209202
return
210203

211-
# refer to schema here https://gitlab.com/gitlab-org/advisories-community/-/blob/main/ci/schema/schema.json
212-
aliases = gitlab_advisory.get("identifiers")
213-
advisory_id = gitlab_advisory.get("identifier")
214-
package_slug = gitlab_advisory.get("package_slug")
215-
advisory_id = f"{package_slug}/{advisory_id}" if package_slug else advisory_id
216-
if advisory_id in aliases:
217-
aliases.remove(advisory_id)
218-
summary = build_description(gitlab_advisory.get("title"), gitlab_advisory.get("description"))
219-
urls = gitlab_advisory.get("urls")
220-
references = [ReferenceV2.from_url(u) for u in urls]
221-
222-
cwe_ids = gitlab_advisory.get("cwe_ids") or []
223-
cwe_list = list(map(get_cwe_id, cwe_ids))
224-
225-
date_published = dateparser.parse(gitlab_advisory.get("pubdate"))
226-
date_published = date_published.replace(tzinfo=pytz.UTC)
204+
# Build a stable URL to the advisory file within the repo for traceability
227205
advisory_url = get_advisory_url(
228206
file=file,
229207
base_path=base_path,
230208
url="https://gitlab.com/gitlab-org/advisories-community/-/blob/main/",
231209
)
232-
purl: PackageURL = get_purl(
233-
package_slug=package_slug,
210+
211+
return shared_advisory_dict_to_advisory_data(
212+
advisory=gitlab_advisory,
234213
purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme,
214+
gitlab_scheme_by_purl_type=gitlab_scheme_by_purl_type,
235215
logger=logger,
236-
)
237-
if not purl:
238-
logger(
239-
f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}", level=logging.ERROR
240-
)
241-
return AdvisoryData(
242-
advisory_id=advisory_id,
243-
aliases=aliases,
244-
summary=summary,
245-
references_v2=references,
246-
date_published=date_published,
247-
url=advisory_url,
248-
original_advisory_text=json.dumps(gitlab_advisory, indent=2, ensure_ascii=False),
249-
)
250-
affected_version_range = None
251-
fixed_versions = gitlab_advisory.get("fixed_versions") or []
252-
affected_range = gitlab_advisory.get("affected_range")
253-
gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"])
254-
vrc = RANGE_CLASS_BY_SCHEMES[purl.type]
255-
gitlab_scheme = gitlab_scheme_by_purl_type[purl.type]
256-
try:
257-
if affected_range:
258-
if gitlab_scheme in gitlab_native_schemes:
259-
affected_version_range = from_gitlab_native(
260-
gitlab_scheme=gitlab_scheme, string=affected_range
261-
)
262-
else:
263-
affected_version_range = vrc.from_native(affected_range)
264-
except Exception as e:
265-
logger(
266-
f"parse_yaml_file: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}",
267-
level=logging.ERROR,
268-
)
269-
270-
parsed_fixed_versions = []
271-
for fixed_version in fixed_versions:
272-
try:
273-
fixed_version = vrc.version_class(fixed_version)
274-
parsed_fixed_versions.append(fixed_version.string)
275-
except Exception as e:
276-
logger(
277-
f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}",
278-
level=logging.ERROR,
279-
)
280-
281-
if affected_version_range:
282-
vrc = affected_version_range.__class__
283-
284-
fixed_version_range = vrc.from_versions(parsed_fixed_versions)
285-
if not fixed_version_range and not affected_version_range:
286-
return
287-
288-
affected_package = AffectedPackageV2(
289-
package=purl,
290-
affected_version_range=affected_version_range,
291-
fixed_version_range=fixed_version_range,
292-
)
293-
294-
return AdvisoryData(
295-
advisory_id=advisory_id,
296-
aliases=aliases,
297-
summary=summary,
298-
references_v2=references,
299-
date_published=date_published,
300-
affected_packages=[affected_package],
301-
weaknesses=cwe_list,
302-
url=advisory_url,
303-
original_advisory_text=json.dumps(gitlab_advisory, indent=2, ensure_ascii=False),
216+
get_purl_fn=get_purl,
217+
advisory_url=advisory_url,
304218
)

0 commit comments

Comments
 (0)