Skip to content

Commit 57cdf99

Browse files
authored
Merge pull request #2155 from aboutcode-org/advisory_data_v2
Introduce AdvisoryDataV2 class
2 parents 66c914b + ce1da6e commit 57cdf99

File tree

95 files changed: +1816 additions, -1814 deletions (lines changed)

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

95 files changed: +1816 additions, -1814 deletions (lines changed)

vulnerabilities/importer.py

Lines changed: 77 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -253,8 +253,8 @@ def from_dict(cls, data: dict):
253253
@dataclasses.dataclass(eq=True)
254254
@functools.total_ordering
255255
class PatchData:
256-
patch_url: Optional[str] = None
257-
patch_text: Optional[str] = None
256+
patch_url: Optional[str] = ""
257+
patch_text: Optional[str] = ""
258258
patch_checksum: Optional[str] = dataclasses.field(init=False, default=None)
259259

260260
def __post_init__(self):
@@ -271,9 +271,9 @@ def __lt__(self, other):
271271

272272
def _cmp_key(self):
273273
return (
274-
self.patch_url,
275-
self.patch_text,
276-
self.patch_checksum,
274+
self.patch_url or "",
275+
self.patch_text or "",
276+
self.patch_checksum or "",
277277
)
278278

279279
def to_dict(self) -> dict:
@@ -556,23 +556,63 @@ def from_dict(cls, affected_pkg: dict):
556556
class AdvisoryData:
557557
"""
558558
This data class expresses the contract between data sources and the import runner.
559+
"""
560+
561+
aliases: List[str] = dataclasses.field(default_factory=list)
562+
summary: Optional[str] = ""
563+
affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list)
564+
references: List[Reference] = dataclasses.field(default_factory=list)
565+
date_published: Optional[datetime.datetime] = None
566+
weaknesses: List[int] = dataclasses.field(default_factory=list)
567+
url: Optional[str] = None
568+
569+
def __post_init__(self):
570+
if self.summary:
571+
self.summary = clean_summary(self.summary)
559572

560-
If a vulnerability_id is present then:
561-
summary or affected_packages or references must be present
562-
otherwise
563-
either affected_package or references should be present
573+
def to_dict(self):
574+
return {
575+
"aliases": self.aliases,
576+
"summary": self.summary,
577+
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
578+
"references": [ref.to_dict() for ref in self.references],
579+
"date_published": self.date_published.isoformat() if self.date_published else None,
580+
"weaknesses": self.weaknesses,
581+
"url": self.url if self.url else "",
582+
}
564583

565-
date_published must be aware datetime
584+
@classmethod
585+
def from_dict(cls, advisory_data):
586+
date_published = advisory_data["date_published"]
587+
transformed = {
588+
"aliases": advisory_data["aliases"],
589+
"summary": advisory_data["summary"],
590+
"affected_packages": [
591+
AffectedPackage.from_dict(pkg)
592+
for pkg in advisory_data["affected_packages"]
593+
if pkg is not None
594+
],
595+
"references": [Reference.from_dict(ref) for ref in advisory_data["references"]],
596+
"date_published": datetime.datetime.fromisoformat(date_published)
597+
if date_published
598+
else None,
599+
"weaknesses": advisory_data["weaknesses"],
600+
"url": advisory_data.get("url") or None,
601+
}
602+
return cls(**transformed)
603+
604+
605+
@dataclasses.dataclass(order=True)
606+
class AdvisoryDataV2:
607+
"""
608+
This data class expresses the contract between data sources and the import runner.
566609
"""
567610

568611
advisory_id: str = ""
569612
aliases: List[str] = dataclasses.field(default_factory=list)
570613
summary: Optional[str] = ""
571-
affected_packages: Union[List[AffectedPackage], List[AffectedPackageV2]] = dataclasses.field(
572-
default_factory=list
573-
)
574-
references: List[Reference] = dataclasses.field(default_factory=list)
575-
references_v2: List[ReferenceV2] = dataclasses.field(default_factory=list)
614+
affected_packages: List[AffectedPackageV2] = dataclasses.field(default_factory=list)
615+
references: List[ReferenceV2] = dataclasses.field(default_factory=list)
576616
patches: List[PatchData] = dataclasses.field(default_factory=list)
577617
date_published: Optional[datetime.datetime] = None
578618
weaknesses: List[int] = dataclasses.field(default_factory=list)
@@ -581,46 +621,24 @@ class AdvisoryData:
581621
original_advisory_text: Optional[str] = None
582622

583623
def __post_init__(self):
624+
if not self.advisory_id:
625+
raise ValueError("advisory_id is required for AdvisoryDataV2")
584626
if self.advisory_id and self.advisory_id in self.aliases:
585627
raise ValueError(
586628
f"advisory_id {self.advisory_id} should not be present in aliases {self.aliases}"
587629
)
588630
if self.summary:
589-
self.summary = self.clean_summary(self.summary)
590-
591-
def clean_summary(self, summary):
592-
# https://nvd.nist.gov/vuln/detail/CVE-2013-4314
593-
# https://github.com/cms-dev/cms/issues/888#issuecomment-516977572
594-
summary = summary.strip()
595-
if summary:
596-
summary = summary.replace("\x00", "\uFFFD")
597-
return summary
631+
self.summary = clean_summary(self.summary)
598632

599633
def to_dict(self):
600-
is_adv_v2 = (
601-
self.advisory_id
602-
or self.severities
603-
or self.references_v2
604-
or (self.affected_packages and isinstance(self.affected_packages[0], AffectedPackageV2))
605-
)
606-
if is_adv_v2:
607-
return {
608-
"advisory_id": self.advisory_id,
609-
"aliases": self.aliases,
610-
"summary": self.summary,
611-
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
612-
"references_v2": [ref.to_dict() for ref in self.references_v2],
613-
"patches": [patch.to_dict() for patch in self.patches],
614-
"severities": [sev.to_dict() for sev in self.severities],
615-
"date_published": self.date_published.isoformat() if self.date_published else None,
616-
"weaknesses": self.weaknesses,
617-
"url": self.url if self.url else "",
618-
}
619634
return {
635+
"advisory_id": self.advisory_id,
620636
"aliases": self.aliases,
621637
"summary": self.summary,
622638
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
623639
"references": [ref.to_dict() for ref in self.references],
640+
"patches": [patch.to_dict() for patch in self.patches],
641+
"severities": [sev.to_dict() for sev in self.severities],
624642
"date_published": self.date_published.isoformat() if self.date_published else None,
625643
"weaknesses": self.weaknesses,
626644
"url": self.url if self.url else "",
@@ -629,31 +647,37 @@ def to_dict(self):
629647
@classmethod
630648
def from_dict(cls, advisory_data):
631649
date_published = advisory_data["date_published"]
632-
affected_packages = advisory_data["affected_packages"]
633-
affected_package_cls = AffectedPackage
634-
if affected_packages:
635-
affected_package_cls = (
636-
AffectedPackageV2
637-
if "fixed_version_range" in affected_packages[0]
638-
else AffectedPackage
639-
)
640650
transformed = {
641651
"aliases": advisory_data["aliases"],
642652
"summary": advisory_data["summary"],
643653
"affected_packages": [
644-
affected_package_cls.from_dict(pkg) for pkg in affected_packages if pkg is not None
654+
AffectedPackageV2.from_dict(pkg)
655+
for pkg in advisory_data["affected_packages"]
656+
if pkg is not None
645657
],
646658
"patches": [PatchData.from_dict(patch) for patch in advisory_data.get("patches", [])],
647-
"references": [Reference.from_dict(ref) for ref in advisory_data["references"]],
659+
"references": [ReferenceV2.from_dict(ref) for ref in advisory_data["references"]],
648660
"date_published": datetime.datetime.fromisoformat(date_published)
649661
if date_published
650662
else None,
651663
"weaknesses": advisory_data["weaknesses"],
664+
"severities": [
665+
VulnerabilitySeverity.from_dict(sev) for sev in advisory_data.get("severities", [])
666+
],
652667
"url": advisory_data.get("url") or None,
653668
}
654669
return cls(**transformed)
655670

656671

672+
def clean_summary(summary):
673+
# https://nvd.nist.gov/vuln/detail/CVE-2013-4314
674+
# https://github.com/cms-dev/cms/issues/888#issuecomment-516977572
675+
summary = summary.strip()
676+
if summary:
677+
summary = summary.replace("\x00", "\uFFFD")
678+
return summary
679+
680+
657681
class NoLicenseError(Exception):
658682
pass
659683

vulnerabilities/importers/curl.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData:
9797
... ]
9898
... }
9999
>>> parse_advisory_data(raw_data)
100-
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], patches=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
100+
AdvisoryData(aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], url='https://curl.se/docs/CVE-2024-2379.json')
101101
"""
102102

103103
affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else []

vulnerabilities/importers/osv.py

Lines changed: 0 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -111,83 +111,6 @@ def parse_advisory_data(
111111
)
112112

113113

114-
def parse_advisory_data_v2(
115-
raw_data: dict, supported_ecosystems, advisory_url: str, advisory_text: str
116-
) -> Optional[AdvisoryData]:
117-
"""
118-
Return an AdvisoryData build from a ``raw_data`` mapping of OSV advisory and
119-
a ``supported_ecosystem`` string.
120-
"""
121-
advisory_id = raw_data.get("id") or ""
122-
if not advisory_id:
123-
logger.error(f"Missing advisory id in OSV data: {raw_data}")
124-
return None
125-
summary = raw_data.get("summary") or ""
126-
details = raw_data.get("details") or ""
127-
summary = build_description(summary=summary, description=details)
128-
aliases = raw_data.get("aliases") or []
129-
130-
date_published = get_published_date(raw_data=raw_data)
131-
severities = list(get_severities(raw_data=raw_data))
132-
references = get_references_v2(raw_data=raw_data)
133-
134-
affected_packages = []
135-
136-
for affected_pkg in raw_data.get("affected") or []:
137-
purl = get_affected_purl(affected_pkg=affected_pkg, raw_id=advisory_id)
138-
139-
if not purl or purl.type not in supported_ecosystems:
140-
logger.error(f"Unsupported package type: {affected_pkg!r} in OSV: {advisory_id!r}")
141-
continue
142-
143-
affected_version_range = get_affected_version_range(
144-
affected_pkg=affected_pkg,
145-
raw_id=advisory_id,
146-
supported_ecosystem=purl.type,
147-
)
148-
149-
fixed_versions = []
150-
fixed_version_range = None
151-
for fixed_range in affected_pkg.get("ranges") or []:
152-
fixed_version = get_fixed_versions(
153-
fixed_range=fixed_range, raw_id=advisory_id, supported_ecosystem=purl.type
154-
)
155-
fixed_versions.extend([v.string for v in fixed_version])
156-
157-
fixed_version_range = (
158-
get_fixed_version_range(fixed_versions, purl.type) if fixed_versions else None
159-
)
160-
161-
if fixed_version_range or affected_version_range:
162-
affected_packages.append(
163-
AffectedPackageV2(
164-
package=purl,
165-
affected_version_range=affected_version_range,
166-
fixed_version_range=fixed_version_range,
167-
)
168-
)
169-
170-
database_specific = raw_data.get("database_specific") or {}
171-
cwe_ids = database_specific.get("cwe_ids") or []
172-
weaknesses = list(map(get_cwe_id, cwe_ids))
173-
174-
if advisory_id in aliases:
175-
aliases.remove(advisory_id)
176-
177-
return AdvisoryData(
178-
advisory_id=advisory_id,
179-
aliases=aliases,
180-
summary=summary,
181-
references_v2=references,
182-
severities=severities,
183-
affected_packages=affected_packages,
184-
date_published=date_published,
185-
weaknesses=weaknesses,
186-
url=advisory_url,
187-
original_advisory_text=advisory_text or json.dumps(raw_data, indent=2, ensure_ascii=False),
188-
)
189-
190-
191114
def extract_fixed_versions(fixed_range) -> Iterable[str]:
192115
"""
193116
Return a list of fixed version strings given a ``fixed_range`` mapping of

vulnerabilities/models.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,7 @@
6565

6666
import vulnerablecode
6767
from vulnerabilities import utils
68+
from vulnerabilities.importer import AdvisoryDataV2
6869
from vulnerabilities.severity_systems import EPSS
6970
from vulnerabilities.severity_systems import SCORING_SYSTEMS
7071
from vulnerabilities.utils import compute_patch_checksum
@@ -2987,6 +2988,12 @@ class AdvisoryV2(models.Model):
29872988
help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.",
29882989
)
29892990

2991+
# precedence = models.IntegerField(
2992+
# null=True,
2993+
# blank=True,
2994+
# help_text="Precedence indicates the priority level of addressing a vulnerability based on its overall risk",
2995+
# )
2996+
29902997
@property
29912998
def risk_score(self):
29922999
"""
@@ -3026,17 +3033,17 @@ def get_absolute_url(self):
30263033
"""
30273034
return reverse("advisory_details", args=[self.avid])
30283035

3029-
def to_advisory_data(self) -> "AdvisoryData":
3030-
from vulnerabilities.importer import AdvisoryData
3036+
def to_advisory_data(self) -> "AdvisoryDataV2":
3037+
from vulnerabilities.importer import AdvisoryDataV2
30313038

3032-
return AdvisoryData(
3039+
return AdvisoryDataV2(
30333040
advisory_id=self.advisory_id,
30343041
aliases=[item.alias for item in self.aliases.all()],
30353042
summary=self.summary,
30363043
affected_packages=[
30373044
impacted.to_affected_package_data() for impacted in self.impacted_packages.all()
30383045
],
3039-
references_v2=[ref.to_reference_v2_data() for ref in self.references.all()],
3046+
references=[ref.to_reference_v2_data() for ref in self.references.all()],
30403047
patches=[patch.to_patch_data() for patch in self.patches.all()],
30413048
date_published=self.date_published,
30423049
weaknesses=[weak.cwe_id for weak in self.weaknesses.all()],

vulnerabilities/pipelines/v2_importers/aosp_importer.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
import dateparser
1616
from fetchcode.vcs import fetch_via_vcs
1717

18-
from vulnerabilities.importer import AdvisoryData
18+
from vulnerabilities.importer import AdvisoryDataV2
1919
from vulnerabilities.importer import VulnerabilitySeverity
2020
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
2121
from vulnerabilities.pipes.advisory import append_patch_classifications
@@ -100,13 +100,13 @@ def collect_advisories(self):
100100
f"{quote(file_path.name)}"
101101
)
102102

103-
yield AdvisoryData(
103+
yield AdvisoryDataV2(
104104
advisory_id=vulnerability_id,
105105
summary=summary,
106106
affected_packages=affected_packages,
107107
severities=severities,
108108
patches=patches,
109-
references_v2=references,
109+
references=references,
110110
date_published=date_published,
111111
url=url,
112112
)

vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
from univers.version_range import ApacheVersionRange
2222
from univers.versions import SemverVersion
2323

24-
from vulnerabilities.importer import AdvisoryData
24+
from vulnerabilities.importer import AdvisoryDataV2
2525
from vulnerabilities.importer import AffectedPackageV2
2626
from vulnerabilities.importer import ReferenceV2
2727
from vulnerabilities.importer import VulnerabilitySeverity
@@ -223,7 +223,7 @@ class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
223223
def steps(cls):
224224
return (cls.collect_and_store_advisories,)
225225

226-
def collect_advisories(self) -> Iterable[AdvisoryData]:
226+
def collect_advisories(self) -> Iterable[AdvisoryDataV2]:
227227
if not self.links:
228228
self.links = fetch_links(self.base_url)
229229
for link in self.links:
@@ -301,12 +301,12 @@ def to_advisory(self, data):
301301

302302
weaknesses = get_weaknesses(data)
303303

304-
return AdvisoryData(
304+
return AdvisoryDataV2(
305305
advisory_id=alias,
306306
aliases=[],
307307
summary=description or "",
308308
affected_packages=affected_packages,
309-
references_v2=[reference],
309+
references=[reference],
310310
weaknesses=weaknesses,
311311
url=reference.url,
312312
severities=severities,

0 commit comments

Comments
 (0)