Skip to content

Commit 5d0e823

Browse files
committed
Add ZyXEL v2 security advisories importer with tests
Signed-off-by: Tedsig42 <teddams047@gmail.com>
1 parent 2ff2906 commit 5d0e823

File tree

5 files changed

+292
-0
lines changed

5 files changed

+292
-0
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
from vulnerabilities.pipelines.v2_importers import xen_importer as xen_importer_v2
8585
from vulnerabilities.utils import create_registry
8686

87+
from vulnerabilities.pipelines.v2_importers import zyxel_importer as zyxel_importer_v2
8788
IMPORTERS_REGISTRY = create_registry(
8889
[
8990
archlinux_importer_v2.ArchLinuxImporterPipeline,
@@ -191,5 +192,6 @@
191192
collect_fix_commits_v2.CollectGitFixCommitsPipeline,
192193
collect_fix_commits_v2.CollectJenkinsFixCommitsPipeline,
193194
collect_fix_commits_v2.CollectGitlabFixCommitsPipeline,
195+
zyxel_importer_v2.ZyxelImporterPipeline,
194196
]
195197
)
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import hashlib
11+
import logging
12+
import re
13+
from datetime import timezone
14+
from urllib.parse import urljoin
15+
from urllib.parse import urlparse
16+
17+
import requests
18+
from bs4 import BeautifulSoup
19+
from dateutil import parser as date_parser
20+
21+
from vulnerabilities.importer import AdvisoryDataV2
22+
from vulnerabilities.importer import ReferenceV2
23+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
24+
from vulnerabilities.utils import dedupe
25+
from vulnerabilities.utils import find_all_cve
26+
27+
logger = logging.getLogger(__name__)
28+
29+
30+
class ZyxelImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """Importer for ZyXEL security advisories pages.

    Fetches the ZyXEL security-advisories listing page, discovers the
    individual advisory detail pages, and yields one ``AdvisoryDataV2``
    per successfully parsed page.
    """

    pipeline_id = "zyxel_importer_v2"
    base_url = "https://www.zyxel.com/global/en/support/security-advisories"
    spdx_license_expression = "NOASSERTION"
    license_url = base_url

    # NOTE(review): precedence value mirrors other v2 importers — confirm
    # against the registry conventions.
    precedence = 200

    @classmethod
    def steps(cls):
        return (
            cls.fetch,
            cls.collect_and_store_advisories,
        )

    def fetch(self):
        """Fetch the listing page HTML and cache the advisory detail URLs.

        Re-raises the underlying requests exception on failure so the
        pipeline aborts early instead of importing an empty advisory set.
        """
        self.log(f"Fetch `{self.base_url}`")
        try:
            response = requests.get(self.base_url, timeout=30)
            response.raise_for_status()
            self.listing_html = response.text
        except requests.exceptions.Timeout:
            self.log(f"Timeout while fetching {self.base_url}")
            raise
        except requests.exceptions.HTTPError as e:
            self.log(f"HTTP error while fetching {self.base_url}: {e!r}")
            raise
        except requests.exceptions.RequestException as e:
            self.log(f"Network error while fetching {self.base_url}: {e!r}")
            raise
        # Parse the listing exactly once: both advisories_count() and
        # collect_advisories() need the same URL list, and re-parsing the
        # full listing HTML for each caller is wasted work.
        self.advisory_urls = parse_listing_for_advisory_urls(self.listing_html, self.base_url)

    def advisories_count(self):
        """Return the number of advisory detail pages found on the listing."""
        return len(self.advisory_urls)

    def collect_advisories(self):
        """Yield AdvisoryDataV2 for each advisory page; skip pages that fail."""
        for advisory_url in self.advisory_urls:
            try:
                response = requests.get(advisory_url, timeout=30)
                response.raise_for_status()
                raw_html = response.text
                advisory = parse_zyxel_advisory_page(raw_html=raw_html, advisory_url=advisory_url)
                if advisory:
                    yield advisory
            except requests.exceptions.Timeout:
                self.log(f"Timeout while fetching ZyXEL advisory at {advisory_url}")
            except requests.exceptions.HTTPError as e:
                self.log(f"HTTP error while fetching ZyXEL advisory at {advisory_url}: {e!r}")
            except requests.exceptions.RequestException as e:
                self.log(f"Network error while fetching ZyXEL advisory at {advisory_url}: {e!r}")
            except Exception as e:
                # Broad catch is deliberate: one malformed page must not abort
                # the whole import run; the error is logged for triage.
                self.log(f"Unexpected error parsing ZyXEL advisory at {advisory_url}: {e!r}")
83+
84+
85+
def parse_listing_for_advisory_urls(raw_html, base_url):
    """Return sorted advisory detail URLs from the ZyXEL listing page HTML.

    Keeps only links under the security-advisories section, drops the
    listing page itself, and resolves relative hrefs against ``base_url``.
    """
    soup = BeautifulSoup(raw_html, features="lxml")
    advisory_urls = set()

    for link in soup.find_all("a", href=True):
        href = link.get("href", "").strip()
        if not href:
            continue

        url = urljoin(base_url, href)
        if "support/security-advisories" not in url.lower():
            continue

        # The last path segment distinguishes a detail page from the
        # top-level listing page itself.
        last_segment = urlparse(url).path.rstrip("/").split("/")[-1].lower()
        if last_segment != "security-advisories":
            advisory_urls.add(url)

    return sorted(advisory_urls)
108+
109+
110+
def parse_zyxel_advisory_page(raw_html, advisory_url):
    """Parse a ZyXEL advisory detail page and return AdvisoryDataV2."""
    soup = BeautifulSoup(raw_html, features="lxml")
    page_text = soup.get_text(" ", strip=True)

    # CVE ids are matched anywhere in the visible text, upper-cased, and
    # deduplicated while preserving order.
    aliases = dedupe([cve.upper() for cve in find_all_cve(page_text)])

    summary = extract_summary(soup=soup)
    date_published = extract_published_date(soup=soup, page_text=page_text)

    return AdvisoryDataV2(
        advisory_id=get_advisory_id(
            advisory_url=advisory_url,
            aliases=aliases,
            summary=summary,
            date_published=date_published,
        ),
        aliases=aliases,
        summary=summary,
        references=get_references(soup=soup, advisory_url=advisory_url, aliases=aliases),
        date_published=date_published,
        url=advisory_url,
        original_advisory_text=raw_html,
    )
138+
139+
140+
def extract_summary(soup):
    """Return the advisory title: <h1> first, then <title>, else a generic fallback."""
    for tag_name in ("h1", "title"):
        tag = soup.find(tag_name)
        if tag and tag.get_text(strip=True):
            return tag.get_text(" ", strip=True)

    return "ZyXEL security advisory"
150+
151+
152+
def extract_published_date(soup, page_text):
    """Return a timezone-aware publication datetime, or None.

    Tries common publication meta tags first, then falls back to a loose
    "published:" / "release date:" pattern in the visible page text.
    Naive datetimes are assumed to be UTC. Unparseable values are skipped
    instead of raising, so one malformed date never discards the advisory.
    """
    for key, value in (
        ("property", "article:published_time"),
        ("name", "article:published_time"),
        ("name", "publish_date"),
        ("name", "date"),
    ):
        meta = soup.find("meta", attrs={key: value})
        if not meta:
            continue

        content = (meta.get("content") or "").strip()
        if not content:
            continue

        parsed = _parse_datetime(content)
        if parsed:
            return parsed

    match = re.search(r"(?:published|release date)\s*:?\s*([A-Za-z0-9, :\-+/]+)", page_text, re.I)
    if not match:
        return None

    return _parse_datetime(match.group(1).strip())


def _parse_datetime(text):
    """Parse ``text`` into a UTC-defaulted datetime; return None when unparseable."""
    try:
        parsed = date_parser.parse(text)
    except (ValueError, OverflowError):
        # dateutil raises ParserError (a ValueError subclass) on non-date
        # text; the loose regex fallback above can easily match such text.
        return None
    if parsed and not parsed.tzinfo:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
181+
182+
183+
def get_advisory_id(advisory_url, aliases, summary, date_published):
    """Build a stable advisory id from the URL slug, or a content hash.

    Prefers ``zyxel-<slug>`` using the URL's last path segment; when the
    URL has no usable slug (it is the listing page or empty), falls back
    to a short SHA1 fingerprint over the advisory fields.
    """
    last_segment = urlparse(advisory_url).path.rstrip("/").split("/")[-1]
    if last_segment and last_segment.lower() != "security-advisories":
        return f"zyxel-{last_segment}"

    fingerprint_parts = [
        advisory_url,
        summary,
        date_published.isoformat() if date_published else "",
        "|".join(aliases),
    ]
    fingerprint = "|".join(fingerprint_parts).encode("utf-8")
    return f"zyxel-{hashlib.sha1(fingerprint).hexdigest()[:16]}"
193+
194+
195+
def get_references(soup, advisory_url, aliases):
    """Return deduplicated ReferenceV2 entries for an advisory page.

    Includes the advisory page itself, one NVD detail link per CVE alias,
    and every absolute http(s) link found in the page body.
    """
    urls = [advisory_url]
    urls.extend(f"https://nvd.nist.gov/vuln/detail/{alias}" for alias in aliases)

    for link in soup.find_all("a", href=True):
        href = link.get("href", "").strip()
        if not href:
            continue

        resolved = urljoin(advisory_url, href)
        if resolved.startswith("http"):
            urls.append(resolved)

    return [ReferenceV2(url=url) for url in dedupe(urls)]
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from pathlib import Path
11+
12+
from commoncode import testcase
13+
14+
from vulnerabilities.pipelines.v2_importers import zyxel_importer
15+
16+
17+
class TestZyxelImporterPipeline(testcase.FileBasedTesting):
    """Tests for the ZyXEL v2 importer parsing helpers."""

    test_data_dir = Path(__file__).parent.parent.parent / "test_data" / "zyxel_v2"

    def test_parse_listing_for_advisory_urls(self):
        listing_file = self.get_test_loc("security_advisories_listing.html")
        # Explicit encoding: read_text() otherwise uses the platform default
        # (e.g. cp1252 on Windows), which can break on non-ASCII fixtures.
        raw_html = Path(listing_file).read_text(encoding="utf-8")

        urls = zyxel_importer.parse_listing_for_advisory_urls(
            raw_html=raw_html,
            base_url="https://www.zyxel.com/global/en/support/security-advisories",
        )

        assert urls == [
            "https://www.zyxel.com/global/en/support/security-advisories/zyxel-security-advisory-for-cve-2024-7261",
            "https://www.zyxel.com/global/en/support/security-advisories/zyxel-security-advisory-for-cve-2024-7263",
        ]

    def test_parse_zyxel_advisory_page_extracts_cves_and_id(self):
        advisory_file = self.get_test_loc("zyxel_security_advisory_for_foo.html")
        # Explicit encoding for the same platform-default-encoding reason.
        raw_html = Path(advisory_file).read_text(encoding="utf-8")

        result = zyxel_importer.parse_zyxel_advisory_page(
            raw_html=raw_html,
            advisory_url="https://www.zyxel.com/global/en/support/security-advisories/zyxel-security-advisory-for-foo",
        )

        assert result.advisory_id == "zyxel-zyxel-security-advisory-for-foo"
        assert result.summary == "ZyXEL Security Advisory for Foo"
        assert result.aliases == ["CVE-2025-12345", "CVE-2025-67890"]
        assert result.date_published.isoformat() == "2025-03-10T00:00:00+00:00"

        reference_urls = [ref.url for ref in result.references]
        assert "https://nvd.nist.gov/vuln/detail/CVE-2025-12345" in reference_urls
        assert "https://nvd.nist.gov/vuln/detail/CVE-2025-67890" in reference_urls

    def test_get_advisory_id_hash_fallback_when_slug_missing(self):
        # A URL whose last segment is the listing page must not yield the
        # slug-based id; a short hash fallback is used instead.
        advisory_id = zyxel_importer.get_advisory_id(
            advisory_url="https://www.zyxel.com/global/en/support/security-advisories/",
            aliases=["CVE-2025-12345"],
            summary="Example advisory",
            date_published=None,
        )

        assert advisory_id.startswith("zyxel-")
        assert advisory_id != "zyxel-security-advisories"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<html>
2+
<body>
3+
<a href="https://www.zyxel.com/global/en/support/security-advisories">Listing</a>
4+
<a href="https://www.zyxel.com/global/en/support/security-advisories/zyxel-security-advisory-for-cve-2024-7261">Advisory 1</a>
5+
<a href="https://www.zyxel.com/global/en/support/security-advisories/zyxel-security-advisory-for-cve-2024-7263">Advisory 2</a>
6+
</body>
7+
</html>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<html>
2+
<head>
3+
<title>ZyXEL Security Advisory for Foo</title>
4+
<meta property="article:published_time" content="2025-03-10" />
5+
</head>
6+
<body>
7+
<h1>ZyXEL Security Advisory for Foo</h1>
8+
<p>This advisory addresses CVE-2025-12345 and CVE-2025-67890 in affected devices.</p>
9+
<a href="https://www.cve.org/CVERecord?id=CVE-2025-12345">CVE Link</a>
10+
</body>
11+
</html>

0 commit comments

Comments
 (0)