Skip to content

Commit 8b21b2f

Browse files
committed
Address review feedback for EUVD importer and tests
Signed-off-by: Sampurna Pyne <sampurnapyne1710@gmail.com>
1 parent a3a5c36 commit 8b21b2f

File tree

5 files changed

+268
-126
lines changed

5 files changed

+268
-126
lines changed

vulnerabilities/pipelines/v2_importers/euvd_importer.py

Lines changed: 99 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import json
1111
import logging
12+
import math
1213
import time
1314
from datetime import datetime
1415
from http import HTTPStatus
@@ -47,86 +48,111 @@ def steps(cls):
4748
return (cls.collect_and_store_advisories,)
4849

4950
def fetch_data(self):
    """
    Return the list of raw advisory items, fetching them page by page.

    The first successful call stores the collected list in
    ``self._cached_data``; later calls return that list without any
    network access. An empty list is returned (and nothing is cached) when
    the total count cannot be obtained.

    Pages that still fail after their retries are skipped so one bad page
    does not discard everything else; they are reported in the final log
    line instead of being silently dropped.
    """
    if self._cached_data is not None:
        logger.info(f"Using cached data: {len(self._cached_data)} items")
        return self._cached_data

    all_items = []
    size = 100
    max_retries = 2

    logger.info(f"Fetching data from EUVD API: {self.url}")

    total_count = self._fetch_total_count(size, max_retries)
    if total_count is None:
        logger.error("Failed to fetch total count from API")
        return all_items

    total_pages = math.ceil(total_count / size)
    logger.info(f"Total advisories: {total_count}, Total pages: {total_pages}")

    skipped_pages = []
    # Page 0 is always requested, even when the reported total is 0, so the
    # sequence of requests stays the same as before this loop was unified.
    for page in range(max(total_pages, 1)):
        page_data = self._fetch_page(page, size, max_retries)
        if page_data is None:
            logger.warning(f"Skipping page {page} after failed retries")
            skipped_pages.append(page)
            continue

        if not page_data:
            logger.info(f"No items in response for page {page}; stopping fetch.")
            break

        all_items.extend(page_data)
        logger.info(f"Fetched page {page}: {len(page_data)} items (total: {len(all_items)})")

    if skipped_pages:
        # Do not claim success when part of the data set is missing.
        logger.warning(
            f"Fetch completed with {len(skipped_pages)} skipped page(s) {skipped_pages}. "
            f"Total items collected: {len(all_items)}"
        )
    else:
        logger.info(f"Fetch completed successfully. Total items collected: {len(all_items)}")

    self._cached_data = all_items
    logger.info(f"Cached {len(all_items)} items for reuse")

    return all_items
93+
94+
def _make_request_with_retry(self, params, max_retries, context):
    """
    GET ``self.url`` with ``params`` and return the decoded JSON body.

    ``max_retries`` is the total number of attempts. A timeout, any other
    ``requests`` error, or a non-200 status triggers another attempt after
    a short pause. ``context`` is a short label (for example ``"page 3"``)
    used only in log messages.

    Return ``None`` when every attempt fails or when a 200 response carries
    a body that is not valid JSON.
    """
    headers = {"User-Agent": "VulnerableCode"}
    retry_delay = 3

    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(self.url, headers=headers, params=params, timeout=30)
        except requests.exceptions.Timeout:
            logger.warning(f"Timeout on {context} (attempt {attempt}/{max_retries})")
        except requests.exceptions.RequestException as e:
            logger.error(f"Network error on {context}: {e} (attempt {attempt}/{max_retries})")
        else:
            if response.status_code == HTTPStatus.OK:
                # Decode outside the network ``try``: the decode error raised by
                # ``response.json()`` may also derive from RequestException, which
                # would otherwise report a malformed body as a network failure.
                try:
                    return response.json()
                except ValueError as e:
                    logger.error(f"Error parsing response for {context}: {e}")
                    return None

            logger.error(f"API returned status {response.status_code} for {context}")

        # Single shared back-off for every retryable failure above.
        if attempt < max_retries:
            logger.info(f"Retrying {context} (attempt {attempt + 1}/{max_retries})")
            time.sleep(retry_delay)

    return None
132+
133+
def _fetch_total_count(self, size, max_retries):
    """
    Return the advisory count reported in the ``total`` field of page 0.

    Return ``None`` when the request fails, when the response is not a JSON
    object, or when ``total`` is missing, not convertible to an integer, or
    negative. The caller divides the result by the page size, so only a
    usable non-negative integer is ever returned.
    """
    params = {"size": size, "page": 0}
    data = self._make_request_with_retry(params, max_retries, "total count")

    if data is None:
        return None

    if not isinstance(data, dict):
        logger.error("Unexpected response structure while fetching total count")
        return None

    raw_total = data.get("total")
    if raw_total is None:
        logger.error("No 'total' field in API response")
        return None

    try:
        total = int(raw_total)
    except (TypeError, ValueError, OverflowError):
        logger.error(f"Invalid 'total' value in API response: {raw_total!r}")
        return None

    if total < 0:
        logger.error(f"Invalid 'total' value in API response: {raw_total!r}")
        return None

    return total
146+
147+
def _fetch_page(self, page, size, max_retries):
    """
    Return the ``items`` list of one result page.

    Return ``None`` when the request fails or the response is not a JSON
    object, so the caller can tell a failed page from an empty one. A
    missing or null ``items`` field yields an empty list.
    """
    params = {"size": size, "page": page}
    data = self._make_request_with_retry(params, max_retries, f"page {page}")

    if data is None:
        return None

    if not isinstance(data, dict):
        logger.error(f"Unexpected response structure for page {page}")
        return None

    # ``"items": null`` means "no items", not "request failed".
    return data.get("items") or []
130156

131157
def advisories_count(self) -> int:
    """Return the number of raw advisory items produced by ``fetch_data``."""
    records = self.fetch_data()
    return len(records)
@@ -137,7 +163,7 @@ def collect_advisories(self) -> Iterable[AdvisoryData]:
137163
advisory = self.parse_advisory(raw_data)
138164
if advisory:
139165
yield advisory
140-
except Exception as e:
166+
except (ValueError, KeyError, TypeError) as e:
141167
logger.error(f"Failed to parse advisory: {e}")
142168
logger.debug(f"Raw data: {raw_data}")
143169
continue
@@ -162,7 +188,7 @@ def parse_advisory(self, raw_data: dict) -> AdvisoryData:
162188
date_published = date_published.replace(
163189
tzinfo=datetime.now().astimezone().tzinfo
164190
)
165-
except Exception as e:
191+
except (ValueError, TypeError) as e:
166192
logger.warning(f"Failed to parse date '{date_str}': {e}")
167193

168194
references = []

vulnerabilities/tests/pipelines/v2_importers/test_euvd_importer_v2.py

Lines changed: 8 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
from unittest.mock import Mock
1414
from unittest.mock import patch
1515

16-
from vulnerabilities.importer import AdvisoryData
1716
from vulnerabilities.pipelines.v2_importers.euvd_importer import EUVDImporterPipeline
17+
from vulnerabilities.tests import util_tests
1818

1919
TEST_DATA = Path(__file__).parent.parent.parent / "test_data" / "euvd"
2020

@@ -30,60 +30,17 @@ def test_collect_advisories(self, mock_get):
3030
sample2 = json.loads(sample2_path.read_text(encoding="utf-8"))
3131

3232
mock_responses = [
33+
Mock(status_code=200, json=lambda: sample1),
3334
Mock(status_code=200, json=lambda: sample1),
3435
Mock(status_code=200, json=lambda: sample2),
35-
Mock(status_code=200, json=lambda: {"items": []}),
3636
]
3737
mock_get.side_effect = mock_responses
3838

3939
pipeline = EUVDImporterPipeline()
40-
advisories = list(pipeline.collect_advisories())
41-
42-
assert len(advisories) == 5
43-
44-
first = advisories[0]
45-
assert isinstance(first, AdvisoryData)
46-
assert first.advisory_id == "EUVD-2025-197757"
47-
assert "EUVD-2025-197757" in first.aliases
48-
assert "CVE-2025-13284" in first.aliases
49-
assert first.summary == "ThinPLUS vulnerability that allows remote code execution"
50-
assert first.date_published is not None
51-
assert len(first.severities) == 1
52-
assert first.severities[0].system.identifier == "cvssv3.1"
53-
assert first.severities[0].value == "9.8"
54-
assert (
55-
first.severities[0].scoring_elements == "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H"
56-
)
57-
58-
urls = [ref.url for ref in first.references_v2]
59-
assert "https://nvd.nist.gov/vuln/detail/CVE-2025-13284" in urls
60-
assert "https://euvd.enisa.europa.eu/vulnerability/EUVD-2025-197757" in urls
61-
62-
second = advisories[1]
63-
assert second.advisory_id == "EUVD-2024-123456"
64-
assert "CVE-2024-12345" in second.aliases
65-
assert "CVE-2024-67890" in second.aliases
66-
assert len([a for a in second.aliases if a.startswith("CVE-")]) == 2
67-
68-
urls = [ref.url for ref in second.references_v2]
69-
assert "https://example.com/advisory1" in urls
70-
assert "https://example.com/advisory2" in urls
71-
72-
third = advisories[2]
73-
assert third.advisory_id == "EUVD-2023-999999"
74-
assert third.severities[0].system.identifier == "cvssv3"
75-
assert third.severities[0].value == "5.3"
76-
77-
fourth = advisories[3]
78-
assert fourth.advisory_id == "EUVD-2022-555555"
79-
assert fourth.summary == ""
80-
assert fourth.severities[0].system.identifier == "cvssv2"
81-
assert fourth.severities[0].value == "4.3"
82-
83-
fifth = advisories[4]
84-
assert fifth.advisory_id == "EUVD-2021-111111"
85-
assert len([a for a in fifth.aliases if a.startswith("CVE-")]) == 0
86-
assert fifth.summary == "Advisory without CVE alias but with EUVD ID"
40+
advisories = [data.to_dict() for data in list(pipeline.collect_advisories())]
41+
42+
expected_file = TEST_DATA / "euvd-expected.json"
43+
util_tests.check_results_against_json(advisories, expected_file)
8744

8845
def test_get_scoring_system(self):
8946
"""Test CVSS version to scoring system mapping"""
@@ -111,10 +68,10 @@ def test_get_scoring_system(self):
11168
@patch("vulnerabilities.pipelines.v2_importers.euvd_importer.requests.get")
11269
def test_advisories_count(self, mock_get):
11370
"""Test counting advisories"""
114-
sample_data = {"items": [{"id": "1"}, {"id": "2"}, {"id": "3"}]}
71+
sample_data = {"items": [{"id": "1"}, {"id": "2"}, {"id": "3"}], "total": 3}
11572
mock_responses = [
11673
Mock(status_code=200, json=lambda: sample_data),
117-
Mock(status_code=200, json=lambda: {"items": []}),
74+
Mock(status_code=200, json=lambda: sample_data),
11875
]
11976
mock_get.side_effect = mock_responses
12077

0 commit comments

Comments
 (0)