Skip to content

Commit 77b7756

Browse files
committed
Create amazon linux importer pipeline
Signed-off-by: ambuj <kulshreshthaak.12@gmail.com>
1 parent 5c9aa1c commit 77b7756

File tree

7 files changed

+729
-701
lines changed

7 files changed

+729
-701
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10-
from vulnerabilities.importers import alpine_linux
11-
from vulnerabilities.importers import amazon_linux
1210
from vulnerabilities.importers import apache_httpd
1311
from vulnerabilities.importers import apache_kafka
1412
from vulnerabilities.importers import apache_tomcat
@@ -37,6 +35,7 @@
3735
from vulnerabilities.importers import xen
3836
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
3937
from vulnerabilities.pipelines import alpine_linux_importer
38+
from vulnerabilities.pipelines import amazon_linux_importer
4039
from vulnerabilities.pipelines import github_importer
4140
from vulnerabilities.pipelines import gitlab_importer
4241
from vulnerabilities.pipelines import nginx_importer
@@ -72,7 +71,7 @@
7271
curl.CurlImporter,
7372
epss.EPSSImporter,
7473
vulnrichment.VulnrichImporter,
75-
amazon_linux.AmazonLinuxImporter,
74+
amazon_linux_importer.AmazonLinuxImporterPipeline,
7675
pypa_importer.PyPaImporterPipeline,
7776
npm_importer.NpmImporterPipeline,
7877
nginx_importer.NginxImporterPipeline,

vulnerabilities/improvers/valid_versions.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
from vulnerabilities.importer import AffectedPackage
2525
from vulnerabilities.importer import Importer
2626
from vulnerabilities.importer import UnMergeablePackageError
27-
from vulnerabilities.importers.amazon_linux import AmazonLinuxImporter
2827
from vulnerabilities.importers.apache_httpd import ApacheHTTPDImporter
2928
from vulnerabilities.importers.apache_kafka import ApacheKafkaImporter
3029
from vulnerabilities.importers.apache_tomcat import ApacheTomcatImporter
@@ -42,6 +41,7 @@
4241
from vulnerabilities.improver import Inference
4342
from vulnerabilities.models import Advisory
4443
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
44+
from vulnerabilities.pipelines.amazon_linux_importer import AmazonLinuxImporterPipeline
4545
from vulnerabilities.pipelines.github_importer import GitHubAPIImporterPipeline
4646
from vulnerabilities.pipelines.gitlab_importer import GitLabImporterPipeline
4747
from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline
@@ -480,9 +480,10 @@ class GithubOSVImprover(ValidVersionImprover):
480480

481481

482482
class AmazonLinuxImprover(ValidVersionImprover):
483-
importer = AmazonLinuxImporter
483+
importer = AmazonLinuxImporterPipeline
484484
ignorable_versions = []
485485

486+
486487
class CurlImprover(ValidVersionImprover):
487488
importer = CurlImporter
488489
ignorable_versions = []

vulnerabilities/importers/amazon_linux.py renamed to vulnerabilities/pipelines/amazon_linux_importer.py

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from typing import Optional
1717

1818
import pytz
19+
import requests
1920
from bs4 import BeautifulSoup
2021
from packageurl import PackageURL
2122
from univers.versions import RpmVersion
@@ -25,25 +26,37 @@
2526
from vulnerabilities.importer import Importer
2627
from vulnerabilities.importer import Reference
2728
from vulnerabilities.importer import VulnerabilitySeverity
29+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
2830
from vulnerabilities.rpm_utils import rpm_to_purl
2931
from vulnerabilities.severity_systems import SCORING_SYSTEMS
3032
from vulnerabilities.utils import fetch_response
3133
from vulnerabilities.utils import is_cve
3234

3335
LOGGER = logging.getLogger(__name__)
34-
BASE_URL = "https://alas.aws.amazon.com/"
3536

3637

37-
class AmazonLinuxImporter(Importer):
38-
spdx_license_expression = "CC BY 4.0"
39-
license_url = " " # TODO
38+
class AmazonLinuxImporterPipeline(VulnerableCodeBaseImporterPipeline):
39+
"""Imports Amazon Linux security advisories"""
4040

41+
pipeline_id = "amazon_linux_importer"
42+
BASE_URL = "https://alas.aws.amazon.com/"
43+
spdx_license_expression = "CC BY 4.0"
44+
license_url = "Unknown"
4145
importer_name = "Amazon Linux Importer"
4246

43-
def advisory_data(self) -> Iterable[AdvisoryData]:
44-
amazon_linux_1_url = BASE_URL + "/index.html"
45-
amazon_linux_2_url = BASE_URL + "/alas2.html"
46-
amazon_linux_2023_url = BASE_URL + "/alas2023.html"
47+
@classmethod
48+
def steps(cls):
49+
return (
50+
cls.fetch,
51+
cls.collect_and_store_advisories,
52+
cls.import_new_advisories,
53+
)
54+
55+
def fetch(self):
56+
self.log(f"Fetch `{self.BASE_URL}`")
57+
amazon_linux_1_url = self.BASE_URL + "/index.html"
58+
amazon_linux_2_url = self.BASE_URL + "/alas2.html"
59+
amazon_linux_2023_url = self.BASE_URL + "/alas2023.html"
4760
amazonlinux_advisories_pages = [
4861
amazon_linux_1_url,
4962
amazon_linux_2_url,
@@ -52,18 +65,40 @@ def advisory_data(self) -> Iterable[AdvisoryData]:
5265
alas_dict = {}
5366
for amazonlinux_advisories_page in amazonlinux_advisories_pages:
5467
alas_dict.update(fetch_alas_id_and_advisory_links(amazonlinux_advisories_page))
68+
self.advisory_data = alas_dict
69+
# self.advisory_data = requests.get(self.url).text
70+
71+
def advisories_count(self):
72+
return len(self.advisory_data)
73+
74+
def collect_advisories(self) -> Iterable[AdvisoryData]:
75+
"""
76+
Yield AdvisoryData from Amazon Linux (ALAS) security advisories HTML
77+
web page.
78+
"""
5579

56-
for alas_id, alas_url in alas_dict.items():
80+
for alas_id, alas_url in self.advisory_data.items():
5781
# Iterate over the fetched ALAS ids and their advisory page URLs
58-
if alas_id and alas_url:
59-
alas_advisory_page_content = fetch_response(alas_url).content
60-
yield process_advisory_data(alas_id, alas_advisory_page_content, alas_url)
82+
if not (alas_id and alas_url):
83+
continue
84+
try:
85+
# Fetch the advisory page content
86+
response = fetch_response(alas_url)
87+
alas_advisory_page_content = response.content
88+
89+
except Exception as e:
90+
# Log the error and continue to the next item
91+
LOGGER.error(f"Failed to fetch advisory {alas_id} from {alas_url}: {str(e)}")
92+
continue
93+
94+
# Process and yield data if successful
95+
yield process_advisory_data(alas_id, alas_advisory_page_content, alas_url)
6196

6297

6398
def fetch_alas_id_and_advisory_links(page_url: str) -> dict[str, str]:
6499
"""
65100
Return a dictionary where 'ALAS' entries are the keys and
66-
their corresponding advisory page links are the values.
101+
their corresponding advisory page link strings are the values.
67102
"""
68103

69104
page_response_content = fetch_response(page_url).content
@@ -253,7 +288,6 @@ def get_date_published(release_date_string):
253288
# Parse the date and time
254289
if release_date_string:
255290
date_part = release_date_string[:16]
256-
time_zone = release_date_string[17:]
257291
else:
258292
return None
259293

vulnerabilities/tests/test_amazon_linux.py renamed to vulnerabilities/tests/pipelines/test_amazon_linux_importer_pipeline.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,14 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10-
import json
1110
import os
11+
from pathlib import Path
1212
from unittest import TestCase
1313

14-
from bs4 import BeautifulSoup
15-
16-
from vulnerabilities.importers.amazon_linux import process_advisory_data
14+
from vulnerabilities.pipelines.amazon_linux_importer import process_advisory_data
1715
from vulnerabilities.tests import util_tests
1816

19-
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
20-
TEST_DATA = os.path.join(BASE_DIR, "test_data/amazon_linux")
17+
TEST_DATA = Path(__file__).parent.parent / "test_data" / "amazon_linux"
2118

2219

2320
class TestAmazonLinuxImporter(TestCase):
@@ -30,6 +27,7 @@ def test_process_advisory_data1(self):
3027
"ALAS-2024-1943", html_content, "https://alas.aws.amazon.com/ALAS-2024-1943.html"
3128
).to_dict()
3229
expected_file = os.path.join(TEST_DATA, "amazon_linux_expected1.json")
30+
# print(f"The result is {result}")
3331
util_tests.check_results_against_json(result, expected_file)
3432

3533
def test_process_advisory_data2(self):

0 commit comments

Comments
 (0)