Skip to content

Commit e7c2b74

Browse files
Zohaib Shah
authored and committed
Add custom User-Agent for HTTP requests Fixes #410
Signed-off-by: Zohaib Shah <zs3783999@gmail.com> Signed-off-by: Zohaib Shah <zohaib@Zohaibs-MacBook-Pro.local>
1 parent 74172c4 commit e7c2b74

File tree

7 files changed

+34
-13
lines changed

7 files changed

+34
-13
lines changed

vulnerabilities/importers/apache_httpd.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from vulnerabilities.severity_systems import APACHE_HTTPD
2727
from vulnerabilities.utils import create_weaknesses_list
2828
from vulnerabilities.utils import cwe_regex
29+
from vulnerabilities.utils import get_http_headers
2930
from vulnerabilities.utils import get_item
3031

3132
logger = logging.getLogger(__name__)
@@ -41,7 +42,7 @@ class ApacheHTTPDImporter(Importer):
4142
def advisory_data(self):
4243
links = fetch_links(self.base_url)
4344
for link in links:
44-
data = requests.get(link).json()
45+
data = requests.get(link, headers=get_http_headers()).json()
4546
yield self.to_advisory(data)
4647

4748
def to_advisory(self, data):
@@ -150,7 +151,7 @@ def to_version_ranges(self, versions_data, fixed_versions):
150151

151152
def fetch_links(url):
152153
links = []
153-
data = requests.get(url).content
154+
data = requests.get(url, headers=get_http_headers()).content
154155
soup = BeautifulSoup(data, features="lxml")
155156
for tag in soup.find_all("a"):
156157
link = tag.get("href")

vulnerabilities/importers/debian_oval.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import requests
1515

1616
from vulnerabilities.importer import OvalImporter
17+
from vulnerabilities.utils import get_http_headers
1718

1819

1920
class DebianOvalImporter(OvalImporter):
@@ -68,7 +69,7 @@ def _fetch(self):
6869
for release in releases:
6970
file_url = f"https://www.debian.org/security/oval/oval-definitions-{release}.xml.bz2"
7071
self.data_url = file_url
71-
resp = requests.get(file_url).content
72+
resp = requests.get(file_url, headers=get_http_headers()).content
7273
extracted = bz2.decompress(resp)
7374
yield (
7475
{"type": "deb", "namespace": "debian", "qualifiers": {"distro": release}},

vulnerabilities/importers/openssl.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from vulnerabilities.importer import Reference
2626
from vulnerabilities.importer import VulnerabilitySeverity
2727
from vulnerabilities.severity_systems import SCORING_SYSTEMS
28+
from vulnerabilities.utils import get_http_headers
2829

2930
logger = logging.getLogger(__name__)
3031

@@ -36,7 +37,7 @@ class OpensslImporter(Importer):
3637
importer_name = "OpenSSL Importer"
3738

3839
def fetch(self):
39-
response = requests.get(url=self.url)
40+
response = requests.get(url=self.url, headers=get_http_headers())
4041
if not response.status_code == 200:
4142
logger.error(f"Error while fetching {self.url}: {response.status_code}")
4243
return

vulnerabilities/importers/suse_backports.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,13 @@
1515
from vulnerabilities.importer import AdvisoryData
1616
from vulnerabilities.importer import Importer
1717
from vulnerabilities.utils import create_etag
18+
from vulnerabilities.utils import get_http_headers
1819

1920

2021
class SUSEBackportsImporter(Importer):
2122
@staticmethod
2223
def get_all_urls_of_backports(url):
23-
r = requests.get(url)
24+
r = requests.get(url, headers=get_http_headers())
2425
soup = BeautifulSoup(r.content, "lxml")
2526
for a_tag in soup.find_all("a", href=True):
2627
if a_tag["href"].endswith(".yaml") and a_tag["href"].startswith("backports"):
@@ -38,7 +39,7 @@ def updated_advisories(self):
3839
def _fetch_yaml(self, url):
3940

4041
try:
41-
resp = requests.get(url)
42+
resp = requests.get(url, headers=get_http_headers())
4243
resp.raise_for_status()
4344
return saneyaml.load(resp.content)
4445

vulnerabilities/importers/suse_oval.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from bs4 import BeautifulSoup
1616

1717
from vulnerabilities.importer import OvalImporter
18+
from vulnerabilities.utils import get_http_headers
1819

1920

2021
class SuseOvalImporter(OvalImporter):
@@ -27,7 +28,7 @@ def __init__(self, *args, **kwargs):
2728
self.translations = {"less than": "<", "equals": "=", "greater than or equal": ">="}
2829

2930
def _fetch(self):
30-
page = requests.get(self.base_url).text
31+
page = requests.get(self.base_url, headers=get_http_headers()).text
3132
soup = BeautifulSoup(page, "lxml")
3233

3334
suse_oval_files = [
@@ -37,7 +38,7 @@ def _fetch(self):
3738
]
3839

3940
for suse_file in filter(suse_oval_files):
40-
response = requests.get(suse_file)
41+
response = requests.get(suse_file, headers=get_http_headers())
4142

4243
extracted = gzip.decompress(response.content)
4344
yield (

vulnerabilities/importers/ubuntu.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import requests
1515

1616
from vulnerabilities.importer import OvalImporter
17+
from vulnerabilities.utils import get_http_headers
1718

1819
logger = logging.getLogger(__name__)
1920

@@ -77,7 +78,7 @@ def _fetch(self):
7778
file_url = f"{base_url}/com.ubuntu.{release}.cve.oval.xml.bz2" # nopep8
7879
self.data_url = file_url
7980
logger.info(f"Fetching Ubuntu Oval: {file_url}")
80-
response = requests.get(file_url)
81+
response = requests.get(file_url, headers=get_http_headers())
8182
if response.status_code != requests.codes.ok:
8283
logger.error(
8384
f"Failed to fetch Ubuntu Oval: HTTP {response.status_code} : {file_url}"

vulnerabilities/utils.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,20 @@
4444

4545
logger = logging.getLogger(__name__)
4646

47+
# User-Agent string for all HTTP requests made by VulnerableCode
VULNERABLECODE_USER_AGENT = "VulnerableCode/37.0.0 (https://github.com/aboutcode-org/vulnerablecode)"


def get_http_headers(extra_headers=None):
    """
    Build the default HTTP headers dict carrying the VulnerableCode User-Agent.

    When ``extra_headers`` is provided (and truthy), its entries are merged
    on top of the defaults, so a caller-supplied ``User-Agent`` wins.
    """
    default_headers = {"User-Agent": VULNERABLECODE_USER_AGENT}
    if not extra_headers:
        return default_headers
    default_headers.update(extra_headers)
    return default_headers
60+
4761
cve_regex = re.compile(r"CVE-[0-9]{4}-[0-9]{4,19}", re.IGNORECASE)
4862
is_cve = cve_regex.match
4963
find_all_cve = cve_regex.findall
@@ -75,7 +89,7 @@ def load_toml(path):
7589

7690

7791
def fetch_yaml(url):
    """
    Fetch the document at ``url`` (sending the VulnerableCode User-Agent)
    and return its body parsed as YAML.
    """
    response = requests.get(url, headers=get_http_headers())
    return saneyaml.load(response.content)
8094

8195

@@ -113,7 +127,7 @@ def contains_alpha(string):
113127
def requests_with_5xx_retry(max_retries=5, backoff_factor=0.5):
114128
"""
115129
Returns a requests sessions which retries on 5xx errors with
116-
a backoff_factor
130+
a backoff_factor. The session includes the VulnerableCode User-Agent header.
117131
"""
118132
retries = urllib3.Retry(
119133
total=max_retries,
@@ -123,6 +137,7 @@ def requests_with_5xx_retry(max_retries=5, backoff_factor=0.5):
123137
)
124138
adapter = requests.adapters.HTTPAdapter(max_retries=retries)
125139
session = requests.Session()
140+
session.headers.update(get_http_headers())
126141
session.mount("https://", adapter)
127142
session.mount("http://", adapter)
128143
return session
@@ -284,7 +299,7 @@ def _get_gh_response(gh_token, graphql_query):
284299
Convenience function to easy mocking in tests
285300
"""
286301
endpoint = "https://api.github.com/graphql"
287-
headers = {"Authorization": f"bearer {gh_token}"}
302+
headers = get_http_headers({"Authorization": f"bearer {gh_token}"})
288303
try:
289304
return requests.post(endpoint, headers=headers, json=graphql_query).json()
290305
except Exception as e:
@@ -390,7 +405,7 @@ def fetch_response(url):
390405
Fetch and return `response` from the `url`
391406
"""
392407
try:
393-
response = requests.get(url)
408+
response = requests.get(url, headers=get_http_headers())
394409
if response.status_code == HTTPStatus.OK:
395410
return response
396411
raise Exception(

0 commit comments

Comments
 (0)