Skip to content

Commit 5c3f96c

Browse files
committed
feat: add User-Agent header to all GitHub API calls
Adds a CollectOSS User-Agent header to all outgoing GitHub API requests so GitHub can identify traffic from our tooling. This helps with bot detection, allows GitHub staff to reach out if needed, and will benefit future support for other forge instances (Forgejo, etc.).

The User-Agent format is:

    CollectOSS/1.0.0 (github:chaoss/collectoss; CHAOSS/Linux Foundation)

A get_user_agent() helper is added to metadata.py so the version number is always in sync with the project version.

The header is applied in:
- github_data_access.py (REST API)
- github_graphql_data_access.py (GraphQL API)
- github_api_key_handler.py (key validation calls)
- github_paginator.py (deprecated hit_api path)

Fixes #408

Signed-off-by: Diptesh Roy <droy88333@gmail.com>
1 parent 58d75c7 commit 5c3f96c

5 files changed

Lines changed: 21 additions & 6 deletions

File tree

collectoss/tasks/github/util/github_api_key_handler.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88
from collectoss.tasks.util.redis_list import RedisList
99
from collectoss.application.db.lib import get_value, get_worker_oauth_keys
10-
from sqlalchemy import func
10+
from sqlalchemy import func
11+
from metadata import get_user_agent
1112

1213
RATE_LIMIT_URL = "https://api.github.com/rate_limit"
1314

@@ -156,7 +157,7 @@ def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool:
156157
True if key is bad. False if the key is good
157158
"""
158159

159-
headers = {'Authorization': f'token {oauth_key}'}
160+
headers = {'Authorization': f'token {oauth_key}', 'User-Agent': get_user_agent()}
160161

161162
data = client.request(method="GET", url=RATE_LIMIT_URL, headers=headers, timeout=180).json()
162163

@@ -171,7 +172,7 @@ def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool:
171172
@staticmethod
172173
def get_key_rate_limit(client, github_key):
173174

174-
headers = {'Authorization': f'token {github_key}'}
175+
headers = {'Authorization': f'token {github_key}', 'User-Agent': get_user_agent()}
175176

176177
data = client.request(method="GET", url=RATE_LIMIT_URL, headers=headers, timeout=180).json()
177178

collectoss/tasks/github/util/github_data_access.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from urllib.parse import urlparse, parse_qs, urlencode
66
from keyman.KeyClient import KeyClient
77
from collectoss.util.keys import mask_key
8+
from metadata import get_user_agent
89

910
GITHUB_RATELIMIT_REMAINING_CAP = 50
1011

@@ -152,7 +153,7 @@ def make_request(self, url, method="GET", timeout=100):
152153
if not self.key:
153154
self.key = self.key_client.request()
154155

155-
headers = {"Authorization": f"token {self.key}"}
156+
headers = {"Authorization": f"token {self.key}", "User-Agent": get_user_agent()}
156157

157158
response = client.request(method=method, url=url, headers=headers, timeout=timeout, follow_redirects=True)
158159

collectoss/tasks/github/util/github_graphql_data_access.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import time
33
import httpx
44
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception, RetryError
5+
from metadata import get_user_agent
56

67
URL = "https://api.github.com/graphql"
78

@@ -79,7 +80,8 @@ def make_request(self, query, variables, timeout=40):
7980
if variables:
8081
json_dict['variables'] = variables
8182

82-
response = client.post(url=URL,auth=self.key_manager,json=json_dict, timeout=timeout)
83+
response = client.post(url=URL, auth=self.key_manager, json=json_dict, timeout=timeout,
84+
headers={"User-Agent": get_user_agent()})
8385

8486
response.raise_for_status()
8587

collectoss/tasks/github/util/github_paginator.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from typing import Optional
99
from enum import Enum
1010
from typing_extensions import deprecated
11+
from metadata import get_user_agent
1112

1213
@deprecated("Deprecated. Use GithubDataAccess class instead")
1314
def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, method: str = 'GET', follow_redirects=True) -> Optional[httpx.Response]:
@@ -22,7 +23,9 @@ def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10,
2223

2324
try:
2425
response = client.request(
25-
method=method, url=url, auth=key_manager, timeout=timeout, follow_redirects=follow_redirects)
26+
method=method, url=url, auth=key_manager, timeout=timeout,
27+
follow_redirects=follow_redirects,
28+
headers={"User-Agent": get_user_agent()})
2629

2730
except TimeoutError:
2831
logger.info(f"Request timed out. Sleeping {round(timeout)} seconds and trying again...\n")

metadata.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,11 @@
1111

1212
__license__ = "MIT"
1313
__copyright__ = "CHAOSS and the collective contributors to the CollectOSS and Augur projects. A full listing of all contributors may be found at https://github.com/chaoss/collectoss/blob/main/CREDITS.md"
14+
15+
16+
def get_user_agent() -> str:
    """Build the User-Agent value CollectOSS sends with its API requests.

    The version segment is taken from the module-level ``__version__`` name
    at call time, so the header follows the project version.

    Returns:
        A string of the form
        ``CollectOSS/<version> (github:chaoss/collectoss; CHAOSS/Linux Foundation)``.
    """
    user_agent = f"CollectOSS/{__version__} (github:chaoss/collectoss; CHAOSS/Linux Foundation)"
    return user_agent

0 commit comments

Comments
 (0)