Skip to content

Commit 5111920

Browse files
committed
add support for multiple pages of api results
1 parent 8ff6bf0 commit 5111920

1 file changed

Lines changed: 59 additions & 15 deletions

File tree

entrypoint.py

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,16 @@
1010

1111
# Standard lib
1212
from dataclasses import dataclass, field
13-
from typing import Iterator, Union
13+
from typing import Iterator, Union, Any
14+
from urllib import parse as urlparse
1415
from operator import attrgetter
1516
from datetime import datetime
1617
import urllib.request
1718
import json as _json
1819
import subprocess
1920
import sys
2021
import os
22+
import re
2123

2224

2325
# Fetch vars from Command line
@@ -29,43 +31,85 @@
2931
# Fetch vars from default environment variables
3032
GITHUB_API_URL = os.environ.get("GITHUB_API_URL", "https://api.github.com")
3133
REPOSITORY = os.environ["GITHUB_REPOSITORY"]
34+
PER_PAGE = 100
3235

3336

34-
@dataclass
3537
class Response:
    """Basic urllib response object.

    The body is read eagerly in ``__init__`` so the data stays available
    after the underlying HTTP response has been closed.

    Attributes are documented inline in ``__init__``.
    """

    # Matches a single ``<url>; ...; rel="..."`` entry of a ``Link`` header.
    # The pattern must NOT consume anything past the closing quote of ``rel``:
    # the whole header is one line, so a trailing ``.*`` would swallow every
    # following entry and only the first relation would ever be found.
    # ``[^<]*?`` keeps the match from running into the next ``<url>`` entry.
    links_regex = re.compile(r'<([^>]+)>[^<]*?\brel="([^"]+)"')

    def __init__(self, raw_resp):
        """Capture body, status and headers from an open urllib response.

        :param raw_resp: Object exposing ``read()``, ``status``, ``reason``
            and ``headers`` (e.g. what ``urllib.request.urlopen`` returns).
        """
        self.raw_data: bytes = raw_resp.read()
        self.status: int = raw_resp.status
        self.reason: str = raw_resp.reason
        # Mapping of header name to value, used for the "link" lookup below.
        self.headers: dict[str, str] = raw_resp.headers

    def json(self):
        """Return the response body decoded as JSON, or None if it is not valid JSON."""
        try:
            return _json.loads(self.raw_data)
        except _json.JSONDecodeError:
            return None

    @property
    def links(self) -> dict[str, dict[str, str]]:
        """Parse the link header and return as structured data.

        The result maps each relation name (``"next"``, ``"last"``, ...) to a
        dict holding the link's query parameters plus ``"url"`` and ``"rel"``.
        An empty dict is returned when the response has no link header.
        """
        header = self.headers.get("link")
        if not header:
            return {}

        links = {}
        for match in self.links_regex.finditer(header):
            link, rels = match.groups()

            # Extract url params for easier access (e.g. links["next"]["page"])
            query_params = dict(urlparse.parse_qsl(urlparse.urlsplit(link).query))

            # A single entry may declare several whitespace separated relations.
            for rel in rels.split():
                # "url"/"rel" go last so a query parameter of the same name
                # can never overwrite the structural keys.
                links[rel] = {**query_params, "url": link, "rel": rel}

        return links
74+
75+
76+
def request_github_api(endpoint: str, params: dict = None, method="GET") -> Response:
    """Make web request to GitHub API.

    :param endpoint: Path below ``/repos/{REPOSITORY}/``, e.g. ``"branches"``.
    :param params: Optional query parameters to url-encode onto the request.
    :param method: HTTP method to use, defaults to ``"GET"``.
    :returns: The wrapped response, with the body already read.
    :raises urllib.error.URLError: Propagated from ``urlopen`` when the
        request fails (``HTTPError`` for error status codes).
    """
    url = f"{GITHUB_API_URL}/repos/{REPOSITORY}/{endpoint}"
    # Only add a query string when there is something to send, so requests
    # without params do not end in a dangling "?".
    if params:
        url = f"{url}?{urlparse.urlencode(params)}"

    req = urllib.request.Request(
        url=url,
        method=method,
        headers={
            "X-GitHub-Api-Version": "2022-11-28",
            "Accept": "application/vnd.github+json",
            "Authorization": f"Bearer {GITHUB_TOKEN}",
        },
    )
    # Response reads the body inside the context manager, before it closes.
    with urllib.request.urlopen(req) as resp:
        return Response(resp)
90+
91+
92+
def get_paged_resp(url: str, params: dict[str, Any] = None) -> Iterator[dict]:
    """Return an iterator of paged results, looping until all resources are collected.

    :param url: Endpoint passed through to ``request_github_api``.
    :param params: Optional extra query parameters. ``page`` is always reset
        to the first page and ``per_page`` defaults to ``PER_PAGE`` (capped at 100).
    :returns: Every item of every page, in the order the API returned them.
    """
    # Work on a copy so the caller's dict is never mutated between calls.
    query = dict(params or {})
    query["page"] = "1"
    query.setdefault("per_page", min(PER_PAGE, 100))

    while True:
        resp = request_github_api(url, params=query)
        # json() returns None for an undecodable body; treat that as an empty page.
        yield from resp.json() or []

        # Continue with next page if one is found
        next_link = resp.links.get("next")
        if not next_link or "page" not in next_link:
            break
        # The key must be the literal "page"; using the page value as the key
        # would leave the real page number untouched and re-request page 1 forever.
        query["page"] = next_link["page"]
63108

64109

65110
def get_all_branches() -> list[str]:
    """Collect the name of every branch in the current repo, across all result pages."""
    branch_names = []
    for branch in get_paged_resp("branches"):
        branch_names.append(branch["name"])
    return branch_names
69113

70114

71115
@dataclass
@@ -87,8 +131,9 @@ def __post_init__(self):
87131

88132
def is_okteto_deployment(self) -> bool:
89133
"""Return True if deployment matches the okteto url."""
134+
# We only need to check the first page of results. Any more and things would really start slowing down.
90135
statuses = request_github_api(f"deployments/{self.deploy_id}/statuses")
91-
for status in statuses.json:
136+
for status in statuses.json():
92137
url = status["environment_url"]
93138
if OKTETO_DOMAIN in url:
94139
self.url = url
@@ -103,8 +148,7 @@ def delete(self) -> bool:
103148
@classmethod
104149
def get_okteto_deployments(cls) -> Iterator["GitHubDeployment"]:
105150
"""Return a list of all deployments matching deploy regex."""
106-
deployments = request_github_api("deployments")
107-
for deployment in deployments.json:
151+
for deployment in get_paged_resp("deployments"):
108152
obj = cls(
109153
deployment["id"],
110154
deployment["environment"],

0 commit comments

Comments
 (0)