Skip to content

Commit 3925f2d

Browse files
committed
GitHubおよびGitLabクライアントのAPI呼び出しをページング処理に対応させ、ディスカッションおよびノートの取得を改善
1 parent 52c7784 commit 3925f2d

3 files changed

Lines changed: 310 additions & 78 deletions

File tree

clients/github_client.py

Lines changed: 109 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,14 @@ def get_pull_request_labels(self, owner: str, repo: str, pull_number: int) -> li
7272
return [label["name"] for label in issue.get("labels", [])]
7373

7474
def list_pull_requests_with_label(
75-
self, owner: str, repo: str, label: str, state: str = "open",
75+
self,
76+
owner: str,
77+
repo: str,
78+
label: str,
79+
state: str = "open",
80+
*,
81+
per_page: int = 100,
82+
max_pages: int = 20,
7683
) -> list[dict[str, Any]]:
7784
"""指定したラベルが付いているPull Requestの一覧を取得する.
7885
@@ -91,12 +98,10 @@ def list_pull_requests_with_label(
9198
"""
9299
# Pull Request一覧取得のAPIエンドポイント
93100
url = f"{self.api_url}/repos/{owner}/{repo}/pulls"
94-
params = {"state": state, "per_page": 100}
101+
params = {"state": state}
95102

96-
# Pull Request一覧を取得
97-
response = requests.get(url, headers=self.headers, params=params, timeout=30)
98-
response.raise_for_status()
99-
pulls = response.json()
103+
# Pull Request一覧をページングしながら取得
104+
pulls = self._fetch_paginated_list(url, params, per_page, max_pages)
100105

101106
# 指定されたラベルが付いているPull Requestをフィルタリング
102107
result = []
@@ -266,11 +271,12 @@ def get_pull_request_comments(
266271

267272
# タイムラインコメント(Issueコメント)を取得
268273
url_issue = f"{self.api_url}/repos/{owner}/{repo}/issues/{pull_number}/comments"
269-
resp_issue = requests.get(
270-
url_issue, headers=self.headers, params={"per_page": 200}, timeout=30,
274+
issue_comments_raw = self._fetch_paginated_list(
275+
url_issue,
276+
{},
277+
per_page=200,
278+
max_pages=20,
271279
)
272-
resp_issue.raise_for_status()
273-
issue_comments_raw = resp_issue.json()
274280

275281
# 不要なURLフィールドを削除
276282
issue_comments = [self.remove_url_fields(c) for c in issue_comments_raw]
@@ -309,20 +315,22 @@ def get_reviews_with_comments(
309315
"""
310316
# レビュー一覧を取得
311317
url_reviews = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pull_number}/reviews"
312-
resp_reviews = requests.get(
313-
url_reviews, headers=self.headers, params={"per_page": 100}, timeout=30,
318+
reviews_raw = self._fetch_paginated_list(
319+
url_reviews,
320+
{},
321+
per_page=100,
322+
max_pages=20,
314323
)
315-
resp_reviews.raise_for_status()
316-
reviews_raw = resp_reviews.json()
317324
reviews = [self.remove_url_fields(r) for r in reviews_raw]
318325

319326
# レビューコメント一覧を取得
320327
url_comments = f"{self.api_url}/repos/{owner}/{repo}/pulls/{pull_number}/comments"
321-
resp_comments = requests.get(
322-
url_comments, headers=self.headers, params={"per_page": 200}, timeout=30,
328+
comments_raw = self._fetch_paginated_list(
329+
url_comments,
330+
{},
331+
per_page=200,
332+
max_pages=20,
323333
)
324-
resp_comments.raise_for_status()
325-
comments_raw = resp_comments.json()
326334
comments = [self.remove_url_fields(c) for c in comments_raw]
327335

328336
# review_idごとにコメントをまとめる
@@ -425,20 +433,7 @@ def search_issues_and_prs(
425433
"""
426434
# Search API のエンドポイント
427435
url = f"{self.api_url}/search/issues"
428-
params = {"q": query, "per_page": per_page, "page": page}
429-
430-
# ソート条件とソート順序を設定
431-
if sort:
432-
params["sort"] = sort
433-
if order:
434-
params["order"] = order
435-
436-
# 検索実行
437-
response = requests.get(url, headers=self.headers, params=params, timeout=30)
438-
response.raise_for_status()
439-
data = response.json()
440-
441-
return data.get("items", [])
436+
return self._fetch_search_results(url, query, sort, order, per_page, page)
442437

443438
def search_pull_requests(
444439
self,
@@ -497,3 +492,85 @@ def search_issues(
497492
# 検索実行とIssueのフィルタリング
498493
items = self.search_issues_and_prs(query, sort, order, per_page, page)
499494
return [item for item in items if "pull_request" not in item]
495+
496+
def _fetch_paginated_list(
    self,
    url: str,
    params: dict[str, Any],
    per_page: int,
    max_pages: int,
) -> list[dict[str, Any]]:
    """Fetch every page of a standard GitHub REST list endpoint.

    Args:
        url: Full API endpoint URL to page through.
        params: Base query parameters; copied per request, never mutated.
        per_page: Requested page size. GitHub caps page size at 100, so
            larger values are clamped (see note below).
        max_pages: Upper bound on the number of pages fetched.

    Returns:
        All items collected across pages, in API order.

    Raises:
        requests.HTTPError: If any page request fails.
    """
    # GitHub's REST API silently caps per_page at 100. Without clamping,
    # a caller passing e.g. 200 would get 100 items on a completely full
    # page, the short-page check below would see 100 < 200, and pagination
    # would stop after the first page, silently dropping the rest.
    effective_per_page = min(per_page, 100)

    items: list[dict[str, Any]] = []
    page_number = 1

    # Termination: page budget exhausted, empty/non-list payload, or a
    # short page (avoids parsing the Link header).
    while page_number <= max_pages:
        page_params = dict(params)
        page_params["per_page"] = effective_per_page
        page_params["page"] = page_number

        response = requests.get(url, headers=self.headers, params=page_params, timeout=30)
        response.raise_for_status()
        page_items = response.json()

        # Defensive: error payloads are dicts, not lists.
        if not isinstance(page_items, list) or not page_items:
            break

        items.extend(page_items)

        # A short page means this was the last one.
        if len(page_items) < effective_per_page:
            break

        page_number += 1

    return items
528+
529+
def _fetch_search_results(
    self,
    url: str,
    query: str,
    sort: str | None,
    order: str | None,
    per_page: int,
    page: int,
    *,
    max_pages: int = 10,
) -> list[dict[str, Any]]:
    """Collect results spanning multiple pages from the GitHub Search API.

    Args:
        url: Search API endpoint URL.
        query: Search query string (``q`` parameter).
        sort: Optional sort field; omitted from the request when falsy.
        order: Optional sort order; omitted from the request when falsy.
        per_page: Requested page size; clamped to the API's cap of 100.
        page: First page number to fetch.
        max_pages: Upper bound on the number of pages fetched.

    Returns:
        The ``items`` lists of all fetched pages, concatenated in order.

    Raises:
        requests.HTTPError: If any page request fails.
    """
    # The Search API also caps per_page at 100; clamp so the short-page
    # stop condition below cannot fire spuriously on a full page.
    effective_per_page = min(per_page, 100)

    aggregated: list[dict[str, Any]] = []
    current_page = page
    pages_fetched = 0

    # Loop control uses Search-specific fields (total_count,
    # incomplete_results) in addition to page size.
    while pages_fetched < max_pages:
        params: dict[str, Any] = {
            "q": query,
            "per_page": effective_per_page,
            "page": current_page,
        }
        if sort:
            params["sort"] = sort
        if order:
            params["order"] = order

        response = requests.get(url, headers=self.headers, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()

        page_items = data.get("items", [])
        if not page_items:
            break

        aggregated.extend(page_items)

        # total_count tells us when everything has been collected.
        total_count = data.get("total_count")
        if isinstance(total_count, int) and total_count <= len(aggregated):
            break

        # A short page means this was the last one.
        if len(page_items) < effective_per_page:
            break

        # incomplete_results=True means the search timed out server-side;
        # further pages would be inconsistent, so stop here.
        if data.get("incomplete_results") is True:
            break

        current_page += 1
        pages_fetched += 1

    return aggregated

clients/gitlab_client.py

Lines changed: 86 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -24,23 +24,23 @@ def list_issues(
2424
labels: list[str] | None = None,
2525
state: str = "opened",
2626
per_page: int = 100,
27+
max_pages: int = 200,
2728
) -> list[dict[str, Any]]:
2829
url = f"{self.api_url}/projects/{project_id}/issues"
29-
params = {"state": state, "per_page": per_page}
30+
params: dict[str, Any] = {"state": state}
3031
if labels:
3132
params["labels"] = ",".join(labels)
32-
resp = requests.get(url, headers=self.headers, params=params, timeout=30)
33-
resp.raise_for_status()
34-
return resp.json()
33+
return self._fetch_paginated_list(url, params, per_page, max_pages)
3534

3635
def list_issue_notes(
    self,
    project_id: int | str,
    issue_iid: int | str,
    per_page: int = 100,
    max_pages: int = 200,
) -> list[dict[str, Any]]:
    """Return all notes (comments) on an issue, following pagination.

    Args:
        project_id: Numeric ID or URL-encoded path of the project.
        issue_iid: Internal ID of the issue within the project.
        per_page: Page size requested per API call.
        max_pages: Upper bound on the number of pages fetched.

    Returns:
        All note objects for the issue, in API order.
    """
    notes_url = f"{self.api_url}/projects/{project_id}/issues/{issue_iid}/notes"
    return self._fetch_paginated_list(notes_url, {}, per_page, max_pages)
4444

4545
def add_issue_note(
4646
self, project_id: int | str, issue_iid: int | str, body: str,
@@ -101,25 +101,25 @@ def list_merge_requests(
101101
assignee: str | None = None,
102102
state: str = "opened",
103103
per_page: int = 100,
104+
max_pages: int = 200,
104105
) -> list[dict[str, Any]]:
105106
url = f"{self.api_url}/projects/{project_id}/merge_requests"
106-
params = {"state": state, "per_page": per_page}
107+
params: dict[str, Any] = {"state": state}
107108
if labels:
108109
params["labels"] = ",".join(labels)
109110
if assignee:
110111
params["assignee_username"] = assignee
111-
resp = requests.get(url, headers=self.headers, params=params, timeout=30)
112-
resp.raise_for_status()
113-
return resp.json()
112+
return self._fetch_paginated_list(url, params, per_page, max_pages)
114113

115114
def list_merge_request_notes(
    self,
    project_id: int | str,
    merge_request_iid: int | str,
    per_page: int = 100,
    max_pages: int = 200,
) -> list[dict[str, Any]]:
    """Return all notes (comments) on a merge request, following pagination.

    Args:
        project_id: Numeric ID or URL-encoded path of the project.
        merge_request_iid: Internal ID of the merge request within the project.
        per_page: Page size requested per API call.
        max_pages: Upper bound on the number of pages fetched.

    Returns:
        All note objects for the merge request, in API order.
    """
    notes_url = f"{self.api_url}/projects/{project_id}/merge_requests/{merge_request_iid}/notes"
    return self._fetch_paginated_list(notes_url, {}, per_page, max_pages)
123123

124124
def add_merge_request_note(
125125
self, project_id: int | str, merge_request_iid: int | str, body: str,
@@ -168,21 +168,79 @@ def get_merge_request(
168168
return resp.json()
169169

170170
def search_issues(
    self,
    query: str,
    state: str = "opened",
    per_page: int = 200,
    max_pages: int = 200,
) -> list[dict[str, Any]]:
    """Search issues via the GitLab Search API, following pagination.

    Args:
        query: Free-text search string.
        state: Issue state filter passed straight to the API.
        per_page: Page size requested per API call.
        max_pages: Upper bound on the number of pages fetched.

    Returns:
        All matching issue objects, in API order.
    """
    search_url = f"{self.api_url}/search"
    search_params: dict[str, Any] = {
        "scope": "issues",
        "search": query,
        "state": state,
    }
    return self._fetch_paginated_list(search_url, search_params, per_page, max_pages)
178180

179181
def search_merge_requests(
    self,
    query: str,
    state: str | None = None,
    per_page: int = 200,
    max_pages: int = 200,
) -> list[dict[str, Any]]:
    """Search merge requests via the GitLab Search API, following pagination.

    Args:
        query: Free-text search string.
        state: Optional state filter; omitted from the request when falsy.
        per_page: Page size requested per API call.
        max_pages: Upper bound on the number of pages fetched.

    Returns:
        All matching merge request objects, in API order.
    """
    search_url = f"{self.api_url}/search"
    search_params: dict[str, Any] = {"scope": "merge_requests", "search": query}
    # Truthiness check (not `is not None`) matches the original contract:
    # an empty-string state is also omitted.
    if state:
        search_params["state"] = state
    return self._fetch_paginated_list(search_url, search_params, per_page, max_pages)
193+
194+
def _fetch_paginated_list(
    self,
    url: str,
    params: dict[str, Any],
    per_page: int,
    max_pages: int,
) -> list[dict[str, Any]]:
    """Helper that fetches all pages of a paginated GitLab API result.

    Args:
        url: Full API endpoint URL to page through.
        params: Base query parameters; copied per request, never mutated.
        per_page: Requested page size. GitLab caps page size at 100, so
            larger values are clamped (see note below).
        max_pages: Upper bound on the page number that will be fetched.

    Returns:
        All items collected across pages, in API order.

    Raises:
        requests.HTTPError: If any page request fails.
    """
    # GitLab caps per_page at 100. The search methods default to 200;
    # without clamping, the short-page fallback below (used when the
    # X-Next-Page header is absent) would see a full page of 100 items
    # as "short" (100 < 200) and stop pagination prematurely.
    effective_per_page = min(per_page, 100)

    items: list[dict[str, Any]] = []
    page = 1
    visited_pages: set[int] = set()

    # Follow the X-Next-Page header when present; visited_pages guards
    # against a server sending a cycle, and max_pages bounds total work.
    while page not in visited_pages and page <= max_pages:
        visited_pages.add(page)
        page_params = dict(params)
        page_params["per_page"] = effective_per_page
        page_params["page"] = page

        resp = requests.get(url, headers=self.headers, params=page_params, timeout=30)
        resp.raise_for_status()
        payload = resp.json()

        # List endpoints return a JSON array; tolerate a dict wrapper
        # exposing an "items" list, and bail out on anything else.
        page_items: list[dict[str, Any]]
        if isinstance(payload, list):
            page_items = payload
        elif isinstance(payload, dict) and isinstance(payload.get("items"), list):
            page_items = payload["items"]
        else:
            break

        if not page_items:
            break

        items.extend(page_items)

        next_page_header = resp.headers.get("X-Next-Page")
        if next_page_header:
            try:
                next_page = int(next_page_header)
            except ValueError:
                # Malformed header: stop rather than loop on bad data.
                break
            if next_page <= 0:
                break
            page = next_page
            continue

        # Header absent or empty: fall back to the short-page heuristic.
        if len(page_items) < effective_per_page:
            break

        page += 1

    return items

0 commit comments

Comments
 (0)