Skip to content

Commit 796dc07

Browse files
zkoppert and Copilot committed
fix: use commits-first approach to avoid rate limiting and missing contributors
When date filtering is active, fetch commits in the date range directly and extract unique authors, instead of iterating all-time contributors and making a separate API call per contributor to check for commits.

The previous approach made O(N) API calls, where N is the number of all-time contributors, which exhausted rate limits on large repos/orgs. When rate limiting occurred mid-iteration, the broad exception handler silently dropped all contributors for the affected repository.

The new approach makes O(M/30) API calls, where M is the number of commits in the date range (one request per page of 30 commits), which is orders of magnitude fewer for monthly reports.

Additionally, contribution_count now reflects the actual count for the specified period rather than the misleading all-time count.

Fixes #392

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 66f0aac commit 796dc07

2 files changed

Lines changed: 101 additions & 71 deletions

File tree

contributors.py

Lines changed: 42 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -155,41 +155,53 @@ def get_contributors(repo: object, start_date: str, end_date: str, ghe: str):
155155
Returns:
156156
contributors (list): A list of ContributorStats objects
157157
"""
158-
all_repo_contributors = repo.contributors()
159158
contributors = []
159+
endpoint = ghe if ghe else "https://github.com"
160160
try:
161-
for user in all_repo_contributors:
162-
# Ignore contributors with [bot] in their name
163-
if "[bot]" in user.login:
164-
continue
165-
166-
# Check if user has commits in the date range
167-
if start_date and end_date:
168-
user_commits = repo.commits(
169-
author=user.login, since=start_date, until=end_date
161+
if start_date and end_date:
162+
# Fetch commits in the date range and extract unique authors.
163+
# This is much more efficient than iterating all-time contributors
164+
# and checking each one for commits, which causes rate limiting
165+
# on large repositories.
166+
contributor_data = {}
167+
for commit in repo.commits(since=start_date, until=end_date):
168+
if commit.author is None:
169+
continue
170+
login = commit.author.login
171+
if "[bot]" in login:
172+
continue
173+
if login not in contributor_data:
174+
contributor_data[login] = {
175+
"avatar_url": commit.author.avatar_url,
176+
"contribution_count": 0,
177+
}
178+
contributor_data[login]["contribution_count"] += 1
179+
180+
for username, data in contributor_data.items():
181+
commit_url = f"{endpoint}/{repo.full_name}/commits?author={username}&since={start_date}&until={end_date}"
182+
contributor = contributor_stats.ContributorStats(
183+
username,
184+
False,
185+
data["avatar_url"],
186+
data["contribution_count"],
187+
commit_url,
188+
"",
170189
)
171-
172-
# If the user has no commits in the date range, skip them
173-
try:
174-
next(user_commits)
175-
except StopIteration:
190+
contributors.append(contributor)
191+
else:
192+
for user in repo.contributors():
193+
if "[bot]" in user.login:
176194
continue
177-
178-
# Store the contributor information in a ContributorStats object
179-
endpoint = ghe if ghe else "https://github.com"
180-
if start_date and end_date:
181-
commit_url = f"{endpoint}/{repo.full_name}/commits?author={user.login}&since={start_date}&until={end_date}"
182-
else:
183195
commit_url = f"{endpoint}/{repo.full_name}/commits?author={user.login}"
184-
contributor = contributor_stats.ContributorStats(
185-
user.login,
186-
False,
187-
user.avatar_url,
188-
user.contributions_count,
189-
commit_url,
190-
"",
191-
)
192-
contributors.append(contributor)
196+
contributor = contributor_stats.ContributorStats(
197+
user.login,
198+
False,
199+
user.avatar_url,
200+
user.contributions_count,
201+
commit_url,
202+
"",
203+
)
204+
contributors.append(contributor)
193205
except Exception as e:
194206
print(f"Error getting contributors for repository: {repo.full_name}")
195207
print(e)

test_contributors.py

Lines changed: 59 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
import runpy
44
import unittest
5-
from unittest.mock import MagicMock, call, patch
5+
from unittest.mock import MagicMock, patch
66

77
import contributors as contributors_module
88
from contributor_stats import ContributorStats
@@ -19,24 +19,24 @@ def test_get_contributors(self, mock_contributor_stats):
1919
Test the get_contributors function.
2020
"""
2121
mock_repo = MagicMock()
22-
mock_user = MagicMock()
23-
mock_user.login = "user"
24-
mock_user.avatar_url = "https://avatars.githubusercontent.com/u/12345678?v=4"
25-
mock_user.contributions_count = 100
26-
mock_repo.contributors.return_value = [mock_user]
22+
mock_commit = MagicMock()
23+
mock_commit.author.login = "user"
24+
mock_commit.author.avatar_url = (
25+
"https://avatars.githubusercontent.com/u/12345678?v=4"
26+
)
2727
mock_repo.full_name = "owner/repo"
28-
mock_repo.commits.return_value = iter([object()])
28+
mock_repo.commits.return_value = iter([mock_commit])
2929

3030
contributors_module.get_contributors(mock_repo, "2022-01-01", "2022-12-31", "")
3131

3232
mock_repo.commits.assert_called_once_with(
33-
author="user", since="2022-01-01", until="2022-12-31"
33+
since="2022-01-01", until="2022-12-31"
3434
)
3535
mock_contributor_stats.assert_called_once_with(
3636
"user",
3737
False,
3838
"https://avatars.githubusercontent.com/u/12345678?v=4",
39-
100,
39+
1,
4040
"https://github.com/owner/repo/commits?author=user&since=2022-01-01&until=2022-12-31",
4141
"",
4242
)
@@ -129,36 +129,26 @@ def test_get_contributors_skip_users_with_no_commits(self, mock_contributor_stat
129129
Test the get_contributors function skips users with no commits in the date range.
130130
"""
131131
mock_repo = MagicMock()
132-
mock_user = MagicMock()
133-
mock_user.login = "user"
134-
mock_user.avatar_url = "https://avatars.githubusercontent.com/u/12345678?v=4"
135-
mock_user.contributions_count = 100
136-
mock_user2 = MagicMock()
137-
mock_user2.login = "user2"
138-
mock_user2.avatar_url = "https://avatars.githubusercontent.com/u/12345679?v=4"
139-
mock_user2.contributions_count = 102
132+
mock_commit = MagicMock()
133+
mock_commit.author.login = "user"
134+
mock_commit.author.avatar_url = (
135+
"https://avatars.githubusercontent.com/u/12345678?v=4"
136+
)
140137

141-
mock_repo.contributors.return_value = [mock_user, mock_user2]
142138
mock_repo.full_name = "owner/repo"
143-
mock_repo.commits.side_effect = [
144-
iter([object()]), # user has commits in range
145-
iter([]), # user2 has no commits in range and should be skipped
146-
]
139+
mock_repo.commits.return_value = iter([mock_commit])
147140
ghe = ""
148141

149142
contributors_module.get_contributors(mock_repo, "2022-01-01", "2022-12-31", ghe)
150143

151-
mock_repo.commits.assert_has_calls(
152-
[
153-
call(author="user", since="2022-01-01", until="2022-12-31"),
154-
call(author="user2", since="2022-01-01", until="2022-12-31"),
155-
]
144+
mock_repo.commits.assert_called_once_with(
145+
since="2022-01-01", until="2022-12-31"
156146
)
157147
mock_contributor_stats.assert_called_once_with(
158148
"user",
159149
False,
160150
"https://avatars.githubusercontent.com/u/12345678?v=4",
161-
100,
151+
1,
162152
"https://github.com/owner/repo/commits?author=user&since=2022-01-01&until=2022-12-31",
163153
"",
164154
)
@@ -169,19 +159,19 @@ def test_get_contributors_skip_bot(self, mock_contributor_stats):
169159
Test if the get_contributors function skips the bot user.
170160
"""
171161
mock_repo = MagicMock()
172-
mock_user = MagicMock()
173-
mock_user.login = "[bot]"
174-
mock_user.avatar_url = "https://avatars.githubusercontent.com/u/12345678?v=4"
175-
mock_user.contributions_count = 100
162+
mock_commit = MagicMock()
163+
mock_commit.author.login = "[bot]"
164+
mock_commit.author.avatar_url = (
165+
"https://avatars.githubusercontent.com/u/12345678?v=4"
166+
)
176167

177-
mock_repo.contributors.return_value = [mock_user]
178168
mock_repo.full_name = "owner/repo"
169+
mock_repo.commits.return_value = iter([mock_commit])
179170
ghe = ""
180171

181172
contributors_module.get_contributors(mock_repo, "2022-01-01", "2022-12-31", ghe)
182173

183174
# Ensure that the bot user is skipped and ContributorStats is never instantiated
184-
mock_repo.commits.assert_not_called()
185175
mock_contributor_stats.assert_not_called()
186176

187177
@patch("contributors.contributor_stats.ContributorStats")
@@ -212,13 +202,8 @@ def test_get_contributors_no_commit_end_date(self, mock_contributor_stats):
212202
)
213203

214204
def test_get_contributors_skips_when_no_commits_in_range(self):
215-
"""Test get_contributors skips users with no commits in the date range."""
205+
"""Test get_contributors returns empty list when no commits in the date range."""
216206
mock_repo = MagicMock()
217-
mock_user = MagicMock()
218-
mock_user.login = "user"
219-
mock_user.avatar_url = "https://avatars.githubusercontent.com/u/12345678?v=4"
220-
mock_user.contributions_count = 100
221-
mock_repo.contributors.return_value = [mock_user]
222207
mock_repo.full_name = "owner/repo"
223208
mock_repo.commits.return_value = iter([])
224209

@@ -228,6 +213,39 @@ def test_get_contributors_skips_when_no_commits_in_range(self):
228213

229214
self.assertEqual(result, [])
230215

216+
def test_get_contributors_skips_none_author(self):
217+
"""Test get_contributors skips commits with no linked GitHub author."""
218+
mock_repo = MagicMock()
219+
mock_repo.full_name = "owner/repo"
220+
mock_commit = MagicMock()
221+
mock_commit.author = None
222+
mock_repo.commits.return_value = iter([mock_commit])
223+
224+
result = contributors_module.get_contributors(
225+
mock_repo, "2022-01-01", "2022-12-31", ""
226+
)
227+
228+
self.assertEqual(result, [])
229+
230+
def test_get_contributors_aggregates_multiple_commits(self):
231+
"""Test get_contributors counts multiple commits per author correctly."""
232+
mock_repo = MagicMock()
233+
mock_repo.full_name = "owner/repo"
234+
mock_commit1 = MagicMock()
235+
mock_commit1.author.login = "user"
236+
mock_commit1.author.avatar_url = "https://avatars.githubusercontent.com/u/1"
237+
mock_commit2 = MagicMock()
238+
mock_commit2.author.login = "user"
239+
mock_commit2.author.avatar_url = "https://avatars.githubusercontent.com/u/1"
240+
mock_repo.commits.return_value = iter([mock_commit1, mock_commit2])
241+
242+
result = contributors_module.get_contributors(
243+
mock_repo, "2022-01-01", "2022-12-31", ""
244+
)
245+
246+
self.assertEqual(len(result), 1)
247+
self.assertEqual(result[0].contribution_count, 2)
248+
231249
def test_get_contributors_handles_exception(self):
232250
"""Test get_contributors returns None when an exception is raised."""
233251

@@ -239,7 +257,7 @@ def __iter__(self):
239257

240258
mock_repo = MagicMock()
241259
mock_repo.full_name = "owner/repo"
242-
mock_repo.contributors.return_value = BoomIterable()
260+
mock_repo.commits.return_value = BoomIterable()
243261

244262
with patch("builtins.print") as mock_print:
245263
result = contributors_module.get_contributors(

0 commit comments

Comments
 (0)