Skip to content

Commit 6843cd0

Browse files
authored
fix: Resolve GitHub API service initialization and rate limiting issues (#70)
- Initialize missing attributes in GitHubAPIService (_lock, _rate_limit_info, callbacks)
- Fix datetime import to use `from datetime import datetime`
- Configure HTTP client and Github client in the __init__ method
- Remove redundant HTTP client re-initialization in update_rate_limit_info
- Fix _get_tag method to be synchronous (it is called from asyncio.to_thread)
- Change get_repository_info to use the HTTP client instead of the PyGithub sync client
- Fix duplicate callback resetting in RateLimiter.set_rate_limit_callback
- Increase default delays in RateLimiter to be more conservative
- Use the HTTP client for repository info requests to avoid PyGithub threading issues

These changes resolve the 'GitHubAPIService object has no attribute github_client' error and fix rate limit handling issues that were causing 403 errors even with authentication.
2 parents b7e24ee + b7954f7 commit 6843cd0

3 files changed

Lines changed: 101 additions & 28 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,6 @@ __marimo__/
211211

212212
# Road Map Plan files
213213
*PLAN.md
214+
215+
# Test Downloads
216+
.downloads/*

forklet/infrastructure/rate_limiter.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ class RateLimiter:
5151
"""
5252

5353
def __init__(
54-
self, default_delay: float = 1.0, max_delay: float = 60.0, adaptive: bool = True
54+
self,
55+
default_delay: float = 2.0,
56+
max_delay: float = 120.0,
57+
adaptive: bool = True,
5558
):
5659
self.default_delay = default_delay
5760
self.max_delay = max_delay
@@ -67,8 +70,6 @@ def set_rate_limit_callback(
6770
) -> None:
6871
"""Set a callback to be invoked when rate limit information is updated."""
6972
self._rate_limit_callback = callback
70-
self._rate_limit_callback: Optional[Callable[[RateLimitInfo], None]] = None
71-
self._rate_limit_callback: Optional[Callable[[RateLimitInfo], None]] = None
7273

7374
async def acquire(self) -> None:
7475
"""Acquire rate limit permission."""

forklet/services/github_api.py

Lines changed: 94 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44

55
from typing import List, Optional, Dict, Any, AsyncIterator, Callable
6-
import datetime
6+
from datetime import datetime
77
import asyncio
88
import httpx
99
from github import Github, GithubException
@@ -19,8 +19,9 @@
1919
RateLimiter,
2020
RetryManager,
2121
CacheManager,
22-
RateLimitInfo
22+
RateLimitInfo,
2323
)
24+
2425
# from ..infrastructure.cache_manager import CacheManager
2526
from ..models import RepositoryInfo, GitReference, RepositoryType, GitHubFile
2627
from ..models.constants import USER_AGENT
@@ -56,6 +57,36 @@ def __init__(
5657
# Set up rate limit callback to adjust concurrency
5758
self.rate_limiter.set_rate_limit_callback(self._on_rate_limit_update)
5859

60+
# Initialize attributes
61+
self._lock = asyncio.Lock()
62+
self._rate_limit_info = RateLimitInfo()
63+
self._rate_limit_callback: Optional[Callable[[RateLimitInfo], None]] = None
64+
self._external_rate_limit_callback: Optional[
65+
Callable[[RateLimitInfo], None]
66+
] = None
67+
self._consecutive_limits = 0
68+
69+
# Configure HTTP client
70+
headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": USER_AGENT}
71+
72+
if self.auth_token:
73+
headers["Authorization"] = f"token {self.auth_token}"
74+
75+
self.http_client = httpx.AsyncClient(
76+
headers=headers, timeout=httpx.Timeout(self.timeout)
77+
)
78+
79+
# Sync client for PyGithub (used only for metadata)
80+
self.github_client = (
81+
Github(
82+
self.auth_token,
83+
retry=self.retry_manager.max_retries,
84+
user_agent=USER_AGENT,
85+
)
86+
if self.auth_token
87+
else Github(retry=self.retry_manager.max_retries, user_agent=USER_AGENT)
88+
)
89+
5990
def set_external_rate_limit_callback(
6091
self, callback: Callable[[RateLimitInfo], None]
6192
) -> None:
@@ -89,7 +120,9 @@ def _on_rate_limit_update(self, rate_limit_info: RateLimitInfo) -> None:
89120
# Sync client for PyGithub (used only for metadata)
90121
self.github_client = (
91122
Github(
92-
self.auth_token, retry=self.retry_manager.max_retries, user_agent=USER_AGENT
123+
self.auth_token,
124+
retry=self.retry_manager.max_retries,
125+
user_agent=USER_AGENT,
93126
)
94127
if self.auth_token
95128
else Github(retry=self.retry_manager.max_retries, user_agent=USER_AGENT)
@@ -127,6 +160,32 @@ async def update_rate_limit_info(self, headers: Dict[str, str]) -> None:
127160
if self._external_rate_limit_callback:
128161
self._external_rate_limit_callback(self._rate_limit_info)
129162

163+
except (ValueError, KeyError) as e:
164+
logger.warning(f"Failed to parse rate limit headers: {e}")
165+
self._rate_limit_info.remaining = int(
166+
headers.get("x-ratelimit-remaining", 5000)
167+
)
168+
self._rate_limit_info.used = int(headers.get("x-ratelimit-used", 0))
169+
170+
reset_timestamp = headers.get("x-ratelimit-reset")
171+
if reset_timestamp:
172+
self._rate_limit_info.reset_time = datetime.fromtimestamp(
173+
int(reset_timestamp)
174+
)
175+
176+
# Track consecutive rate limit hits
177+
if self._rate_limit_info.is_exhausted:
178+
self._consecutive_limits += 1
179+
else:
180+
self._consecutive_limits = 0
181+
182+
# Invoke internal callback if set
183+
if self._rate_limit_callback:
184+
self._rate_limit_callback(self._rate_limit_info)
185+
# Invoke external callback if set
186+
if self._external_rate_limit_callback:
187+
self._external_rate_limit_callback(self._rate_limit_info)
188+
130189
except (ValueError, KeyError) as e:
131190
logger.warning(f"Failed to parse rate limit headers: {e}")
132191

@@ -143,7 +202,9 @@ async def update_rate_limit_info(self, headers: Dict[str, str]) -> None:
143202
# Sync client for PyGithub (used only for metadata)
144203
self.github_client = (
145204
Github(
146-
self.auth_token, retry=self.retry_manager.max_retries, user_agent=USER_AGENT
205+
self.auth_token,
206+
retry=self.retry_manager.max_retries,
207+
user_agent=USER_AGENT,
147208
)
148209
if self.auth_token
149210
else Github(retry=self.retry_manager.max_retries, user_agent=USER_AGENT)
@@ -181,33 +242,43 @@ async def get_repository_info(self, owner: str, repo: str) -> RepositoryInfo:
181242
"""
182243

183244
try:
184-
# Use sync client for this operation as it's metadata-focused
185245
await self.rate_limiter.acquire()
186-
github_repo = await asyncio.to_thread(
187-
lambda: self.github_client.get_repo(f"{owner}/{repo}")
246+
url = f"{self.BASE_URL}/repos/{owner}/{repo}"
247+
response = await self.retry_manager.execute(
248+
lambda: self.http_client.get(url)
188249
)
189250

251+
# Update rate limit info
252+
await self.rate_limiter.update_rate_limit_info(response.headers)
253+
254+
response.raise_for_status()
255+
repo_data = response.json()
256+
190257
return RepositoryInfo(
191258
owner=owner,
192259
name=repo,
193-
full_name=github_repo.full_name,
194-
url=github_repo.html_url,
195-
default_branch=github_repo.default_branch,
260+
full_name=repo_data["full_name"],
261+
url=repo_data["html_url"],
262+
default_branch=repo_data["default_branch"],
196263
repo_type=RepositoryType.PRIVATE
197-
if github_repo.private
264+
if repo_data["private"]
198265
else RepositoryType.PUBLIC,
199-
size=github_repo.size,
200-
is_private=github_repo.private,
201-
is_fork=github_repo.fork,
202-
created_at=github_repo.created_at,
203-
updated_at=github_repo.updated_at,
204-
language=github_repo.language,
205-
description=github_repo.description,
206-
topics=github_repo.get_topics(),
266+
size=repo_data["size"],
267+
is_private=repo_data["private"],
268+
is_fork=repo_data["fork"],
269+
created_at=datetime.fromisoformat(
270+
repo_data["created_at"].replace("Z", "+00:00")
271+
),
272+
updated_at=datetime.fromisoformat(
273+
repo_data["updated_at"].replace("Z", "+00:00")
274+
),
275+
language=repo_data["language"],
276+
description=repo_data["description"],
277+
topics=repo_data.get("topics", []),
207278
)
208279

209-
except GithubException as e:
210-
if e.status == 404:
280+
except httpx.HTTPStatusError as e:
281+
if e.response.status_code == 404:
211282
raise RepositoryNotFoundError(f"Repository {owner}/{repo} not found")
212283
raise
213284

@@ -255,11 +326,9 @@ async def resolve_reference(self, owner: str, repo: str, ref: str) -> GitReferen
255326
f"Could not resolve reference '{ref}' for repository {owner}/{repo}"
256327
)
257328

258-
async def _get_tag(self, tag_name: str, owner: str, repo: str):
329+
def _get_tag(self, tag_name: str, owner: str, repo: str):
259330
"""Helper to get a tag by name - needed because get_tags() returns a list."""
260-
tags = await asyncio.to_thread(
261-
lambda: list(self.github_client.get_repo(f"{owner}/{repo}").get_tags())
262-
)
331+
tags = list(self.github_client.get_repo(f"{owner}/{repo}").get_tags())
263332
for tag in tags:
264333
if tag.name == tag_name:
265334
return tag

0 commit comments

Comments
 (0)