33"""
44
55from typing import List , Optional , Dict , Any , AsyncIterator , Callable
6- import datetime
6+ from datetime import datetime
77import asyncio
88import httpx
99from github import Github , GithubException
1919 RateLimiter ,
2020 RetryManager ,
2121 CacheManager ,
22- RateLimitInfo
22+ RateLimitInfo ,
2323)
24+
2425# from ..infrastructure.cache_manager import CacheManager
2526from ..models import RepositoryInfo , GitReference , RepositoryType , GitHubFile
2627from ..models .constants import USER_AGENT
@@ -56,6 +57,36 @@ def __init__(
5657 # Set up rate limit callback to adjust concurrency
5758 self .rate_limiter .set_rate_limit_callback (self ._on_rate_limit_update )
5859
60+ # Initialize attributes
61+ self ._lock = asyncio .Lock ()
62+ self ._rate_limit_info = RateLimitInfo ()
63+ self ._rate_limit_callback : Optional [Callable [[RateLimitInfo ], None ]] = None
64+ self ._external_rate_limit_callback : Optional [
65+ Callable [[RateLimitInfo ], None ]
66+ ] = None
67+ self ._consecutive_limits = 0
68+
69+ # Configure HTTP client
70+ headers = {"Accept" : "application/vnd.github.v3+json" , "User-Agent" : USER_AGENT }
71+
72+ if self .auth_token :
73+ headers ["Authorization" ] = f"token { self .auth_token } "
74+
75+ self .http_client = httpx .AsyncClient (
76+ headers = headers , timeout = httpx .Timeout (self .timeout )
77+ )
78+
79+ # Sync client for PyGithub (used only for metadata)
80+ self .github_client = (
81+ Github (
82+ self .auth_token ,
83+ retry = self .retry_manager .max_retries ,
84+ user_agent = USER_AGENT ,
85+ )
86+ if self .auth_token
87+ else Github (retry = self .retry_manager .max_retries , user_agent = USER_AGENT )
88+ )
89+
5990 def set_external_rate_limit_callback (
6091 self , callback : Callable [[RateLimitInfo ], None ]
6192 ) -> None :
@@ -89,7 +120,9 @@ def _on_rate_limit_update(self, rate_limit_info: RateLimitInfo) -> None:
89120 # Sync client for PyGithub (used only for metadata)
90121 self .github_client = (
91122 Github (
92- self .auth_token , retry = self .retry_manager .max_retries , user_agent = USER_AGENT
123+ self .auth_token ,
124+ retry = self .retry_manager .max_retries ,
125+ user_agent = USER_AGENT ,
93126 )
94127 if self .auth_token
95128 else Github (retry = self .retry_manager .max_retries , user_agent = USER_AGENT )
@@ -127,6 +160,32 @@ async def update_rate_limit_info(self, headers: Dict[str, str]) -> None:
127160 if self ._external_rate_limit_callback :
128161 self ._external_rate_limit_callback (self ._rate_limit_info )
129162
163+ except (ValueError , KeyError ) as e :
164+ logger .warning (f"Failed to parse rate limit headers: { e } " )
165+ self ._rate_limit_info .remaining = int (
166+ headers .get ("x-ratelimit-remaining" , 5000 )
167+ )
168+ self ._rate_limit_info .used = int (headers .get ("x-ratelimit-used" , 0 ))
169+
170+ reset_timestamp = headers .get ("x-ratelimit-reset" )
171+ if reset_timestamp :
172+ self ._rate_limit_info .reset_time = datetime .fromtimestamp (
173+ int (reset_timestamp )
174+ )
175+
176+ # Track consecutive rate limit hits
177+ if self ._rate_limit_info .is_exhausted :
178+ self ._consecutive_limits += 1
179+ else :
180+ self ._consecutive_limits = 0
181+
182+ # Invoke internal callback if set
183+ if self ._rate_limit_callback :
184+ self ._rate_limit_callback (self ._rate_limit_info )
185+ # Invoke external callback if set
186+ if self ._external_rate_limit_callback :
187+ self ._external_rate_limit_callback (self ._rate_limit_info )
188+
130189 except (ValueError , KeyError ) as e :
131190 logger .warning (f"Failed to parse rate limit headers: { e } " )
132191
@@ -143,7 +202,9 @@ async def update_rate_limit_info(self, headers: Dict[str, str]) -> None:
143202 # Sync client for PyGithub (used only for metadata)
144203 self .github_client = (
145204 Github (
146- self .auth_token , retry = self .retry_manager .max_retries , user_agent = USER_AGENT
205+ self .auth_token ,
206+ retry = self .retry_manager .max_retries ,
207+ user_agent = USER_AGENT ,
147208 )
148209 if self .auth_token
149210 else Github (retry = self .retry_manager .max_retries , user_agent = USER_AGENT )
@@ -181,33 +242,43 @@ async def get_repository_info(self, owner: str, repo: str) -> RepositoryInfo:
181242 """
182243
183244 try :
184- # Use sync client for this operation as it's metadata-focused
185245 await self .rate_limiter .acquire ()
186- github_repo = await asyncio .to_thread (
187- lambda : self .github_client .get_repo (f"{ owner } /{ repo } " )
246+ url = f"{ self .BASE_URL } /repos/{ owner } /{ repo } "
247+ response = await self .retry_manager .execute (
248+ lambda : self .http_client .get (url )
188249 )
189250
251+ # Update rate limit info
252+ await self .rate_limiter .update_rate_limit_info (response .headers )
253+
254+ response .raise_for_status ()
255+ repo_data = response .json ()
256+
190257 return RepositoryInfo (
191258 owner = owner ,
192259 name = repo ,
193- full_name = github_repo . full_name ,
194- url = github_repo . html_url ,
195- default_branch = github_repo . default_branch ,
260+ full_name = repo_data [ " full_name" ] ,
261+ url = repo_data [ " html_url" ] ,
262+ default_branch = repo_data [ " default_branch" ] ,
196263 repo_type = RepositoryType .PRIVATE
197- if github_repo . private
264+ if repo_data [ " private" ]
198265 else RepositoryType .PUBLIC ,
199- size = github_repo .size ,
200- is_private = github_repo .private ,
201- is_fork = github_repo .fork ,
202- created_at = github_repo .created_at ,
203- updated_at = github_repo .updated_at ,
204- language = github_repo .language ,
205- description = github_repo .description ,
206- topics = github_repo .get_topics (),
266+ size = repo_data ["size" ],
267+ is_private = repo_data ["private" ],
268+ is_fork = repo_data ["fork" ],
269+ created_at = datetime .fromisoformat (
270+ repo_data ["created_at" ].replace ("Z" , "+00:00" )
271+ ),
272+ updated_at = datetime .fromisoformat (
273+ repo_data ["updated_at" ].replace ("Z" , "+00:00" )
274+ ),
275+ language = repo_data ["language" ],
276+ description = repo_data ["description" ],
277+ topics = repo_data .get ("topics" , []),
207278 )
208279
209- except GithubException as e :
210- if e .status == 404 :
280+ except httpx . HTTPStatusError as e :
281+ if e .response . status_code == 404 :
211282 raise RepositoryNotFoundError (f"Repository { owner } /{ repo } not found" )
212283 raise
213284
@@ -255,11 +326,9 @@ async def resolve_reference(self, owner: str, repo: str, ref: str) -> GitReferen
255326 f"Could not resolve reference '{ ref } ' for repository { owner } /{ repo } "
256327 )
257328
258- async def _get_tag (self , tag_name : str , owner : str , repo : str ):
329+ def _get_tag (self , tag_name : str , owner : str , repo : str ):
259330 """Helper to get a tag by name - needed because get_tags() returns a list."""
260- tags = await asyncio .to_thread (
261- lambda : list (self .github_client .get_repo (f"{ owner } /{ repo } " ).get_tags ())
262- )
331+ tags = list (self .github_client .get_repo (f"{ owner } /{ repo } " ).get_tags ())
263332 for tag in tags :
264333 if tag .name == tag_name :
265334 return tag
0 commit comments