Skip to content

Commit 4a15147

Browse files
authored
Enhance GitHub repo search with rate limit handling
1 parent 1d236b7 commit 4a15147

1 file changed

Lines changed: 67 additions & 37 deletions

File tree

scripts/github/workflows/search_repos.py

Lines changed: 67 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
import os
33
import sys
44
import json
5+
import time
56
import requests
6-
from datetime import datetime
7+
from datetime import datetime, timezone
78

89
TOKEN = os.environ.get('GITHUB_TOKEN')
910
if not TOKEN:
@@ -13,9 +14,24 @@
1314
HEADERS = {
1415
'Accept': 'application/vnd.github+json',
1516
'Authorization': f'Bearer {TOKEN}',
16-
'User-Agent': 'GitHub-Action-Search'
17+
'X-GitHub-Api-Version': '2022-11-28',
18+
'User-Agent': 'Antigravity-GitHub-Search'
1719
}
1820

21+
def check_rate_limit(response):
    """Pause as needed so follow-up GitHub API calls stay within rate limits.

    Reads the ``X-RateLimit-Remaining`` header of ``response``:

    * header missing or not a valid integer: return immediately without
      sleeping (there is no usable quota information);
    * at most one request remaining: sleep until the time given by
      ``X-RateLimit-Reset`` (falling back to 60 seconds from now when that
      header is missing or malformed), plus one extra second;
    * otherwise: sleep one second to pace consecutive requests.

    Args:
        response: Any object exposing a ``headers`` mapping, such as the
            value returned by ``requests.get``.

    Returns:
        None.
    """
    headers = response.headers
    try:
        remaining = int(headers['X-RateLimit-Remaining'])
    except (KeyError, TypeError, ValueError):
        # Callers only catch requests' exceptions, so a malformed header must
        # not be allowed to escape as ValueError and abort the whole run.
        return

    if remaining > 1:
        # Add a small 1-second delay between requests to prevent triggering abuse mechanisms
        time.sleep(1)
        return

    # Search API has a strict 10 req/min limit
    fallback_reset = time.time() + 60
    try:
        reset_time = int(headers.get('X-RateLimit-Reset', fallback_reset))
    except (TypeError, ValueError):
        reset_time = int(fallback_reset)

    # Never sleep a negative amount; the extra second covers clock skew.
    sleep_time = max(reset_time - time.time(), 0) + 1
    print(f"⚠️ Rate limit reached. Sleeping for {sleep_time:.0f} seconds...")
    time.sleep(sleep_time)
34+
1935
def search_code(query, limit=100):
2036
"""Search GitHub code and return results."""
2137
url = 'https://api.github.com/search/code'
@@ -27,68 +43,82 @@ def search_code(query, limit=100):
2743
try:
2844
response = requests.get(url, headers=HEADERS, params=params, timeout=30)
2945
response.raise_for_status()
46+
check_rate_limit(response)
3047
return response.json().get('items', [])
3148
except requests.exceptions.RequestException as e:
3249
print(f"Error searching: {e}")
3350
return []
3451

52+
def get_repo_details(repo_full_name):
    """Look up a repository's metadata through the GitHub REST API.

    Args:
        repo_full_name: Repository identifier that is interpolated into the
            ``/repos/{repo_full_name}`` endpoint path.

    Returns:
        The decoded JSON body of the response, or ``None`` when the request
        fails with a ``requests`` exception (the error is printed).
    """
    endpoint = f'https://api.github.com/repos/{repo_full_name}'
    try:
        resp = requests.get(endpoint, headers=HEADERS, timeout=30)
        # Turn HTTP error statuses into exceptions before touching the body.
        resp.raise_for_status()
        check_rate_limit(resp)
        details = resp.json()
    except requests.exceptions.RequestException as err:
        print(f"Error fetching details for {repo_full_name}: {err}")
        return None
    return details
63+
3564
def main():
    """Search GitHub for Cloudflare Workers + shadcn projects and report them.

    Runs one code search over ``package.json`` files, collects the distinct
    repository names from the matches, fetches each repository's metadata,
    prints the top 20 repositories by stars and by last push, and writes the
    full result set to ``results.json`` in the current working directory.

    Returns:
        None. Returns early, without writing a file, when the search yields
        no repositories.
    """
    print("=== Searching for Cloudflare Workers + Shadcn projects ===\n")

    # We search package.json for both backend (Cloudflare) and frontend (Shadcn utilities) dependencies
    print("📦 Searching for projects with wrangler and shadcn-related dependencies...")
    search_query = '("wrangler" OR "@cloudflare/workers-types") AND ("lucide-react" OR "clsx" OR "tailwind-merge") path:package.json'
    code_results = search_code(search_query, limit=100)
    print(f" Found {len(code_results)} code matches")

    # Extract unique repository IDs; sorting makes the fetch order (and the
    # order of entries in results.json) reproducible between runs.
    unique_repo_names = sorted({item['repository']['full_name'] for item in code_results})

    print(f"\n✨ Found {len(unique_repo_names)} unique repositories. Fetching metadata...\n")

    if not unique_repo_names:
        print("No repositories found!")
        return

    # Fetch full repository metadata to get accurate stars and dates
    repos = {}
    for i, repo_name in enumerate(unique_repo_names, 1):
        print(f"Fetching details for [{i}/{len(unique_repo_names)}]: {repo_name}...")
        repo_data = get_repo_details(repo_name)
        if repo_data:
            # The API can return a key with a null value, in which case
            # dict.get's default is NOT used. ``or`` applies the fallback for
            # both missing and null fields, which also keeps the sort keys
            # below comparable (no None mixed with str/int).
            repos[repo_name] = {
                'full_name': repo_data.get('full_name') or repo_name,
                'html_url': repo_data.get('html_url') or '',
                'description': repo_data.get('description') or '',
                'stargazers_count': repo_data.get('stargazers_count') or 0,
                'language': repo_data.get('language') or 'Unknown',
                'pushed_at': repo_data.get('pushed_at') or '',
                'created_at': repo_data.get('created_at') or '',
            }

    # Sort by stars
    print("\n=== 🌟 Top 20 by Stars ===")
    sorted_by_stars = sorted(repos.values(), key=lambda x: x['stargazers_count'], reverse=True)[:20]
    for i, repo in enumerate(sorted_by_stars, 1):
        lang = f"[{repo['language']}]" if repo['language'] else "[Unknown]"
        print(f"{i:2}. {repo['full_name']:40}{repo['stargazers_count']:<5} {lang:15} {repo['html_url']}")

    # Sort by last updated (ISO-8601 timestamps order correctly as strings)
    print("\n=== 📅 Top 20 by Last Updated ===")
    sorted_by_updated = sorted(repos.values(), key=lambda x: x['pushed_at'], reverse=True)[:20]
    for i, repo in enumerate(sorted_by_updated, 1):
        # Keep only the YYYY-MM-DD prefix of the timestamp for display.
        pushed = repo['pushed_at'][:10] if repo['pushed_at'] else 'Unknown'
        print(f"{i:2}. {repo['full_name']:40}{repo['stargazers_count']:<5} Updated: {pushed} {repo['html_url']}")

    # Save results to JSON
    output = {
        'search_date': datetime.now(timezone.utc).isoformat(),
        'total_repos': len(repos),
        'repositories': list(repos.values())
    }

    with open('results.json', 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)

    print(f"\n✅ Full results saved to results.json ({len(repos)} repositories)")

0 commit comments

Comments
 (0)