Skip to content

Commit 69892a0

Browse files
authored
Update search_repos.py
1 parent 5695bdb commit 69892a0

1 file changed

Lines changed: 35 additions & 14 deletions

File tree

scripts/github/workflows/search_repos.py

Lines changed: 35 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
import time
55
import requests
66
import base64
7-
import urllib.parse
87
from datetime import datetime, timezone
98

109
# --- CONFIGURATION ---
@@ -20,7 +19,8 @@
2019
HEADERS = {
2120
'Authorization': f'Bearer {GITHUB_TOKEN}',
2221
'Accept': 'application/vnd.github+json',
23-
'User-Agent': 'Cloudflare-Worker-Discovery-Bot/1.0'
22+
'User-Agent': 'Cloudflare-Worker-Discovery-Bot/1.0',
23+
'X-GitHub-Api-Version': '2022-11-28' # Best practice for GitHub API stability
2424
}
2525

2626
# Tags to search for in package.json
@@ -34,7 +34,10 @@
3434
def analyze_stack(repo_name):
3535
"""Enriches the repo by checking its package.json dependencies."""
3636
tags = []
37-
url = f"https://api.github.com{repo_name}/contents/package.json"
37+
38+
# 🐛 FIX 1: Added '/repos/' prefix for the contents API
39+
url = f"https://api.github.com/repos/{repo_name}/contents/package.json"
40+
3841
try:
3942
res = requests.get(url, headers=HEADERS, timeout=5)
4043
if res.status_code == 200:
@@ -45,8 +48,9 @@ def analyze_stack(repo_name):
4548
for pkg, label in sigs.items():
4649
if any(pkg in k for k in all_deps):
4750
tags.append(label)
48-
except:
49-
pass
51+
except Exception:
52+
pass # Silently fail if package.json doesn't exist to keep the loop moving
53+
5054
return list(set(tags)) if tags else ["Standard Worker"]
5155

5256
def get_already_registered_repos():
@@ -67,20 +71,28 @@ def get_already_registered_repos():
6771
def search_broad_workers():
6872
"""Searches for ANY valid worker config file using correct URL formatting."""
6973

70-
# 1. Define and Encode Query
71-
raw_query = 'compatibility_date path:/(wrangler\\.jsonc|wrangler\\.toml)/'
72-
encoded_query = urllib.parse.quote(raw_query)
74+
# 🐛 FIX 2: Replaced regex with native GitHub boolean filename search for stability
75+
raw_query = 'compatibility_date filename:wrangler.toml OR filename:wrangler.jsonc'
76+
77+
# 🐛 FIX 3: Point to the actual /search/code endpoint explicitly
78+
url = "https://api.github.com/search/code"
7379

74-
# 2. Correct URL Construction
75-
url = f"https://api.github.com{encoded_query}&sort=indexed&order=desc&per_page=100"
80+
# Let requests handle the urlencoding natively using the `params` dict
81+
params = {
82+
'q': raw_query,
83+
'sort': 'indexed',
84+
'order': 'desc',
85+
'per_page': 100
86+
}
7687

7788
print(f"🌊 Casting wide net: {raw_query}")
7889

7990
# Get previously found repos for deduplication
8091
already_found = get_already_registered_repos()
8192

8293
try:
83-
res = requests.get(url, headers=HEADERS, timeout=30)
94+
res = requests.get(url, headers=HEADERS, params=params, timeout=30)
95+
8496
if res.status_code != 200:
8597
print(f"❌ Error: {res.status_code} - {res.text}")
8698
return []
@@ -104,15 +116,20 @@ def search_broad_workers():
104116

105117
stack_tags = analyze_stack(name)
106118

119+
# Handle edge cases where description returns `None`
120+
description = repo.get('description')
121+
if not description:
122+
description = 'No description'
123+
107124
results.append({
108125
"name": name,
109126
"url": repo['html_url'],
110-
"description": repo.get('description', 'No description'),
127+
"description": description,
111128
"detected_stack": stack_tags,
112129
"config_file": item['name'],
113130
"discovered_at": datetime.now(timezone.utc).isoformat()
114131
})
115-
time.sleep(1) # Safety delay for secondary rate limits
132+
time.sleep(1) # Crucial safety delay for secondary rate limits
116133

117134
return results
118135

@@ -122,11 +139,15 @@ def search_broad_workers():
122139

123140
def main():
124141
if not GITHUB_TOKEN:
125-
print("❌ Error: GITHUB_TOKEN not set")
142+
print("❌ Error: GITHUB_TOKEN not set in environment.")
126143
return
127144

128145
discoveries = search_broad_workers()
129146

147+
if not discoveries:
148+
print("⚠️ No new discoveries made or error encountered.")
149+
return
150+
130151
# Save to artifact file
131152
with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
132153
json.dump(discoveries, f, indent=2)

0 commit comments

Comments (0)