|
23 | 23 | from pathlib import Path |
24 | 24 | from datetime import date |
25 | 25 |
|
| 26 | +import requests |
26 | 27 | import yaml |
27 | 28 |
|
# Optional API tokens taken from the environment. An unset variable yields an
# empty string, which check_url treats as "send no Authorization header".
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
HF_TOKEN = os.getenv("HF_TOKEN", "")
| 31 | + |
| 32 | + |
def _url_host_matches(url: str, domain: str) -> bool:
    """Return True when *url*'s hostname is *domain* or a subdomain of it.

    The hostname is parsed out of the URL instead of using a substring test,
    so look-alike URLs such as ``https://evil.example/?x=github.com`` or
    ``https://github.com.evil.example/`` do not match.
    """
    from urllib.parse import urlsplit  # local import keeps the block self-contained

    try:
        host = (urlsplit(url).hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets): never a match.
        return False
    return host == domain or host.endswith("." + domain)


def check_url(url: str) -> tuple[bool, str]:
    """Probe *url* with a HEAD request and classify the response.

    Args:
        url: The link to check. An empty value is accepted without a request.

    Returns:
        A ``(is_valid, warning_message)`` tuple:

        * 404 -> ``(False, message)``: the link is broken.
        * 401 / 403 -> ``(True, message)``: the URL exists but is gated or
          access-restricted, so only a warning is reported.
        * any other status, or a ``requests.RequestException`` ->
          ``(True, "")``: treated as valid to avoid false positives.
    """
    if not url:
        return True, ""

    # Attach a token only when the parsed hostname really belongs to the
    # service. The URL comes from caller-supplied data, and a substring match
    # would hand the token to any host that merely mentions the domain.
    headers = {}
    if HF_TOKEN and _url_host_matches(url, "huggingface.co"):
        headers["Authorization"] = f"Bearer {HF_TOKEN}"
    elif GITHUB_TOKEN and _url_host_matches(url, "github.com"):
        headers["Authorization"] = f"Bearer {GITHUB_TOKEN}"

    try:
        resp = requests.head(url, headers=headers, timeout=10, allow_redirects=True)
    except requests.RequestException:
        # Network errors are deliberately not treated as broken links.
        return True, ""

    status = resp.status_code
    if status == 404:
        return False, f"URL returned 404 (not found): {url}"
    if status == 401:
        return True, f"URL requires authentication (gated): {url}"
    if status == 403:
        return True, f"URL is access-restricted: {url}"
    return True, ""
| 58 | + |
# Path anchors and the run date, each evaluated once when the module is imported.
ROOT = Path(__file__).parent.parent  # two .parent hops up from this file
DATA_DIR = ROOT.joinpath("data")
TODAY = str(date.today())  # str(date) is the ISO 8601 form, same as isoformat()
@@ -254,6 +285,25 @@ def main() -> None: |
254 | 285 | print("::error::Entry 'name' is required") |
255 | 286 | sys.exit(1) |
256 | 287 |
|
| 288 | + url_fields = ["github_url", "paper_url", "hf_url", "project_url"] |
| 289 | + broken, warned = [], [] |
| 290 | + for field in url_fields: |
| 291 | + url = entry.get(field, "") |
| 292 | + if not url: |
| 293 | + continue |
| 294 | + valid, msg = check_url(url) |
| 295 | + if not valid: |
| 296 | + broken.append(field) |
| 297 | + elif msg: |
| 298 | + warned.append(field) |
| 299 | + |
| 300 | + for field in warned: |
| 301 | + print(f"::warning::{field} is access-restricted (gated or private) — included anyway") |
| 302 | + if broken: |
| 303 | + field_list = ", ".join(broken) |
| 304 | + print(f"::error::The following URLs returned 404: {field_list}. Please fix the links and edit the issue to retry.") |
| 305 | + sys.exit(1) |
| 306 | + |
257 | 307 | append_entry(yaml_path, entry) |
258 | 308 | print(f"Entry '{entry['name']}' added by @{author} (issue #{issue_number})") |
259 | 309 |
|
|
0 commit comments