Skip to content

Commit 8171230

Browse files
committed
feat(process-issue): validate URLs before creating PR
HEAD-request all submitted URLs during issue processing.
- 404 → fail with a list of the broken fields (e.g. "github_url, paper_url") and skip PR creation so the contributor fixes the links first.
- 401/403 → warning only; the PR is still created (gated or private repos).
HF_TOKEN and GITHUB_TOKEN are passed to the step for authenticated checks.
1 parent 3fd5340 commit 8171230

2 files changed

Lines changed: 52 additions & 0 deletions

File tree

.github/workflows/process-issue.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ jobs:
4646
ISSUE_TYPE: ${{ steps.type.outputs.issue_type }}
4747
ISSUE_NUMBER: ${{ github.event.issue.number }}
4848
ISSUE_AUTHOR: ${{ github.event.issue.user.login }}
49+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
50+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
4951
run: python scripts/process_issue.py
5052

5153
- name: Regenerate site data

scripts/process_issue.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,39 @@
2323
from pathlib import Path
2424
from datetime import date
2525

26+
import requests
2627
import yaml
2728

29+
# API tokens read from the environment; each falls back to an empty string
# when the variable is not set. check_url() sends them as Bearer tokens when
# probing GitHub and Hugging Face URLs.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
HF_TOKEN = os.getenv("HF_TOKEN", "")
31+
32+
33+
def check_url(url: str) -> tuple[bool, str]:
    """HEAD request a URL. Returns (is_valid, warning_message).

    404 → invalid (broken link).
    401/403 → valid URL but access-restricted; return warning only.
    Other errors → treated as valid to avoid false positives.

    An empty URL is reported as valid with no warning.

    A token is attached only when the URL uses HTTPS and its *parsed hostname*
    is ``huggingface.co`` / ``github.com`` or a subdomain of one of them. The
    URL is contributor-supplied, so a plain substring test would hand the
    token to any host whose path or query merely mentions those domains
    (e.g. ``https://evil.example/huggingface.co``).
    """
    from urllib.parse import urlsplit

    if not url:
        return True, ""

    # Work out where the request is really going. A URL that cannot be parsed
    # simply gets no token; the request below then decides how to treat it.
    try:
        parts = urlsplit(url)
        scheme = (parts.scheme or "").lower()
        host = (parts.hostname or "").lower()
    except ValueError:
        scheme, host = "", ""

    headers = {}
    if scheme == "https":  # never put a secret on a cleartext connection
        known_hosts = (
            ("huggingface.co", HF_TOKEN),
            ("github.com", GITHUB_TOKEN),
        )
        for domain, token in known_hosts:
            if host == domain or host.endswith("." + domain):
                if token:
                    headers["Authorization"] = f"Bearer {token}"
                break

    try:
        resp = requests.head(url, headers=headers, timeout=10, allow_redirects=True)
    except requests.RequestException:
        # Network errors are deliberately not treated as broken links.
        return True, ""

    status = resp.status_code
    if status == 404:
        return False, f"URL returned 404 (not found): {url}"
    if status == 401:
        return True, f"URL requires authentication (gated): {url}"
    if status == 403:
        return True, f"URL is access-restricted: {url}"
    return True, ""
58+
2859
# Repository root: two directory levels above this script file.
ROOT = Path(__file__).parent.parent
# Directory holding the data files, directly under the repository root.
DATA_DIR = ROOT.joinpath("data")
# Today's date as an ISO 8601 string (YYYY-MM-DD), fixed at import time.
TODAY = str(date.today())
@@ -254,6 +285,25 @@ def main() -> None:
254285
print("::error::Entry 'name' is required")
255286
sys.exit(1)
256287

288+
url_fields = ["github_url", "paper_url", "hf_url", "project_url"]
289+
broken, warned = [], []
290+
for field in url_fields:
291+
url = entry.get(field, "")
292+
if not url:
293+
continue
294+
valid, msg = check_url(url)
295+
if not valid:
296+
broken.append(field)
297+
elif msg:
298+
warned.append(field)
299+
300+
for field in warned:
301+
print(f"::warning::{field} is access-restricted (gated or private) — included anyway")
302+
if broken:
303+
field_list = ", ".join(broken)
304+
print(f"::error::The following URLs returned 404: {field_list}. Please fix the links and edit the issue to retry.")
305+
sys.exit(1)
306+
257307
append_entry(yaml_path, entry)
258308
print(f"Entry '{entry['name']}' added by @{author} (issue #{issue_number})")
259309

0 commit comments

Comments
 (0)