|
| 1 | +#!/usr/bin/env node |
| 2 | + |
| 3 | +/** |
| 4 | + * URL Resolution Check Script |
| 5 | + * |
| 6 | + * Scans all TypeScript source files under src/ for hardcoded http(s) URLs |
| 7 | + * and verifies each one resolves (returns a non-4xx/5xx HTTP status). |
| 8 | + * |
| 9 | + * Usage: |
| 10 | + * node .github/skills/check-urls/check-urls.js |
| 11 | + * |
| 12 | + * Exit codes: |
| 13 | + * 0 — all URLs resolved successfully (2xx or 3xx) |
| 14 | + * 1 — one or more URLs are broken (4xx / 5xx / timeout / connection error) |
| 15 | + */ |
| 16 | + |
| 17 | +'use strict'; |
| 18 | + |
| 19 | +const fs = require('fs'); |
| 20 | +const path = require('path'); |
| 21 | +const https = require('https'); |
| 22 | +const http = require('http'); |
| 23 | + |
| 24 | +// ── Configuration ────────────────────────────────────────────────────────── |
| 25 | + |
// Root of the TypeScript sources, three directory levels above this script.
const SRC_DIR = path.join(__dirname, '..', '..', '..', 'src');

// Per-request timeout passed to the HTTP client, in milliseconds.
const TIMEOUT_MS = 10 * 1000;

/**
 * Prefixes of URLs that are never checked. These are identifiers or
 * machine-local addresses rather than reachable endpoints (JSON Schema
 * meta-schema URIs, localhost references).
 */
const SKIP_PREFIXES = [
  'http://json-schema.org/',
  'http://localhost',
  'https://localhost',
];
| 38 | + |
| 39 | +// ── Helpers ──────────────────────────────────────────────────────────────── |
| 40 | + |
/**
 * Walk a directory tree and gather the paths of every regular file whose
 * name ends in ".ts".
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]} Matching file paths, each built by joining `dir` with
 *   the entry names, in directory-listing order (subdirectories expanded in place).
 */
function collectTsFiles(dir) {
  return fs.readdirSync(dir, { withFileTypes: true }).flatMap((dirent) => {
    const entryPath = path.join(dir, dirent.name);
    if (dirent.isDirectory()) {
      return collectTsFiles(entryPath);
    }
    const isTsFile = dirent.isFile() && dirent.name.endsWith('.ts');
    return isTsFile ? [entryPath] : [];
  });
}
| 54 | + |
/**
 * Extract all unique http(s) URLs from a string.
 *
 * @param {string} text - Source text to scan (e.g. the contents of a .ts file).
 * @returns {Set<string>} The distinct URLs found, in order of first appearance.
 */
function extractUrls(text) {
  // A candidate runs until whitespace, a quote/backtick, or a closing bracket.
  // A backslash also ends it: inside source code a backslash starts an escape
  // sequence ("\n", "\"") and is never part of the URL itself. Left in, the
  // URL parser would read it as "/" and a wrong address would be checked.
  const candidates = text.matchAll(/https?:\/\/[^\s"'`<>)\]},\\]+/g);
  const urls = new Set();
  for (const [match] of candidates) {
    // Strip trailing punctuation characters that commonly appear after URLs
    // in prose or markdown (e.g. "see https://example.com.")
    const url = match.replace(/[.,;:!?)>\]'"`]+$/u, '');
    // Skip template literal interpolations (e.g. https://${variable}/path)
    if (url.includes('${')) { continue; }
    // Skip matches that have no host left after stripping (e.g. "https://..."
    // in a comment); they are not URLs and would only be reported as invalid.
    if (!/^https?:\/\/[^/]/.test(url)) { continue; }
    urls.add(url);
  }
  return urls;
}
| 70 | + |
/**
 * Probe a URL with HEAD first and, when that answers with a status of 400 or
 * above, probe it again with GET and report the GET outcome instead.
 * Some servers (e.g. bsky.app intent URLs) answer HEAD with 404 or 405 while
 * serving GET normally. A HEAD attempt that produced no status (timeout,
 * connection error, invalid URL) is returned as-is without a retry.
 *
 * @param {string} urlStr - The URL to probe.
 * @returns {Promise<{status: (number|null|undefined), error: (string|undefined)}>}
 */
async function checkUrl(urlStr) {
  const headResult = await checkUrlWithMethod(urlStr, 'HEAD');
  if (headResult.status >= 400) {
    // Server may not support HEAD — the GET result is authoritative
    return checkUrlWithMethod(urlStr, 'GET');
  }
  return { status: headResult.status, error: headResult.error };
}
| 83 | + |
/**
 * Send a single HTTP(S) request with the given method and report the outcome.
 * The returned promise never rejects; failures are reported through `error`.
 *
 * @param {string} urlStr - Absolute URL to request.
 * @param {string} method - HTTP method, e.g. 'HEAD' or 'GET'.
 * @returns {Promise<{status: (number|null|undefined), error?: string}>}
 *   `{ status }` once response headers arrive, otherwise `{ status: null, error }`
 *   where `error` is always a non-empty description ('invalid URL', 'timeout',
 *   or the underlying error's message/code).
 */
function checkUrlWithMethod(urlStr, method) {
  return new Promise((resolve) => {
    let url;
    try {
      url = new URL(urlStr);
    } catch {
      resolve({ status: null, error: 'invalid URL' });
      return;
    }

    const lib = url.protocol === 'https:' ? https : http;
    const options = {
      method,
      hostname: url.hostname,
      port: url.port || undefined,
      path: url.pathname + url.search,
      headers: {
        'User-Agent': 'copilot-token-tracker-url-checker/1.0',
      },
      timeout: TIMEOUT_MS,
    };

    const req = lib.request(options, (res) => {
      // Only the status line matters; settle first, then drop the connection
      // instead of downloading the body.
      resolve({ status: res.statusCode });
      req.destroy(); // don't wait for body
      res.resume();
    });

    req.on('timeout', () => {
      // 'timeout' only signals inactivity; the request must be torn down here.
      req.destroy();
      resolve({ status: null, error: 'timeout' });
    });

    req.on('error', (err) => {
      // Some errors carry an empty message (e.g. the AggregateError raised
      // when every connection attempt fails). Callers test `error` for
      // truthiness, so fall back to the error code or a fixed label rather
      // than letting a failure look like a success.
      resolve({ status: null, error: err.message || err.code || 'request failed' });
    });

    req.end();
  });
}
| 125 | + |
| 126 | +// ── Main ─────────────────────────────────────────────────────────────────── |
| 127 | + |
/**
 * Script entry point: scan the TypeScript sources for URLs, probe each one,
 * print a per-URL report and a summary.
 *
 * Exits the process with code 1 when the source directory is missing or when
 * at least one URL is broken, and with code 0 when there is nothing to check.
 * Otherwise it returns normally.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // 1. Collect all TypeScript files
  if (!fs.existsSync(SRC_DIR)) {
    console.error(`❌ Source directory not found: ${SRC_DIR}`);
    process.exit(1);
  }

  const tsFiles = collectTsFiles(SRC_DIR);
  console.log(`Scanning ${tsFiles.length} TypeScript file(s) under ${path.relative(process.cwd(), SRC_DIR)}/\n`);

  // 2. Extract all URLs, tracking which file(s) each came from
  const urlSources = new Map(); // url → Set<relativePath>
  for (const file of tsFiles) {
    const content = fs.readFileSync(file, 'utf8');
    const rel = path.relative(process.cwd(), file);
    for (const url of extractUrls(content)) {
      if (!urlSources.has(url)) {
        urlSources.set(url, new Set());
      }
      urlSources.get(url).add(rel);
    }
  }

  // 3. Filter out known-skip prefixes
  const urlsToCheck = [...urlSources.keys()].filter(
    (u) => !SKIP_PREFIXES.some((prefix) => u.startsWith(prefix))
  );

  if (urlsToCheck.length === 0) {
    console.log('No URLs found to check.');
    process.exit(0);
  }

  console.log(`Found ${urlsToCheck.length} unique URL(s) to check.\n`);

  // 4. Check each URL
  let broken = 0;

  // Check sequentially to avoid hammering servers
  for (const url of urlsToCheck.sort()) {
    const sources = [...urlSources.get(url)].join(', ');
    const { status, error } = await checkUrl(url);

    if (error || status == null) {
      // A missing status means no HTTP response was received. Treat it as
      // broken even if the error text is empty; otherwise the URL would fall
      // through to the OK branch and be reported as "HTTP null".
      console.log(`❌ BROKEN [${error || 'no response'}]`);
      console.log(` ${url}`);
      console.log(` → ${sources}\n`);
      broken++;
    } else if (status >= 400) {
      console.log(`❌ BROKEN [HTTP ${status}]`);
      console.log(` ${url}`);
      console.log(` → ${sources}\n`);
      broken++;
    } else if (status >= 300) {
      // Redirects are surfaced as warnings but do not fail the run
      console.log(`⚠️ REDIRECT [HTTP ${status}]`);
      console.log(` ${url}`);
      console.log(` → ${sources}\n`);
    } else {
      console.log(`✅ OK [HTTP ${status}] ${url}`);
    }
  }

  // 5. Summary
  console.log('\n─────────────────────────────────────────');
  if (broken === 0) {
    console.log(`✅ All ${urlsToCheck.length} URL(s) resolved successfully.`);
  } else {
    console.log(`❌ ${broken} of ${urlsToCheck.length} URL(s) are broken.`);
    process.exit(1);
  }
}
| 199 | + |
// Run the script; a rejection that escapes main() is logged and fails the run.
main().catch((fatal) => {
  console.error('Unexpected error:', fatal);
  process.exit(1);
});
0 commit comments