diff --git a/.github/workflows/link-check-pr.yml b/.github/workflows/link-check-pr.yml
new file mode 100644
index 000000000..da681e228
--- /dev/null
+++ b/.github/workflows/link-check-pr.yml
@@ -0,0 +1,70 @@
+# Pull-request link check.
+#
+# Scope: external HTTP(S) links in markdown files changed by the PR.
+# Internal/relative links are validated by Docusaurus during build.yml
+# (onBrokenLinks: 'throw', onBrokenMarkdownLinks: 'throw'); re-checking
+# them here would be redundant and prone to false positives because
+# lychee cannot replay Docusaurus's slug routing.
+name: Link check (PR)
+
+on:
+  pull_request:
+    paths:
+      - '**/*.md'
+      - '**/*.mdx'
+      - 'lychee.toml'
+      - '.github/workflows/link-check-pr.yml'
+
+permissions:
+  contents: read
+
+jobs:
+  lychee:
+    runs-on: ubuntu-latest
+    # Non-blocking on day one. Once the check is stable for a couple of
+    # weeks, remove this line and mark "Link check (PR) / lychee" as a
+    # required status check in branch protection.
+    continue-on-error: true
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+
+      - name: Detect changed markdown files
+        id: changed
+        # NOTE(review): third-party action referenced by a mutable tag; consider
+        # pinning it to a full commit SHA (GitHub security hardening guidance).
+        uses: tj-actions/changed-files@v45
+        with:
+          files: |
+            **/*.md
+            **/*.mdx
+
+      - name: Restore lychee cache
+        if: steps.changed.outputs.any_changed == 'true'
+        uses: actions/cache@v4
+        with:
+          path: .lycheecache
+          key: lychee-pr-${{ github.run_id }}
+          restore-keys: lychee-pr-
+
+      - name: Run lychee on changed files
+        if: steps.changed.outputs.any_changed == 'true'
+        uses: lycheeverse/lychee-action@v2
+        with:
+          # --scheme http --scheme https restricts lychee to absolute
+          # HTTP(S) URLs only (i.e., external links). See header comment.
+          # NOTE(review): all_changed_files is space-separated by default, so
+          # a changed path containing spaces would presumably be split into
+          # several arguments - confirm no docs paths contain spaces.
+          args: >-
+            --no-progress
+            --scheme http
+            --scheme https
+            --config lychee.toml
+            ${{ steps.changed.outputs.all_changed_files }}
+          fail: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/link-check-weekly.yml b/.github/workflows/link-check-weekly.yml
new file mode 100644
index 000000000..50f59ff08
--- /dev/null
+++ b/.github/workflows/link-check-weekly.yml
@@ -0,0 +1,97 @@
+# Weekly site-wide link check.
+#
+# Builds the Docusaurus site and runs lychee against the rendered HTML
+# under build/docs, build/community, and build/faq (blog and changelog
+# are intentionally excluded). On failure, opens or updates a single
+# rolling GitHub issue labeled "link-rot" so findings are tracked over
+# time without spamming new issues each week.
+name: Link check (weekly)
+
+on:
+  schedule:
+    - cron: '0 12 * * 1'  # Mondays 12:00 UTC
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  lychee:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      - name: Use Node.js lts/jod (v22)
+        uses: actions/setup-node@v4
+        with:
+          node-version: lts/jod
+          cache: 'npm'
+
+      - name: Install and build
+        run: |
+          npm ci
+          npm run build
+
+      - name: Restore lychee cache
+        uses: actions/cache@v4
+        with:
+          path: .lycheecache
+          key: lychee-weekly-${{ github.run_id }}
+          restore-keys: lychee-weekly-
+
+      - name: Run lychee on built docs, community, and faq
+        id: lychee
+        uses: lycheeverse/lychee-action@v2
+        with:
+          args: >-
+            --no-progress
+            --config lychee.toml
+            --base ./build
+            './build/docs/**/*.html'
+            './build/community/**/*.html'
+            './build/faq/**/*.html'
+          # Do not fail the job so the following steps can open/update the
+          # tracking issue with the report.
+          fail: false
+          output: ./lychee-report.md
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Find open tracking issue
+        # lychee-action@v2 exposes exit_code as a STEP OUTPUT, not an env var.
+        # Using env.lychee_exit_code here would always be empty, and
+        # '' != '0' evaluates to true in GitHub Actions expressions, so the
+        # issue would be touched on every run.
+        if: steps.lychee.outputs.exit_code != '0'
+        id: tracking
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        # Emits the number of an open "link-rot" issue as the step output
+        # `number`, or an empty string when none is open.
+        run: |
+          number="$(gh issue list \
+            --repo "$GITHUB_REPOSITORY" \
+            --state open \
+            --label link-rot \
+            --limit 1 \
+            --json number \
+            --jq '.[0].number // empty')"
+          echo "number=${number}" >> "$GITHUB_OUTPUT"
+
+      - name: Create or update tracking issue
+        if: steps.lychee.outputs.exit_code != '0'
+        uses: peter-evans/create-issue-from-file@v5
+        with:
+          title: 'Link check report'
+          content-filepath: ./lychee-report.md
+          # Without issue-number the action always opens a NEW issue. Passing
+          # the open issue found above updates it in place; an empty value
+          # falls back to creating the first tracking issue.
+          issue-number: ${{ steps.tracking.outputs.number }}
+          labels: |
+            link-rot
+            automated
diff --git a/lychee.toml b/lychee.toml
new file mode 100644
index 000000000..66798b112
--- /dev/null
+++ b/lychee.toml
@@ -0,0 +1,30 @@
+# Lychee link checker config.
+# Shared by .github/workflows/link-check-pr.yml and link-check-weekly.yml.
+# Docs: https://lychee.cli.rs/usage/config/
+
+cache = true
+max_cache_age = "1d"
+max_retries = 3
+retry_wait_time = 2
+timeout = 20
+max_concurrency = 16
+
+# Treat rate-limit/partial responses as success so transient throttling
+# from external sites is not reported as link rot.
+accept = [200, 203, 206, 429]
+
+# Skip loopback/local development addresses, plus sites that routinely
+# 403 bots regardless of how politely we crawl.
+exclude = [
+  "^http(s)?://localhost",
+  "^http(s)?://127\\.0\\.0\\.1",
+  "^http(s)?://0\\.0\\.0\\.0",
+  "linkedin\\.com",
+]
+
+# Never traverse these paths when expanding inputs.
+exclude_path = [
+  "node_modules",
+  ".docusaurus",
+  ".preview-pages",
+]