|
81 | 81 |
|
82 | 82 | - name: Fetch sitemap and extract URLs |
83 | 83 | run: | |
84 | | - curl -s https://buildwithfern.com/learn/sitemap.xml | grep -oP '(?<=<loc>)[^<]+' > urls.txt |
85 | | - echo "Found $(wc -l < urls.txt) URLs in sitemap" |
| 84 | + # The sitemap may be either a regular <urlset> or a <sitemapindex> |
| 85 | + # that points to per-language sub-sitemaps (e.g. sitemap-en.xml, |
| 86 | + # sitemap-zh.xml). Handle both shapes by recursively expanding any |
| 87 | + # sitemapindex into its child sitemaps before extracting page URLs. |
# Strict mode: stop on a failing command, an unset variable, or a failing
# pipeline stage.
set -euo pipefail
# Entry-point sitemap for the crawl. Read-only so that nothing further down
# the script can reassign it by accident.
readonly ROOT_SITEMAP="https://buildwithfern.com/learn/sitemap.xml"
| 90 | +
|
#######################################
# Print every page URL reachable from a sitemap, one URL per line.
#
# A <urlset> document contributes its <loc> values directly. A
# <sitemapindex> document is expanded by fetching each child sitemap in
# turn. A sitemap that cannot be fetched produces a warning on stderr and
# contributes no URLs, so one broken child does not abort the whole walk.
#
# Arguments:
#   $1 - URL of the sitemap (or sitemap index) to fetch
#   $2 - current nesting depth (optional, default 0); passed by the
#        recursive calls, callers normally omit it
# Outputs:
#   Page URLs on stdout (unsorted, may contain duplicates);
#   warnings on stderr
# Returns:
#   0, including when a fetch fails (best-effort by design)
#######################################
fetch_sitemap_urls() {
  local sitemap_url="$1"
  local depth="${2:-0}"
  local max_depth=5
  local body locs child

  # Guard against an index that (directly or indirectly) lists itself;
  # without a cap the recursion would never terminate.
  if (( depth > max_depth )); then
    echo "Warning: sitemap nesting too deep, skipping $sitemap_url" >&2
    return 0
  fi

  body=$(curl -fsSL "$sitemap_url") || {
    echo "Warning: failed to fetch $sitemap_url" >&2
    return 0
  }

  # Extract the <loc> values:
  #  - flatten newlines/tabs first so a <loc> whose value sits on its own
  #    line is still matched by the line-oriented grep;
  #  - plain `grep -o` plus sed instead of `grep -P` (PCRE is GNU-only);
  #  - `|| true` because "no match" (exit 1) is a valid outcome, not an error;
  #  - trim padding, drop empty values, and decode the XML entities that
  #    sitemap URLs are required to use (&amp; last, to avoid double-decoding).
  locs=$(
    tr '\r\n\t' '   ' <<< "$body" \
      | { grep -o '<loc>[^<]*' || true; } \
      | sed -e 's/^<loc>//' \
            -e 's/^ *//' -e 's/ *$//' -e '/^$/d' \
            -e 's/&lt;/</g' -e 's/&gt;/>/g' \
            -e 's/&quot;/"/g' -e "s/&apos;/'/g" \
            -e 's/&amp;/\&/g'
  )

  # Pattern match in the shell rather than `echo | grep -q`: under pipefail
  # an early-exiting grep can SIGPIPE the writer on a large body and make the
  # test report "not an index" even though the marker is present.
  if [[ "$body" == *'<sitemapindex'* ]]; then
    # Recursively expand each child sitemap.
    while IFS= read -r child; do
      if [[ -n "$child" ]]; then
        fetch_sitemap_urls "$child" "$((depth + 1))"
      fi
    done <<< "$locs"
  elif [[ -n "$locs" ]]; then
    # Regular <urlset>: emit page URLs directly.
    printf '%s\n' "$locs"
  fi
}
| 116 | +
|
# Walk the sitemap tree from the root and write the sorted, de-duplicated
# URL list to urls.txt.
fetch_sitemap_urls "$ROOT_SITEMAP" \
  | sort -u > urls.txt

# Count the collected URLs; any padding spaces in wc's output are stripped so
# the value is a bare integer.
total=$(wc -l < urls.txt | tr -d ' ')
printf '%s\n' "Found $total URLs in sitemap"

# With nothing collected, fail the step: raise a workflow error annotation and
# print the root sitemap response for diagnosis. The diagnostic fetch is
# best-effort, so its failure is ignored.
if (( total == 0 )); then
  printf '%s\n' \
    "::error::No URLs were extracted from the sitemap. The link checker has nothing to scan." \
    "Root sitemap response:"
  curl -fsSL "$ROOT_SITEMAP" || true
  exit 1
fi
86 | 128 |
|
87 | 129 | - name: Extract and verify GitHub blob/tree/tag URLs locally |
88 | 130 | id: verify_github |
|
0 commit comments