Skip to content

Commit 0a9e5f2

Browse files
ci: handle <sitemapindex> in check-links workflow
Co-Authored-By: David Konigsberg <davidakonigsberg@gmail.com>
1 parent 7ac069c commit 0a9e5f2

1 file changed

Lines changed: 44 additions & 2 deletions

File tree

.github/workflows/check-links.yml

Lines changed: 44 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -81,8 +81,50 @@ jobs:
8181
8282
- name: Fetch sitemap and extract URLs
8383
run: |
84-
curl -s https://buildwithfern.com/learn/sitemap.xml | grep -oP '(?<=<loc>)[^<]+' > urls.txt
85-
echo "Found $(wc -l < urls.txt) URLs in sitemap"
84+
# The sitemap may be either a regular <urlset> or a <sitemapindex>
# that points to per-language sub-sitemaps (e.g. sitemap-en.xml,
# sitemap-zh.xml). Handle both shapes by recursively expanding any
# sitemapindex into its child sitemaps before extracting page URLs.

# Strict mode: abort on unhandled errors (-e), on unset variables (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Entry-point sitemap for the crawl. Declared readonly because nothing in
# this step is meant to reassign it.
readonly ROOT_SITEMAP="https://buildwithfern.com/learn/sitemap.xml"
90+
91+
#######################################
# Print every page URL reachable from a sitemap, one per line.
#
# A <urlset> document has its <loc> values printed directly. A
# <sitemapindex> document is expanded by fetching each child sitemap it
# lists and applying the same logic to it.
#
# Arguments:
#   $1 - URL of the sitemap (or sitemap index) to fetch.
#   $2 - (optional, internal) current nesting depth; defaults to 0.
# Outputs:
#   Page URLs on stdout; warnings on stderr.
# Returns:
#   Always 0. A sitemap that cannot be fetched, or that is nested too
#   deeply, is skipped with a warning (best-effort crawl).
# Notes:
#   Requires a grep with -P (PCRE) support. <loc> values are emitted
#   verbatim; XML entities such as &amp; are not decoded.
#######################################
fetch_sitemap_urls() {
  local sitemap_url="$1"
  local depth="${2:-0}"
  # Upper bound on index-within-index nesting. Without it, an index that
  # lists itself (directly or through a cycle) would recurse forever.
  local max_depth=5
  local body locs child

  if (( depth > max_depth )); then
    echo "Warning: sitemap nesting exceeds $max_depth levels; skipping $sitemap_url" >&2
    return 0
  fi

  body=$(curl -fsSL "$sitemap_url") || {
    echo "Warning: failed to fetch $sitemap_url" >&2
    return 0
  }

  # grep exits 1 when nothing matches; that is not an error here.
  locs=$(grep -oP '(?<=<loc>)[^<]+' <<< "$body" || true)

  # Match inside the shell instead of `echo "$body" | grep -q`. Under
  # `set -o pipefail`, grep -q exits on the first hit, and for a large body
  # the resulting SIGPIPE on echo makes the pipeline report failure, which
  # misclassifies a sitemap index as a plain urlset.
  if [[ "$body" == *'<sitemapindex'* ]]; then
    # Recursively expand each child sitemap.
    while IFS= read -r child; do
      if [[ -n "$child" ]]; then
        fetch_sitemap_urls "$child" "$((depth + 1))"
      fi
    done <<< "$locs"
  elif [[ -n "$locs" ]]; then
    # Regular <urlset> — emit page URLs directly.
    printf '%s\n' "$locs"
  fi
}
116+
117+
# Crawl from the root sitemap and store the de-duplicated, sorted page
# URLs in urls.txt.
fetch_sitemap_urls "$ROOT_SITEMAP" | sort -u > urls.txt

# Count the collected URLs. Some wc implementations pad the number with
# spaces, so strip them before using it as an integer.
total=$(wc -l < urls.txt)
total=${total// /}
printf '%s\n' "Found $total URLs in sitemap"

# An empty list means the link checker would have nothing to scan. Raise a
# workflow error annotation, dump the root sitemap response for debugging
# (best-effort), and fail the step.
if (( total == 0 )); then
  printf '%s\n' "::error::No URLs were extracted from the sitemap. The link checker has nothing to scan."
  printf '%s\n' "Root sitemap response:"
  curl -fsSL "$ROOT_SITEMAP" || true
  exit 1
fi
86128
87129
- name: Extract and verify GitHub blob/tree/tag URLs locally
88130
id: verify_github

0 commit comments

Comments
 (0)