Skip to content

Commit a1a8beb

Browse files
committed
feat: Implement automated sitemap generation and add robots.txt with content signals for SEO.
1 parent f770f2c commit a1a8beb

4 files changed

Lines changed: 95 additions & 0 deletions

File tree

ci-build.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,5 @@ chmod +x dotnet-install.sh
1111
src/BlazorApps/BlazorApps.csproj \
1212
-c Release \
1313
-o publish
14+
15+
./generate-sitemap.sh

generate-sitemap.sh

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env bash
#
# generate-sitemap.sh - write sitemap.xml for the published static site.
#
# Usage: ./generate-sitemap.sh
#   Takes no arguments. All paths are relative to the current directory, so
#   run it from the directory that contains "publish/".
#
# Reads:   publish/wwwroot/appsettings.json  (the "BaseUrl" string value)
# Writes:  publish/wwwroot/sitemap.xml       (one <url> per index.html found)
# Exit:    0 on success; non-zero if the settings file or BaseUrl is missing,
#          or if any step of the generation fails.

set -euo pipefail

readonly ROOT_DIR="publish/wwwroot"
readonly OUTPUT="$ROOT_DIR/sitemap.xml"
readonly APPSETTINGS="$ROOT_DIR/appsettings.json"

# Matches `"BaseUrl": "<value>"` and captures <value>. This is a textual match,
# not a JSON parse: it does not understand nesting or escaped quotes.
readonly BASE_URL_RE='"BaseUrl"[[:space:]]*:[[:space:]]*"([^"]*)"'

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Escape the five XML special characters in $1 and print the result.
# The sitemap protocol requires entity-escaped values inside <loc>.
# '&' must be handled first so the other entities are not double-escaped.
xml_escape() {
  printf '%s' "$1" | sed \
    -e 's/&/\&amp;/g' \
    -e 's/</\&lt;/g' \
    -e 's/>/\&gt;/g' \
    -e 's/"/\&quot;/g' \
    -e "s/'/\&apos;/g"
}

# Print the modification time of file $1 in UTC as YYYY-MM-DDTHH:MM:SSZ.
# `date -r FILE` is tried first; if that `date` rejects a file argument,
# fall back to `stat -f %m` (epoch seconds) and `date -r SECONDS`.
file_lastmod() {
  local path=$1
  local fmt='+%Y-%m-%dT%H:%M:%SZ'
  local epoch

  if date -u -r "$path" "$fmt" 2>/dev/null; then
    return 0
  fi
  epoch=$(stat -f '%m' "$path") || return
  date -u -r "$epoch" "$fmt"
}

[[ -f "$APPSETTINGS" ]] || die "appsettings.json not found at $APPSETTINGS"

# Extract BaseUrl from appsettings.json.
# `sed -n` exits 0 even when nothing matches, so a missing key reaches the
# explicit check below instead of aborting silently under `set -e` with
# `pipefail` (which is what a non-matching `grep` in a pipeline would do).
# `q` stops after the first matching line so the value is a single line.
BASE_URL=$(sed -n -E \
  "/${BASE_URL_RE}/{s/.*${BASE_URL_RE}.*/\1/p;q;}" "$APPSETTINGS")

if [[ -z "$BASE_URL" ]]; then
  die "BaseUrl not found in appsettings.json"
fi

# Normalize base URL (remove one trailing slash) so joins below never
# produce "//".
BASE_URL="${BASE_URL%/}"

echo "Using BaseUrl: $BASE_URL"

# Emit the whole document through a single redirection.
{
  printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
  printf '%s\n' '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'

  # Every index.html outside the excluded asset directories becomes one URL.
  # Sorting bytewise makes the output independent of find's traversal order.
  find "$ROOT_DIR" -type f -name "index.html" \
    ! -path "*/_content/*" \
    ! -path "*/_framework/*" \
    ! -path "*/css/*" \
    | LC_ALL=C sort \
    | while IFS= read -r file; do
      # Directory of the file relative to ROOT_DIR, e.g. "/" or "/docs".
      # The inner quotes make ROOT_DIR a literal prefix, not a glob pattern.
      rel_dir=$(dirname "${file#"$ROOT_DIR"}")

      # Root index.html maps to "<base>/"; others to "<base>/<dir>/".
      if [[ "$rel_dir" == "" || "$rel_dir" == "/" ]]; then
        url="$BASE_URL/"
      else
        url="$BASE_URL$rel_dir/"
      fi

      # Assign separately so a failure in either helper aborts the script
      # (a command substitution used directly as an argument would not).
      loc=$(xml_escape "$url")
      lastmod=$(file_lastmod "$file")

      printf '  <url>\n    <loc>%s</loc>\n    <lastmod>%s</lastmod>\n  </url>\n' \
        "$loc" "$lastmod"
    done

  printf '%s\n' '</urlset>'
} > "$OUTPUT"

echo "Sitemap generated at $OUTPUT"

src/BlazorApps/wwwroot/appsettings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
2+
"BaseUrl": "https://lab.junian.net",
23
"Title": "Blazor Lab",
34
"GoogleAnalytics": {
45
"Id": "G-BJ58GF34YZ"

src/BlazorApps/wwwroot/robots.txt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# As a condition of accessing this website, you agree to abide by the following
2+
# content signals:
3+
4+
# (a) If a content-signal = yes, you may collect content for the corresponding
5+
# use.
6+
# (b) If a content-signal = no, you may not collect content for the
7+
# corresponding use.
8+
# (c) If the website operator does not include a content signal for a
9+
# corresponding use, the website operator neither grants nor restricts
10+
# permission via content signal with respect to the corresponding use.
11+
12+
# The content signals and their meanings are:
13+
14+
# search: building a search index and providing search results (e.g., returning
15+
# hyperlinks and short excerpts from your website's contents). Search does not
16+
# include providing AI-generated search summaries.
17+
# ai-input: inputting content into one or more AI models (e.g., retrieval
18+
# augmented generation, grounding, or other real-time taking of content for
19+
# generative AI search answers).
20+
# ai-train: training or fine-tuning AI models.
21+
22+
# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
23+
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
24+
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.
25+
26+
User-Agent: *
27+
Content-Signal: ai-train=no, search=yes, ai-input=no
28+
Allow: /
29+
30+
Sitemap: https://lab.junian.net/sitemap.xml

0 commit comments

Comments
 (0)