Skip to content

Commit 231f3be

Browse files
committed
feat(web): add auto-generated type/topic pages for SEO
Add a generate-pages.ts script that queries the database and renders static HTML pages for each document type (10) and topic (9), plus index pages and a dynamically generated sitemap. The deploy workflow is updated to run the generation step in CI and upload the pages to R2 under clean URLs (e.g. /types/legal rather than /types/legal.html).
1 parent 7480c23 commit 231f3be

7 files changed

Lines changed: 857 additions & 2 deletions

File tree

.github/workflows/deploy-site.yml

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ on:
77
paths:
88
- "apps/web/**"
99
- "!apps/web/worker/**"
10-
- "!apps/web/scripts/**"
1110

1211
jobs:
1312
deploy:
@@ -16,13 +15,41 @@ jobs:
1615
steps:
1716
- uses: actions/checkout@v6
1817

18+
- uses: oven-sh/setup-bun@v2
19+
20+
- name: Generate pages
21+
env:
22+
DATABASE_URL: ${{ secrets.DATABASE_URL }}
23+
run: bun run apps/web/scripts/generate-pages.ts
24+
1925
- name: Upload site to R2
2026
env:
2127
AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
2228
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
2329
R2_ENDPOINT: https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com
2430
R2_BUCKET: s3://${{ secrets.R2_BUCKET_NAME }}
2531
run: |
32+
# Static files
2633
aws s3 cp apps/web/index.html "$R2_BUCKET/index.html" --endpoint-url "$R2_ENDPOINT"
2734
aws s3 cp apps/web/sitemap.xml "$R2_BUCKET/sitemap.xml" --endpoint-url "$R2_ENDPOINT"
2835
aws s3 cp apps/web/public/ "$R2_BUCKET/public/" --recursive --endpoint-url "$R2_ENDPOINT"
36+
37+
# Generated type pages (clean URLs: /types/legal, not /types/legal.html)
38+
for f in apps/web/types/*.html; do
39+
name=$(basename "$f" .html)
40+
if [ "$name" = "index" ]; then
41+
aws s3 cp "$f" "$R2_BUCKET/types" --content-type "text/html; charset=utf-8" --endpoint-url "$R2_ENDPOINT"
42+
else
43+
aws s3 cp "$f" "$R2_BUCKET/types/$name" --content-type "text/html; charset=utf-8" --endpoint-url "$R2_ENDPOINT"
44+
fi
45+
done
46+
47+
# Generated topic pages
48+
for f in apps/web/topics/*.html; do
49+
name=$(basename "$f" .html)
50+
if [ "$name" = "index" ]; then
51+
aws s3 cp "$f" "$R2_BUCKET/topics" --content-type "text/html; charset=utf-8" --endpoint-url "$R2_ENDPOINT"
52+
else
53+
aws s3 cp "$f" "$R2_BUCKET/topics/$name" --content-type "text/html; charset=utf-8" --endpoint-url "$R2_ENDPOINT"
54+
fi
55+
done

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ dev/
1818

1919
# Generated web data (local only)
2020
apps/web/data/
21+
apps/web/types/
22+
apps/web/topics/
2123

2224
# Tests
2325
coverage/

apps/web/index.html

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,8 @@
871871
<div class="brand"><span class="accent">docx</span>-corpus</div>
872872
<div class="links">
873873
<a href="#explore">Explore</a>
874+
<a href="/types">Types</a>
875+
<a href="/topics">Topics</a>
874876
<a href="https://github.com/superdoc-dev/docx-corpus">GitHub</a>
875877
<a href="https://huggingface.co/datasets/superdoc-dev/docx-corpus">HuggingFace</a>
876878
</div>

apps/web/scripts/generate-pages.ts

Lines changed: 705 additions & 0 deletions
Large diffs are not rendered by default.

apps/web/scripts/preview.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,18 @@ Bun.serve({
2222
return new Response(file);
2323
}
2424

25+
// Clean URLs: /types/legal → /types/legal.html (generated pages)
26+
const htmlFile = Bun.file(WEB_DIR + path + ".html");
27+
if (await htmlFile.exists()) {
28+
return new Response(htmlFile, { headers: { "Content-Type": "text/html; charset=utf-8" } });
29+
}
30+
31+
// Directory index: /types → /types/index.html
32+
const indexFile = Bun.file(WEB_DIR + path + "/index.html");
33+
if (await indexFile.exists()) {
34+
return new Response(indexFile, { headers: { "Content-Type": "text/html; charset=utf-8" } });
35+
}
36+
2537
return new Response("Not found", { status: 404 });
2638
},
2739
});

apps/web/sitemap.xml

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,109 @@
55
<changefreq>weekly</changefreq>
66
<priority>1.0</priority>
77
</url>
8+
<url>
9+
<loc>https://docxcorp.us/types</loc>
10+
<changefreq>weekly</changefreq>
11+
<priority>0.8</priority>
12+
</url>
13+
<url>
14+
<loc>https://docxcorp.us/topics</loc>
15+
<changefreq>weekly</changefreq>
16+
<priority>0.8</priority>
17+
</url>
18+
<url>
19+
<loc>https://docxcorp.us/types/legal</loc>
20+
<changefreq>weekly</changefreq>
21+
<priority>0.7</priority>
22+
</url>
23+
<url>
24+
<loc>https://docxcorp.us/types/forms</loc>
25+
<changefreq>weekly</changefreq>
26+
<priority>0.7</priority>
27+
</url>
28+
<url>
29+
<loc>https://docxcorp.us/types/educational</loc>
30+
<changefreq>weekly</changefreq>
31+
<priority>0.7</priority>
32+
</url>
33+
<url>
34+
<loc>https://docxcorp.us/types/administrative</loc>
35+
<changefreq>weekly</changefreq>
36+
<priority>0.7</priority>
37+
</url>
38+
<url>
39+
<loc>https://docxcorp.us/types/policies</loc>
40+
<changefreq>weekly</changefreq>
41+
<priority>0.7</priority>
42+
</url>
43+
<url>
44+
<loc>https://docxcorp.us/types/correspondence</loc>
45+
<changefreq>weekly</changefreq>
46+
<priority>0.7</priority>
47+
</url>
48+
<url>
49+
<loc>https://docxcorp.us/types/reports</loc>
50+
<changefreq>weekly</changefreq>
51+
<priority>0.7</priority>
52+
</url>
53+
<url>
54+
<loc>https://docxcorp.us/types/reference</loc>
55+
<changefreq>weekly</changefreq>
56+
<priority>0.7</priority>
57+
</url>
58+
<url>
59+
<loc>https://docxcorp.us/types/technical</loc>
60+
<changefreq>weekly</changefreq>
61+
<priority>0.7</priority>
62+
</url>
63+
<url>
64+
<loc>https://docxcorp.us/types/creative</loc>
65+
<changefreq>weekly</changefreq>
66+
<priority>0.7</priority>
67+
</url>
68+
<url>
69+
<loc>https://docxcorp.us/topics/government</loc>
70+
<changefreq>weekly</changefreq>
71+
<priority>0.7</priority>
72+
</url>
73+
<url>
74+
<loc>https://docxcorp.us/topics/education</loc>
75+
<changefreq>weekly</changefreq>
76+
<priority>0.7</priority>
77+
</url>
78+
<url>
79+
<loc>https://docxcorp.us/topics/healthcare</loc>
80+
<changefreq>weekly</changefreq>
81+
<priority>0.7</priority>
82+
</url>
83+
<url>
84+
<loc>https://docxcorp.us/topics/general</loc>
85+
<changefreq>weekly</changefreq>
86+
<priority>0.7</priority>
87+
</url>
88+
<url>
89+
<loc>https://docxcorp.us/topics/legal_judicial</loc>
90+
<changefreq>weekly</changefreq>
91+
<priority>0.7</priority>
92+
</url>
93+
<url>
94+
<loc>https://docxcorp.us/topics/finance</loc>
95+
<changefreq>weekly</changefreq>
96+
<priority>0.7</priority>
97+
</url>
98+
<url>
99+
<loc>https://docxcorp.us/topics/environment</loc>
100+
<changefreq>weekly</changefreq>
101+
<priority>0.7</priority>
102+
</url>
103+
<url>
104+
<loc>https://docxcorp.us/topics/nonprofit</loc>
105+
<changefreq>weekly</changefreq>
106+
<priority>0.7</priority>
107+
</url>
108+
<url>
109+
<loc>https://docxcorp.us/topics/technology</loc>
110+
<changefreq>weekly</changefreq>
111+
<priority>0.7</priority>
112+
</url>
8113
</urlset>

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
"build": "bun run --cwd packages/scraper build && bun run --cwd apps/cli build",
1616
"release:cli": "bun run --cwd apps/cli release",
1717
"setup:extractor": "bun run --cwd packages/extractor setup",
18-
"prepare": "lefthook install"
18+
"prepare": "lefthook install",
19+
"dev:web": "bun run apps/web/scripts/preview.ts",
20+
"dev:web:generate": "bun run apps/web/scripts/generate-pages.ts"
1921
},
2022
"devDependencies": {
2123
"@biomejs/biome": "^2.4.6",

0 commit comments

Comments
 (0)