Skip to content

Commit fce5c57

Browse files
committed
fix(seo): fix Rich Results, trim title/desc, add JSON-LD to index pages
Add description, creator, and license to the nested isPartOf Dataset on type/topic pages so that they pass Google Rich Results validation. Trim the homepage title to 56 chars and the description to 150 chars. Add CollectionPage JSON-LD to the /types and /topics index pages.
1 parent 92b9841 commit fce5c57

2 files changed

Lines changed: 27 additions & 3 deletions

File tree

apps/web/index.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
</script>
1313
<meta charset="UTF-8">
1414
<meta name="viewport" content="width=device-width, initial-scale=1.0">
15-
<title>docx-corpus — The largest open corpus of classified Word documents</title>
15+
<title>docx-corpus — Open corpus of classified Word documents</title>
1616
<meta name="description"
17-
content="736K+ real .docx files from the public web, classified into 10 document types and 9 topics across 46+ languages. The missing dataset for document processing research.">
17+
content="736K+ real .docx files from the public web, classified into 10 types and 9 topics across 46+ languages. Open dataset for document processing research.">
1818
<link rel="canonical" href="https://docxcorp.us">
1919
<meta property="og:title" content="docx-corpus — Every Word document on the public web. Classified and open.">
2020
<meta property="og:description"

apps/web/scripts/generate-pages.ts

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,14 @@ function renderPage(data: PageData): string {
490490
"name": "docx-corpus — ${esc(label)} Documents",
491491
"description": "${esc(metaDesc)}",
492492
"url": "${SITE}/${urlPrefix}/${id}",
493-
"isPartOf": { "@type": "Dataset", "name": "docx-corpus", "url": "${SITE}" },
493+
"isPartOf": {
494+
"@type": "Dataset",
495+
"name": "docx-corpus",
496+
"description": "The largest open corpus of classified Word documents. 736K+ real .docx files from the public web, classified into 10 document types and 9 topics across 46+ languages.",
497+
"url": "${SITE}",
498+
"license": "https://github.com/superdoc-dev/docx-corpus/blob/main/LICENSE",
499+
"creator": { "@type": "Organization", "name": "SuperDoc", "url": "https://superdoc.dev" }
500+
},
494501
"license": "https://github.com/superdoc-dev/docx-corpus/blob/main/LICENSE",
495502
"creator": { "@type": "Organization", "name": "SuperDoc", "url": "https://superdoc.dev" },
496503
"keywords": ["docx", "${label.toLowerCase()}", "word documents", "dataset", "NLP", "document processing"]
@@ -570,6 +577,23 @@ function renderIndexPage(
570577
return `<!DOCTYPE html>
571578
<html lang="en">
572579
<head>${sharedHead(title, description, `/${urlPrefix}`, kind, "")}
580+
<script type="application/ld+json">
581+
{
582+
"@context": "https://schema.org",
583+
"@type": "CollectionPage",
584+
"name": "${esc(title)}",
585+
"description": "${esc(description)}",
586+
"url": "${SITE}/${urlPrefix}",
587+
"isPartOf": {
588+
"@type": "Dataset",
589+
"name": "docx-corpus",
590+
"description": "The largest open corpus of classified Word documents. 736K+ real .docx files from the public web, classified into 10 document types and 9 topics across 46+ languages.",
591+
"url": "${SITE}",
592+
"license": "https://github.com/superdoc-dev/docx-corpus/blob/main/LICENSE",
593+
"creator": { "@type": "Organization", "name": "SuperDoc", "url": "https://superdoc.dev" }
594+
}
595+
}
596+
</script>
573597
<style>
574598
.index-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 16px; padding-bottom: 64px; }
575599
.index-card { padding: 24px; border: 1px solid #f0f0f0; border-radius: 12px; text-decoration: none; transition: border-color 0.15s; display: block; }

0 commit comments

Comments
 (0)