Skip to content

Commit c5d4735

Browse files
authored
Merge pull request #592 from raifdmueller/feat/jsonld-defined-terms
feat(seo): structured data (Organization + DefinedTermSet) and pre-render the article
2 parents 86ee9ca + dd8ab77 commit c5d4735

5 files changed

Lines changed: 204 additions & 6 deletions

File tree

docs/changelog.adoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22

33
A chronological record of all semantic anchors added to the catalog. Community contributors are credited with thanks.
44

5+
== 2026-06-10
6+
7+
*Discoverability (SEO / AI):*
8+
9+
* *Structured data* — added a standalone `Organization` entity and a `DefinedTermSet` with a `DefinedTerm` for every anchor (name, canonical URL, and a definition where available), generated at build time from `anchors.json`. Lets search engines and retrieval-grounded AI resolve "Semantic Anchors" as a distinct entity and each anchor as a defined term (#579).
10+
* *Fixed:* the _An Anchor Delivers Only as Far as the Prior Reaches_ article was not pre-rendered and was therefore invisible to search engines and LLM crawlers. It is now pre-rendered like every other doc page.
11+
512
== 2026-06-09
613

714
*New contracts:*

scripts/generate-jsonld.js

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
#!/usr/bin/env node
2+
/**
3+
* Generate schema.org DefinedTermSet / DefinedTerm JSON-LD from anchors.json
4+
* and inject it into the pre-rendered catalog pages.
5+
*
6+
* Why: the catalog is 160+ well-defined terms, but only their prose is
7+
* crawlable (via /all-anchors). Search engines and retrieval-grounded AI need
8+
* a machine-readable entity graph to resolve "Semantic Anchors" as a distinct
9+
* DefinedTermSet and each anchor as a DefinedTerm with a canonical URL. This is
10+
* the canonical schema.org type for a glossary/controlled vocabulary and is the
11+
* structured-data half of issue #579 (the human-readable definitions already
12+
* ship in crawlable HTML).
13+
*
14+
* Per-term `description` is extracted from the first "Core Concepts" definition
15+
* in each anchor's .adoc when it is cleanly available, and omitted otherwise —
16+
* crisp 40-60 word answer blocks are issue #580's job, and will later supersede
17+
* these as the DefinedTerm descriptions.
18+
*
19+
* Runs AFTER prerender-routes.js so it only touches the home page and
20+
* /all-anchors (the canonical locations for the set), not every route shell.
21+
* When the dist build is absent it prints the JSON-LD to stdout for inspection.
22+
*
23+
* Usage: node scripts/generate-jsonld.js
24+
*/
25+
26+
const fs = require('fs')
27+
const path = require('path')
28+
29+
// Repository root: the parent of the directory that contains this script.
const ROOT = path.join(__dirname, '..')
30+
// Anchor catalog file that buildDefinedTermSet() reads and parses.
const ANCHORS_JSON = path.join(ROOT, 'website/public/data/anchors.json')
31+
// Directory holding the built pages listed in TARGETS (presumably the vite
// build output — confirm against the website build config).
const DIST = path.join(ROOT, 'website/dist')
32+
// Public site prefix used to form every term URL and the set's own URL.
const BASE = 'https://llm-coding.github.io/Semantic-Anchors'
33+
// @id of the DefinedTermSet; injectInto() also looks for this string in a
// page to decide whether the set has already been injected.
const SET_ID = `${BASE}/#catalog`
34+
35+
// Pages that should carry the DefinedTermSet: the catalog root and the full
36+
// reference. Both represent the whole set; other routes do not.
37+
const TARGETS = [path.join(DIST, 'index.html'), path.join(DIST, 'all-anchors', 'index.html')]
38+
39+
/**
 * Pull a short definition for an anchor from the first "Core Concepts" entry
 * in its .adoc file.
 *
 * @param {string} filePath - Path of the anchor's .adoc, relative to ROOT.
 * @returns {string|null} The cleaned definition, capped via capLength(…, 220),
 *   or null when nothing usable is found: the file is missing, no line
 *   mentions "Core Concepts", no definition-list entry appears in the 11
 *   lines after it, or the first entry is shorter than 20 characters once
 *   cleaned. Null is safe to omit — DefinedTerm.description is optional.
 */
function extractDescription(filePath) {
  const abs = path.join(ROOT, filePath)
  if (!fs.existsSync(abs)) return null

  // Split on LF and CRLF alike. Splitting on '\n' alone leaves a trailing
  // '\r' on every line of a CRLF checkout; `.` never matches '\r' and `$`
  // only matches at the very end, so the definition regex below would never
  // match and every description would be silently dropped.
  const lines = fs.readFileSync(abs, 'utf-8').split(/\r?\n/)

  const ccIndex = lines.findIndex((l) => /Core Concepts/i.test(l))
  if (ccIndex === -1) return null

  // First definition-list description after the Core Concepts heading:
  //   Term:: definition text
  // Only a short window is scanned so a definition list from a later,
  // unrelated part of the document is not picked up by accident.
  for (let i = ccIndex + 1; i < lines.length && i < ccIndex + 12; i++) {
    const m = lines[i].match(/^.+?::\s+(.+)$/)
    if (m) {
      const cleaned = cleanAdoc(m[1])
      // Very short fragments make poor descriptions; omit rather than emit.
      return cleaned.length >= 20 ? capLength(cleaned, 220) : null
    }
  }
  return null
}
63+
64+
/**
 * Reduce a fragment of AsciiDoc to plain text by removing the inline markup
 * that would be noise in a description.
 *
 * @param {string} s - Raw AsciiDoc fragment.
 * @returns {string} The fragment with links and cross-references replaced by
 *   their visible text, emphasis markers removed, and whitespace collapsed.
 */
function cleanAdoc(s) {
  // Applied strictly in this order: markup is unwrapped first, whitespace is
  // normalised last.
  const rules = [
    [/link:[^[]*\[([^\]]*)\]/g, '$1'], // link:url[text] -> text
    [/<<[^,>]+,\s*([^>]+)>>/g, '$1'], // <<id,text>> -> text
    [/<<([^>]+)>>/g, '$1'], // <<id>> -> id
    [/[*_`]/g, ''], // bold/italic/mono markers
    [/\s+/g, ' '],
  ]

  let text = s
  for (const [pattern, replacement] of rules) {
    text = text.replace(pattern, replacement)
  }
  return text.trim()
}
74+
75+
/**
 * Cap a string at a word boundary, appending an ellipsis when truncated.
 *
 * @param {string} s - Text to shorten.
 * @param {number} max - Maximum number of UTF-16 code units kept from `s`.
 *   The appended ellipsis is not counted, so a truncated result can be
 *   `max + 1` long.
 * @returns {string} `s` unchanged when it already fits; otherwise a prefix of
 *   `s` followed by "…".
 */
function capLength(s, max) {
  if (s.length <= max) return s

  let cut = s.slice(0, max)

  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair (emoji, some CJK). Drop the dangling high
  // surrogate instead of emitting an unpaired one into the JSON-LD.
  const lastUnit = cut.charCodeAt(cut.length - 1)
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut = cut.slice(0, -1)

  // Back up to the last space only when that still leaves a reasonable
  // amount of text; otherwise keep the hard cut.
  const lastSpace = cut.lastIndexOf(' ')
  const end = lastSpace > 40 ? lastSpace : cut.length
  return `${cut.slice(0, end).trim()}…`
}
82+
83+
/**
 * Build the schema.org DefinedTermSet object from anchors.json.
 *
 * The file may hold either a bare array of anchors or an object with an
 * `anchors` array. Entries lacking an `id` or `title` are skipped. Each
 * remaining anchor becomes a DefinedTerm; `description` is attached only when
 * extractDescription() yields one for the anchor's `filePath`.
 *
 * @returns {object} The DefinedTermSet, with its terms under `hasDefinedTerm`.
 */
function buildDefinedTermSet() {
  const parsed = JSON.parse(fs.readFileSync(ANCHORS_JSON, 'utf-8'))
  let entries = parsed
  if (!Array.isArray(parsed)) entries = parsed.anchors || []

  const isUsable = (anchor) => anchor && anchor.id && anchor.title

  const toTerm = ({ id, title, filePath }) => {
    const url = `${BASE}/anchor/${id}`
    const description = filePath ? extractDescription(filePath) : null
    return {
      '@type': 'DefinedTerm',
      '@id': url,
      name: title,
      termCode: id,
      url,
      inDefinedTermSet: SET_ID,
      // Key is absent (not null) when there is no description.
      ...(description ? { description } : {}),
    }
  }

  const hasDefinedTerm = entries.filter(isUsable).map(toTerm)

  return {
    '@context': 'https://schema.org',
    '@type': 'DefinedTermSet',
    '@id': SET_ID,
    name: 'Semantic Anchors',
    url: `${BASE}/`,
    description:
      'A curated catalog of semantic anchors — well-defined terms, methodologies, and frameworks used as shared vocabulary when communicating with Large Language Models.',
    hasDefinedTerm,
  }
}
116+
117+
/**
 * Serialize the DefinedTermSet as a JSON-LD <script> element.
 *
 * Every `<` in the JSON is written as the escape sequence `\u003c`, so a
 * stray "</script>" inside any description can never break out of the element
 * (standard JSON-LD hardening). JSON parsers decode the escape back to `<`,
 * so the data itself is unchanged.
 *
 * @param {object} [set] - Object to serialize. Defaults to a freshly built
 *   buildDefinedTermSet() result; pass an existing one to avoid re-reading
 *   the catalog from disk.
 * @returns {string} The complete `<script type="application/ld+json">` tag.
 */
function buildScriptTag(set = buildDefinedTermSet()) {
  const json = JSON.stringify(set, null, 2).replace(/</g, '\\u003c')
  return `<script type="application/ld+json">\n${json}\n</script>`
}
126+
127+
/**
 * Insert the script tag before the first `</head>` of an HTML file, unless
 * the set is already present.
 *
 * @param {string} file - Path of the HTML file, rewritten in place.
 * @param {string} scriptTag - Complete `<script>` element to insert.
 * @returns {boolean} true when the file was rewritten; false when the file
 *   does not exist, already contains SET_ID, or has no `</head>`.
 */
function injectInto(file, scriptTag) {
  if (!fs.existsSync(file)) return false
  const html = fs.readFileSync(file, 'utf-8')
  if (html.includes(SET_ID)) return false // idempotent
  if (!html.includes('</head>')) return false

  // Use a replacer function, not a replacement string: in a string, `$$`,
  // `$&`, `` $` `` and `$'` are substitution patterns, so a description that
  // contains a dollar sign would be rewritten (or would splice parts of the
  // page into the JSON) instead of being inserted verbatim.
  const updated = html.replace('</head>', () => ` ${scriptTag}\n </head>`)
  fs.writeFileSync(file, updated, 'utf-8')
  return true
}
137+
138+
/**
 * CLI entry point.
 *
 * Builds the DefinedTermSet, then either injects it into every existing page
 * in TARGETS or, when none of those pages exist, prints the JSON-LD to stdout
 * so the output can be inspected without a build.
 */
function main() {
  const set = buildDefinedTermSet()

  const anyDist = TARGETS.some((f) => fs.existsSync(f))
  if (!anyDist) {
    // No build present — print for inspection so the output can be validated
    // without a full vite build.
    process.stdout.write(`${JSON.stringify(set, null, 2)}\n`)
    console.warn(
      `\n(no dist build found — printed ${set.hasDefinedTerm.length} DefinedTerms to stdout; run after 'vite build' to inject)`
    )
    return
  }

  // Serialize only once we know there is something to inject into:
  // buildScriptTag() builds the term set from disk again, which is wasted
  // work on the print-only path above.
  const scriptTag = buildScriptTag()

  let injected = 0
  for (const file of TARGETS) {
    if (injectInto(file, scriptTag)) {
      injected++
      console.log(` ✓ injected DefinedTermSet into ${path.relative(ROOT, file)}`)
    }
  }

  const withDesc = set.hasDefinedTerm.filter((t) => t.description).length
  console.log(
    `\n✓ DefinedTermSet: ${set.hasDefinedTerm.length} terms (${withDesc} with description) injected into ${injected} page(s)`
  )
}
165+
166+
// Run the CLI only when this file is executed directly, not when it is
// loaded with require() from another module.
if (require.main === module) main()
167+
168+
// Exported so the builders can be called from other modules.
module.exports = { buildDefinedTermSet, buildScriptTag }

scripts/prerender-routes.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,13 @@ const ROUTES = [
8686
description:
8787
'Installable Claude Code Skill that packages the brownfield documentation-recovery workflow. Two-phase Question Tree with [ANSWERED]/[OPEN] leaves, Q-ID traceability. Install on Claude Code, Codex, Cursor, GitHub Copilot, Gemini CLI, and Amazon Kiro.',
8888
},
89+
{
90+
path: '/training-data-vs-practice',
91+
fragment: 'docs/training-data-vs-practice.html',
92+
title: 'An Anchor Delivers Only as Far as the Prior Reaches — Semantic Anchors',
93+
description:
94+
"A semantic anchor's power depends on how densely the concept sits in an LLM's training data. A reproducible clean-room experiment across Claude Haiku 4.5, Sonnet 4.6, Opus 4.8 and Fable 5 on the Cockburn use-cases anchor.",
95+
},
8996
{
9097
path: '/contracts',
9198
fragment: 'docs/contracts.html',

website/index.html

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,7 @@
5353
"url": "https://llm-coding.github.io/Semantic-Anchors/",
5454
"description": "110+ semantic anchors and semantic contracts for precise communication with Large Language Models. Evaluated across 10 models.",
5555
"inLanguage": ["en", "de"],
56-
"publisher": {
57-
"@type": "Organization",
58-
"name": "LLM Coding Community",
59-
"url": "https://github.com/LLM-Coding"
60-
},
56+
"publisher": { "@id": "https://llm-coding.github.io/Semantic-Anchors/#organization" },
6157
"potentialAction": {
6258
"@type": "SearchAction",
6359
"target": "https://llm-coding.github.io/Semantic-Anchors/#/search?q={search_term_string}",
@@ -66,6 +62,26 @@
6662
}
6763
</script>
6864

65+
<!-- Standalone Organization entity (resolvable by @id, not only nested as
66+
publisher) so search engines and AI can identify "Semantic Anchors" as
67+
a distinct entity. See issue #579. -->
68+
<script type="application/ld+json">
69+
{
70+
"@context": "https://schema.org",
71+
"@type": "Organization",
72+
"@id": "https://llm-coding.github.io/Semantic-Anchors/#organization",
73+
"name": "Semantic Anchors",
74+
"alternateName": "LLM Coding Community",
75+
"url": "https://llm-coding.github.io/Semantic-Anchors/",
76+
"logo": "https://llm-coding.github.io/Semantic-Anchors/logo.png",
77+
"description": "A curated catalog of semantic anchors and semantic contracts — shared vocabulary for precise communication with Large Language Models.",
78+
"sameAs": [
79+
"https://github.com/LLM-Coding",
80+
"https://github.com/LLM-Coding/Semantic-Anchors"
81+
]
82+
}
83+
</script>
84+
6985
<!-- Privacy-friendly, cookieless analytics (GoatCounter). No cookies, no
7086
personal data, no IP storage — so no consent banner is required.
7187
count.js is self-hosted (first-party) to avoid a third-party script

website/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"predev": "node ../scripts/sync-anchors.js",
99
"dev": "vite",
1010
"prebuild": "node ../scripts/sync-anchors.js && node ../scripts/render-docs.js && node ../scripts/render-contracts.js",
11-
"build": "vite build && node ../scripts/prerender-routes.js",
11+
"build": "vite build && node ../scripts/prerender-routes.js && node ../scripts/generate-jsonld.js",
1212
"preview": "vite preview",
1313
"test": "vitest run",
1414
"test:watch": "vitest",

0 commit comments

Comments
 (0)