Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/changelog.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

A chronological record of all semantic anchors added to the catalog. Community contributors are credited with thanks.

== 2026-06-10

*Discoverability (SEO / AI):*

* *Structured data* — added a standalone `Organization` entity and a `DefinedTermSet` with a `DefinedTerm` for every anchor (name, canonical URL, and a definition where available), generated at build time from `anchors.json`. Lets search engines and retrieval-grounded AI resolve "Semantic Anchors" as a distinct entity and each anchor as a defined term (#579).
* *Fixed:* the _An Anchor Delivers Only as Far as the Prior Reaches_ article was not pre-rendered and was therefore invisible to search engines and LLM crawlers. It is now pre-rendered like every other doc page.

== 2026-06-09

*New contracts:*
Expand Down
168 changes: 168 additions & 0 deletions scripts/generate-jsonld.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#!/usr/bin/env node
/**
* Generate schema.org DefinedTermSet / DefinedTerm JSON-LD from anchors.json
* and inject it into the pre-rendered catalog pages.
*
* Why: the catalog is 160+ well-defined terms, but only their prose is
* crawlable (via /all-anchors). Search engines and retrieval-grounded AI need
* a machine-readable entity graph to resolve "Semantic Anchors" as a distinct
* DefinedTermSet and each anchor as a DefinedTerm with a canonical URL. This is
* the canonical schema.org type for a glossary/controlled vocabulary and is the
* structured-data half of issue #579 (the human-readable definitions already
* ship in crawlable HTML).
*
* Per-term `description` is extracted from the first "Core Concepts" definition
* in each anchor's .adoc when it is cleanly available, and omitted otherwise —
* crisp 40-60 word answer blocks are issue #580's job, and will later supersede
* these as the DefinedTerm descriptions.
*
* Runs AFTER prerender-routes.js so it only touches the home page and
* /all-anchors (the canonical locations for the set), not every route shell.
* When the dist build is absent it prints the JSON-LD to stdout for inspection.
*
* Usage: node scripts/generate-jsonld.js
*/

const fs = require('fs')
const path = require('path')

// Repository root: the parent of the directory this script lives in.
const ROOT = path.join(__dirname, '..')
// Anchor catalog that buildDefinedTermSet() reads and parses as JSON.
const ANCHORS_JSON = path.join(ROOT, 'website/public/data/anchors.json')
// Directory holding the built pages that receive the JSON-LD.
const DIST = path.join(ROOT, 'website/dist')
// Public site origin + base path; prefix of every term URL and of the set URL.
const BASE = 'https://llm-coding.github.io/Semantic-Anchors'
// `@id` of the DefinedTermSet. injectInto() also searches pages for this
// string to decide whether the set has already been injected.
const SET_ID = `${BASE}/#catalog`

// Pages that should carry the DefinedTermSet: the catalog root and the full
// reference. Both represent the whole set; other routes do not.
const TARGETS = [path.join(DIST, 'index.html'), path.join(DIST, 'all-anchors', 'index.html')]

/**
 * Pull a short definition for an anchor from the first "Core Concepts" entry in
 * its .adoc.
 *
 * Only a single-line definition-list entry (`Term:: definition text`) found in
 * the 11 lines that follow the first line mentioning "Core Concepts" is
 * considered. The first such entry decides the result: it is cleaned with
 * cleanAdoc() and, if at least 20 characters remain, capped to 220 characters.
 *
 * @param {string} filePath - Path of the anchor's .adoc, relative to ROOT.
 * @returns {string|null} The cleaned, length-capped definition, or null when
 *   the file is missing or nothing usable is found (safe to omit —
 *   DefinedTerm.description is optional).
 * @throws {Error} Any read error other than "file not found".
 */
function extractDescription(filePath) {
  let source
  try {
    // Read directly instead of probing with existsSync() first: the file could
    // disappear between the check and the read.
    source = fs.readFileSync(path.join(ROOT, filePath), 'utf-8')
  } catch (err) {
    if (err.code === 'ENOENT' || err.code === 'ENOTDIR') return null
    throw err
  }

  // Accept both LF and CRLF line endings. With a plain '\n' split, a trailing
  // '\r' stays on every line of a CRLF checkout and the definition pattern
  // below (whose `.` never matches '\r') silently stops matching.
  const lines = source.split(/\r?\n/)

  const ccIndex = lines.findIndex((line) => /Core Concepts/i.test(line))
  if (ccIndex === -1) return null

  // First definition-list description after the Core Concepts heading:
  //   Term:: definition text
  for (const line of lines.slice(ccIndex + 1, ccIndex + 12)) {
    const match = line.match(/^.+?::\s+(.+)$/)
    if (!match) continue
    const cleaned = cleanAdoc(match[1])
    // Very short definitions are treated as unusable rather than skipped over.
    return cleaned.length >= 20 ? capLength(cleaned, 220) : null
  }
  return null
}

/**
 * Reduce a line of AsciiDoc to plain text by removing the inline markup that
 * would be noise in a description.
 *
 * @param {string} s - Raw AsciiDoc text.
 * @returns {string} Text with links and cross-references replaced by their
 *   labels, emphasis markers removed, and whitespace collapsed and trimmed.
 */
function cleanAdoc(s) {
  // Applied in order; each entry is [pattern, replacement].
  const rules = [
    [/link:[^[]*\[([^\]]*)\]/g, '$1'], // link:url[text] -> text
    [/<<[^,>]+,\s*([^>]+)>>/g, '$1'], // <<id,text>> -> text
    [/<<([^>]+)>>/g, '$1'], // <<id>> -> id
    [/[*_`]/g, ''], // bold/italic/mono markers
    [/\s+/g, ' '], // runs of whitespace -> single space
  ]

  let text = s
  for (const [pattern, replacement] of rules) {
    text = text.replace(pattern, replacement)
  }
  return text.trim()
}

/**
 * Cap a string at a word boundary, appending an ellipsis when truncated.
 *
 * The text is cut to `max` UTF-16 code units and then shortened to the last
 * space inside that window, provided the space lies beyond index 40 (so a
 * long first word does not shrink the result to almost nothing). The ellipsis
 * is appended after the cut, so a truncated result can be `max + 1` long.
 *
 * @param {string} s - Text to cap.
 * @param {number} max - Maximum length before the ellipsis is appended.
 * @returns {string} `s` unchanged when it fits, otherwise the truncated text
 *   followed by "…".
 */
function capLength(s, max) {
  if (s.length <= max) return s
  const cut = s.slice(0, max)
  const lastSpace = cut.lastIndexOf(' ')
  let head = cut.slice(0, lastSpace > 40 ? lastSpace : max).trim()

  // A hard cut can land between the two halves of a surrogate pair (e.g. an
  // emoji). Drop the dangling high surrogate so the output stays valid text.
  const lastUnit = head.charCodeAt(head.length - 1)
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1).trimEnd()
  }
  return `${head}…`
}

/**
 * Build the schema.org DefinedTermSet object from anchors.json.
 *
 * The file may be either a bare array of anchors or an object with an
 * `anchors` array. Entries without both `id` and `title` are ignored. Each
 * remaining entry becomes a DefinedTerm; a `description` is attached only when
 * the entry has a `filePath` and extractDescription() yields one.
 *
 * @returns {object} The DefinedTermSet with its terms in `hasDefinedTerm`.
 */
function buildDefinedTermSet() {
  const parsed = JSON.parse(fs.readFileSync(ANCHORS_JSON, 'utf-8'))
  let entries = parsed
  if (!Array.isArray(parsed)) entries = parsed.anchors || []

  const isUsable = (anchor) => anchor && anchor.id && anchor.title

  const terms = []
  for (const anchor of entries.filter(isUsable)) {
    const url = `${BASE}/anchor/${anchor.id}`
    const description = anchor.filePath ? extractDescription(anchor.filePath) : null
    terms.push({
      '@type': 'DefinedTerm',
      '@id': url,
      name: anchor.title,
      termCode: anchor.id,
      url,
      inDefinedTermSet: SET_ID,
      // Optional property: present only when a description was extracted.
      ...(description ? { description } : {}),
    })
  }

  const definedTermSet = {
    '@context': 'https://schema.org',
    '@type': 'DefinedTermSet',
    '@id': SET_ID,
    name: 'Semantic Anchors',
    url: `${BASE}/`,
    description:
      'A curated catalog of semantic anchors — well-defined terms, methodologies, and frameworks used as shared vocabulary when communicating with Large Language Models.',
    hasDefinedTerm: terms,
  }
  return definedTermSet
}

/**
 * Serialize a DefinedTermSet as a JSON-LD <script> tag.
 *
 * Every `<` in the JSON is written as the escape sequence \u003c so a stray
 * "</script>" inside any description can never break out of the element
 * (standard JSON-LD hardening). JSON parsers decode the escape back to `<`.
 *
 * @param {object} [termSet] - The set to serialize. Defaults to a freshly
 *   built buildDefinedTermSet(); callers that already hold the set can pass it
 *   to avoid building it a second time.
 * @returns {string} The complete `<script type="application/ld+json">` element.
 */
function buildScriptTag(termSet = buildDefinedTermSet()) {
  const json = JSON.stringify(termSet, null, 2).replace(/</g, '\\u003c')
  return `<script type="application/ld+json">\n${json}\n</script>`
}

/**
 * Insert the script tag before the first </head>, unless the set is already
 * present.
 *
 * The file is opened once and both read and rewritten through the same file
 * descriptor, so the content that was inspected is the content that gets
 * replaced (no window between an existence check, the read, and the write).
 *
 * @param {string} file - Path of the HTML page to update.
 * @param {string} scriptTag - The JSON-LD <script> element to insert.
 * @returns {boolean} true when the page was modified; false when the file does
 *   not exist, already contains SET_ID, or has no </head>.
 * @throws {Error} Any open/read/write error other than "file not found".
 */
function injectInto(file, scriptTag) {
  let fd
  try {
    fd = fs.openSync(file, 'r+')
  } catch (err) {
    if (err.code === 'ENOENT' || err.code === 'ENOTDIR') return false
    throw err
  }

  try {
    const html = fs.readFileSync(fd, 'utf-8')
    if (html.includes(SET_ID)) return false // idempotent
    if (!html.includes('</head>')) return false

    // Use a replacer function: with a plain string, `$&`, `$'`, `$$` etc.
    // occurring inside the JSON would be interpreted as replacement patterns
    // and corrupt the injected payload.
    const updated = html.replace('</head>', () => ` ${scriptTag}\n </head>`)

    // Overwrite from the start of the file, looping in case of short writes,
    // then trim to the exact new size.
    const bytes = Buffer.from(updated, 'utf-8')
    let written = 0
    while (written < bytes.length) {
      written += fs.writeSync(fd, bytes, written, bytes.length - written, written)
    }
    fs.ftruncateSync(fd, bytes.length)
    return true
  } finally {
    fs.closeSync(fd)
  }
}

/**
 * Command-line entry point.
 *
 * Builds the DefinedTermSet and its script tag. If none of the TARGETS pages
 * exists, the set is printed to stdout as JSON (with a hint on stderr) and
 * nothing else happens. Otherwise the tag is injected into each target and a
 * summary is logged.
 */
function main() {
  const set = buildDefinedTermSet()
  const scriptTag = buildScriptTag()
  const terms = set.hasDefinedTerm

  const hasBuild = TARGETS.some((target) => fs.existsSync(target))
  if (!hasBuild) {
    // No build present — print for inspection so the output can be validated
    // without a full vite build.
    process.stdout.write(JSON.stringify(set, null, 2) + '\n')
    console.warn(
      `\n(no dist build found — printed ${terms.length} DefinedTerms to stdout; run after 'vite build' to inject)`
    )
    return
  }

  // Inject and report page by page, counting only pages actually modified.
  let injected = 0
  TARGETS.forEach((file) => {
    if (!injectInto(file, scriptTag)) return
    injected += 1
    console.log(` ✓ injected DefinedTermSet into ${path.relative(ROOT, file)}`)
  })

  const withDesc = terms.reduce((count, term) => (term.description ? count + 1 : count), 0)
  console.log(
    `\n✓ DefinedTermSet: ${terms.length} terms (${withDesc} with description) injected into ${injected} page(s)`
  )
}

// Run the generator only when this file is executed directly, not when it is
// loaded with require() by another module.
if (require.main === module) main()

// Expose the two builders so other modules can reuse them.
module.exports = { buildDefinedTermSet, buildScriptTag }
7 changes: 7 additions & 0 deletions scripts/prerender-routes.js
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ const ROUTES = [
description:
'Installable Claude Code Skill that packages the brownfield documentation-recovery workflow. Two-phase Question Tree with [ANSWERED]/[OPEN] leaves, Q-ID traceability. Install on Claude Code, Codex, Cursor, GitHub Copilot, Gemini CLI, and Amazon Kiro.',
},
{
path: '/training-data-vs-practice',
fragment: 'docs/training-data-vs-practice.html',
title: 'An Anchor Delivers Only as Far as the Prior Reaches — Semantic Anchors',
description:
"A semantic anchor's power depends on how densely the concept sits in an LLM's training data. A reproducible clean-room experiment across Claude Haiku 4.5, Sonnet 4.6, Opus 4.8 and Fable 5 on the Cockburn use-cases anchor.",
},
{
path: '/contracts',
fragment: 'docs/contracts.html',
Expand Down
26 changes: 21 additions & 5 deletions website/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,7 @@
"url": "https://llm-coding.github.io/Semantic-Anchors/",
"description": "110+ semantic anchors and semantic contracts for precise communication with Large Language Models. Evaluated across 10 models.",
"inLanguage": ["en", "de"],
"publisher": {
"@type": "Organization",
"name": "LLM Coding Community",
"url": "https://github.com/LLM-Coding"
},
"publisher": { "@id": "https://llm-coding.github.io/Semantic-Anchors/#organization" },
"potentialAction": {
"@type": "SearchAction",
"target": "https://llm-coding.github.io/Semantic-Anchors/#/search?q={search_term_string}",
Expand All @@ -66,6 +62,26 @@
}
</script>

<!-- Standalone Organization entity (resolvable by @id, not only nested as
publisher) so search engines and AI can identify "Semantic Anchors" as
a distinct entity. See issue #579. -->
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "Organization",
"@id": "https://llm-coding.github.io/Semantic-Anchors/#organization",
"name": "Semantic Anchors",
"alternateName": "LLM Coding Community",
"url": "https://llm-coding.github.io/Semantic-Anchors/",
"logo": "https://llm-coding.github.io/Semantic-Anchors/logo.png",
"description": "A curated catalog of semantic anchors and semantic contracts — shared vocabulary for precise communication with Large Language Models.",
"sameAs": [
"https://github.com/LLM-Coding",
"https://github.com/LLM-Coding/Semantic-Anchors"
]
}
</script>

<!-- Privacy-friendly, cookieless analytics (GoatCounter). No cookies, no
personal data, no IP storage — so no consent banner is required.
count.js is self-hosted (first-party) to avoid a third-party script
Expand Down
2 changes: 1 addition & 1 deletion website/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"predev": "node ../scripts/sync-anchors.js",
"dev": "vite",
"prebuild": "node ../scripts/sync-anchors.js && node ../scripts/render-docs.js && node ../scripts/render-contracts.js",
"build": "vite build && node ../scripts/prerender-routes.js",
"build": "vite build && node ../scripts/prerender-routes.js && node ../scripts/generate-jsonld.js",
"preview": "vite preview",
"test": "vitest run",
"test:watch": "vitest",
Expand Down
Loading