Skip to content

Commit 39ce491

Browse files
marcoscaceres, Copilot, and sidvishnoi
authored
feat(xref): add headings lookup API for cross-spec section links (#469)
* feat(xref): add headings lookup API for cross-spec section links Extends the xref infrastructure to read and serve section heading data from WebRef's ed/headings/ directory. This enables ReSpec's [[[SPEC#id]]] syntax to display actual heading text instead of just the spec title. New endpoint: POST /xref/headings Request: { queries: [{ spec: 'fetch', id: 'cookie-header' }] } Response: { result: [{ spec, id, title, number, href, level, specTitle }] } Changes: - scraper.ts: reads ed/headings/*.json during update, writes headings.json - store.ts: loads headings data, adds getHeading(spec, id) lookup method - headings.post.ts: new route handler - index.ts: registers the /xref/headings POST endpoint - update.ts: also triggers on ed/headings/ changes in webref webhook * fix: address Copilot review feedback on headings API - store.ts: index headings by id for O(1) lookup instead of linear scan; precompute specTitleByShortname map; split readJson into required/optional variants so missing xref.json still throws - scraper.ts: fix doc comment on readAllHeadings - headings.post.ts: validate queries is an array before mapping * refactor(xref): address reviewer feedback on headings API - Move `readdir` to top-level fs/promises import in scraper.ts - Add generic type `readJSON<T>` with HeadingsJSON interface to reduce `any` - Use typed readJSON<T> calls in getAllData and readAllHeadings - Change specTitleByShortname to Map<string, string> in store.ts - Add per-item validation (spec/id are strings) in headings.post.ts Agent-Logs-Url: https://github.com/speced/respec-web-services/sessions/e14c5758-5d6f-4324-bb98-77839d96f660 * Apply suggestion from @sidvishnoi Co-authored-by: Sid Vishnoi <8426945+sidvishnoi@users.noreply.github.com> * Update routes/xref/index.ts Co-authored-by: Sid Vishnoi <8426945+sidvishnoi@users.noreply.github.com> * fix(xref/headings): fix specTitleMap, add query limit, harden error handling - Fix buildSpecTitleMap to iterate nested specmap structure 
correctly (was iterating top-level {current, snapshot} objects instead of entries) - Add 1000-query limit and empty-string validation to headings endpoint - Only catch ENOENT in readJsonOptional (was swallowing all errors) - Remove dead spec?.shortname branch from scraper (webref doesn't include it) - Update JSDoc to reflect actual route path (/xref/search/headings) * refactor(xref/headings): pre-index headings by ID at scrape time Per sidvishnoi's review: index headings by ID during scraping rather than at store load time. Removes the redundant indexHeadings() runtime function. * fix(xref/headings): trim inputs, add version/series fallback in getHeading Per Copilot review: - Trim spec and id before lookup (validation checked trim but passed raw) - Add version-stripped and series-shortname fallback in getHeading so both "cssom-view-1" and "cssom-view" resolve headings correctly - Also resolves specTitle via stripped form as fallback * fix: correct specmap type to match nested JSON structure The specmap type previously described a flat map, but the actual data is nested with current/snapshot groups. This caused TS2352 errors after rebasing onto main's stricter type checking. * fix: address Sid's review comments on PR #469 - Extract heading resolution fallback into resolveHeadings() method - Remove unnecessary generator (specmapEntries) — iterate directly - Use separate res.status()/res.json() calls instead of chaining --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Sid Vishnoi <8426945+sidvishnoi@users.noreply.github.com>
1 parent 21ee6e1 commit 39ce491

5 files changed

Lines changed: 229 additions & 17 deletions

File tree

routes/xref/headings.post.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { Request, Response } from "express";
2+
import { store } from "./lib/store-init.js";
3+
4+
/**
 * A single heading lookup sent by the client: which spec to look in and
 * which fragment id to find there. Both values are trimmed by the route
 * handler before the lookup is performed.
 */
interface HeadingsQuery {
5+
// Spec shortname, e.g. "fetch".
spec: string;
6+
// Fragment id of the heading within that spec, e.g. "cookie-header".
id: string;
7+
}
8+
9+
/** Expected JSON body of the headings lookup request. */
interface RequestBody {
10+
// Lookups to perform; the route handler rejects more than 1000 entries.
queries: HeadingsQuery[];
11+
}
12+
13+
/** Express request type used by the route handler, with the body typed as RequestBody. */
type IRequest = Request<never, any, RequestBody>;
14+
15+
/**
 * POST /xref/search/headings
 *
 * Looks up section headings by spec shortname and fragment id.
 * Used by ReSpec's [[[SPEC#id]]] syntax to get heading text for
 * cross-spec section links.
 *
 * Request body: { queries: [{ spec: "fetch", id: "cookie-header" }] }
 * Response: { result: [{ spec: "fetch", id: "cookie-header", ... }] }
 *
 * Each result echoes the `spec` and `id` exactly as sent (untrimmed) so the
 * client can match results to its queries. A heading that cannot be found is
 * reported in place as `{ spec, id, error: "not found" }`.
 *
 * Responds with HTTP 400 and `{ error }` when the body is missing, `queries`
 * is not an array, there are more than 1000 queries, or any query lacks a
 * non-blank string `spec` or `id`.
 */
export default function route(req: IRequest, res: Response) {
  // Status and body are set in separate calls, consistently for every error.
  const badRequest = (error: string) => {
    res.status(400);
    res.json({ error });
  };

  // `req.body` may be undefined when the JSON parser did not handle the
  // request (e.g. a non-JSON Content-Type under Express 5). Destructuring it
  // directly would throw and surface as a 500 instead of a 400.
  const queries = req.body?.queries;
  if (!Array.isArray(queries)) {
    badRequest("queries must be an array");
    return;
  }
  if (queries.length > 1000) {
    badRequest("too many queries (max 1000)");
    return;
  }

  // Validate every item up front so a bad entry rejects the whole request
  // before any lookup is done.
  for (const item of queries) {
    if (typeof item?.spec !== "string" || typeof item?.id !== "string") {
      badRequest("each query must have string fields: spec, id");
      return;
    }
    if (!item.spec.trim() || !item.id.trim()) {
      badRequest("spec and id must be non-empty strings");
      return;
    }
  }

  const result = queries.map(({ spec, id }) => {
    // Lookup uses trimmed values; the response echoes the raw input.
    const heading = store.getHeading(spec.trim(), id.trim());
    if (!heading) {
      return { spec, id, error: "not found" };
    }
    return {
      spec,
      id,
      title: heading.title,
      // Headings without a section number are reported as null.
      number: heading.number || null,
      href: heading.href,
      level: heading.level,
      specTitle: heading.specTitle,
    };
  });
  res.json({ result });
}

routes/xref/index.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import { env, ms } from "../../utils/misc.js";
1010
import { store } from "./lib/store-init.js";
1111
import searchRouteGet from "./search.get.js";
1212
import searchRoutePost from "./search.post.js";
13+
import headingsRoutePost from "./headings.post.js";
1314
import metaRoute from "./meta.js";
1415
import updateRoute from "./update.js";
1516
import { search, Options, Query } from "./lib/search.js";
@@ -26,6 +27,9 @@ xref
2627
.get("/search", cors(), searchRouteGet)
2728
.post("/search", express.json({ limit: "2mb" }), cors(), searchRoutePost);
2829
xref.get("/meta{/:field}", cors(), metaRoute);
30+
// Heading lookup endpoint: an OPTIONS handler with CORS limited to POST
// (maxAge of one day), and the POST handler itself, which parses a JSON body
// of up to 1mb before calling headingsRoutePost.
xref
31+
.options("/search/headings", cors({ methods: ["POST"], maxAge: ms("1day") }))
32+
.post("/search/headings", express.json({ limit: "1mb" }), cors(), headingsRoutePost);
2933
xref.post("/update", authGithubWebhook(env("W3C_WEBREF_SECRET")), updateRoute);
3034
xref.use("/data", express.static(path.join(DATA_DIR, "xref")));
3135

routes/xref/lib/scraper.ts

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55

66
import path from "path";
77
import { existsSync } from "fs";
8-
import { mkdir, readFile, writeFile } from "fs/promises";
8+
import { mkdir, readFile, readdir, writeFile } from "fs/promises";
99

1010
import { Definition as InputDfn, DfnsJSON, SpecsJSON } from "webref";
1111

1212
import { SUPPORTED_TYPES, CSS_TYPES_INPUT } from "./constants.js";
1313
import { uniq } from "./utils.js";
14-
import { Store } from "./store.js";
14+
import { Store, SpecMapGroup } from "./store.js";
1515
import { env } from "../../../utils/misc.js";
1616
import sh from "../../../utils/sh.js";
1717

@@ -25,6 +25,7 @@ const OUT_DIR_BASE = path.join(DATA_DIR, "xref");
2525
const OUTFILE_BY_TERM = path.resolve(OUT_DIR_BASE, "./xref.json");
2626
const OUTFILE_BY_SPEC = path.resolve(OUT_DIR_BASE, "./specs.json");
2727
const OUTFILE_SPECMAP = path.resolve(OUT_DIR_BASE, "./specmap.json");
28+
const OUTFILE_HEADINGS = path.resolve(OUT_DIR_BASE, "./headings.json");
2829

2930
type Status = "current" | "snapshot";
3031
const dirToStatus = [
@@ -41,6 +42,22 @@ interface DataBySpec {
4142
[shortname: string]: Omit<ParsedDataEntry, "shortname" | "isExported">[];
4243
}
4344

45+
/**
 * One section heading as written to headings.json by the scraper and
 * returned (with an added `specTitle`) by Store.getHeading.
 */
export interface HeadingEntry {
46+
// Fragment id of the heading; also used as the key in the per-spec index.
id: string;
47+
// Link to the heading — presumably an absolute URL; confirm against webref data.
href: string;
48+
// Heading text.
title: string;
49+
// Optional; the headings route reports a missing value as null.
// Presumably the section number (e.g. "4.2") — confirm against webref data.
number?: string;
50+
// Presumably the heading depth (h1 = 1, ...) — confirm against webref data.
level: number;
51+
}
52+
53+
/**
 * Headings indexed first by spec shortname, then by heading id.
 * This is the shape written to headings.json and loaded into Store.headings.
 */
export interface HeadingsBySpec {
54+
[shortname: string]: { [id: string]: HeadingEntry };
55+
}
56+
57+
/**
 * Shape of a single per-spec headings file as read by readAllHeadings.
 * Only the `headings` array is used; a file without it yields no headings.
 */
interface HeadingsJSON {
58+
headings?: HeadingEntry[];
59+
}
60+
4461
const defaultOptions = { forceUpdate: false };
4562
type Options = typeof defaultOptions;
4663

@@ -55,8 +72,8 @@ export default async function main(options: Partial<Options> = {}) {
5572
const dataByTerm: DataByTerm = Object.create(null);
5673
const dataBySpec: DataBySpec = Object.create(null);
5774
const specificationsMap = {
58-
current: {} as Store["specmap"],
59-
snapshot: {} as Store["specmap"],
75+
current: {} as SpecMapGroup,
76+
snapshot: {} as SpecMapGroup,
6077
};
6178

6279
for (const [dir, status] of dirToStatus) {
@@ -83,12 +100,18 @@ export default async function main(options: Partial<Options> = {}) {
83100
dataByTerm[term] = uniq(dataByTerm[term]);
84101
}
85102

103+
// Read headings data from webref (ed/ only, same as xref default)
104+
const headingsBySpec = await readAllHeadings(
105+
path.join(INPUT_DIR_BASE, "ed", "headings"),
106+
);
107+
86108
console.log("Writing processed data files...");
87109
await mkdir(OUT_DIR_BASE, { recursive: true });
88110
await Promise.all([
89111
writeFile(OUTFILE_BY_TERM, JSON.stringify(dataByTerm, null, 2)),
90112
writeFile(OUTFILE_BY_SPEC, JSON.stringify(dataBySpec, null, 2)),
91113
writeFile(OUTFILE_SPECMAP, JSON.stringify(specificationsMap, null, 2)),
114+
writeFile(OUTFILE_HEADINGS, JSON.stringify(headingsBySpec, null, 2)),
92115
]);
93116
return true;
94117
}
@@ -193,15 +216,15 @@ function normalizeTerm(term: string, type: string) {
193216
async function getAllData(baseDir: string) {
194217
const SPECS_JSON = path.resolve(baseDir, "./index.json");
195218
console.log(`Getting data from ${SPECS_JSON}`);
196-
const urlFileContent = await readJSON(SPECS_JSON);
219+
const urlFileContent = await readJSON<{ results: SpecsJSON[] }>(SPECS_JSON);
197220
const data: SpecsJSON[] = urlFileContent.results;
198221

199-
const specMap: Store["specmap"] = Object.create(null);
222+
const specMap: SpecMapGroup = Object.create(null);
200223
const dfnSources: DfnsJSON[] = [];
201224

202225
for (const entry of data) {
203226
if (entry.dfns) {
204-
const dfnsData = await readJSON(path.join(baseDir, entry.dfns));
227+
const dfnsData = await readJSON<{ dfns: InputDfn[] }>(path.join(baseDir, entry.dfns));
205228
const dfns: InputDfn[] = dfnsData.dfns;
206229
dfnSources.push({
207230
series: entry.series.shortname,
@@ -221,7 +244,48 @@ async function getAllData(baseDir: string) {
221244
return { specMap, dfnSources };
222245
}
223246

224-
async function readJSON(filePath: string) {
247+
async function readJSON<T = unknown>(filePath: string): Promise<T> {
225248
const text = await readFile(filePath, "utf-8");
226-
return JSON.parse(text);
249+
return JSON.parse(text) as T;
250+
}
251+
252+
/**
 * Collect section headings from every `*.json` file in `headingsDir`
 * (webref's ed/headings/ directory).
 *
 * The result maps a lower-cased spec shortname (taken from the file name,
 * since the file contents do not carry it) to an `id → HeadingEntry` index.
 * A missing directory yields an empty result with a warning. A file that
 * fails to read or parse is logged and skipped without aborting the rest.
 */
async function readAllHeadings(
  headingsDir: string,
): Promise<HeadingsBySpec> {
  const headingsBySpec: HeadingsBySpec = Object.create(null);

  if (!existsSync(headingsDir)) {
    console.warn(`Headings directory not found: ${headingsDir}`);
    return headingsBySpec;
  }

  const dirEntries = await readdir(headingsDir);
  const headingFiles = dirEntries.filter(name => name.endsWith(".json"));
  console.log(`Processing ${headingFiles.length} heading files...`);

  for (const file of headingFiles) {
    try {
      const parsed = await readJSON<HeadingsJSON>(path.join(headingsDir, file));
      // Every name here ends in ".json" (see the filter above), so dropping
      // the last five characters leaves the shortname.
      const shortname = file.slice(0, -".json".length).toLowerCase();

      const index: { [id: string]: HeadingEntry } = Object.create(null);
      for (const { id, href, title, number, level } of parsed.headings || []) {
        // Copy only the known fields so any extras in the input are dropped.
        index[id] = { id, href, title, number, level };
      }
      headingsBySpec[shortname] = index;
    } catch (error) {
      console.error(`Error reading headings from ${file}:`, error);
    }
  }

  return headingsBySpec;
}

routes/xref/lib/store.ts

Lines changed: 86 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,25 @@ import { readFileSync } from "fs";
33

44
import { env } from "../../../utils/misc.js";
55
import { DataEntry } from "./search.js";
6+
import { HeadingEntry, HeadingsBySpec } from "./scraper.js";
7+
8+
/**
 * One group of the spec map (the scraper builds a `current` and a `snapshot`
 * group), keyed by spec id.
 */
export type SpecMapGroup = {
9+
[specid: string]: {
10+
url: string;
11+
// Used as the key for the shortname → title lookup; Store.resolveHeadings
// treats it as possibly differing from the spec id (series vs. versioned).
shortname: string;
12+
title: string;
13+
};
14+
};
615

716
export class Store {
817
version = -1;
918
bySpec: { [shortname: string]: DataEntry[] } = {};
1019
byTerm: { [term: string]: DataEntry[] } = {};
11-
specmap: {
12-
[specid: string]: {
13-
url: string;
14-
shortname: string;
15-
title: string;
16-
};
17-
} = {};
20+
specmap: { [group: string]: SpecMapGroup } = {};
21+
/** Headings pre-indexed by spec shortname, then by fragment id. */
22+
headings: HeadingsBySpec = {};
23+
/** Reverse lookup: shortname → spec title. */
24+
private specTitleByShortname: Map<string, string> = new Map();
1825

1926
constructor() {
2027
this.fill();
@@ -25,13 +32,85 @@ export class Store {
2532
this.byTerm = readJson("xref.json");
2633
this.bySpec = readJson("specs.json");
2734
this.specmap = readJson("specmap.json");
35+
this.headings = readJsonOptional("headings.json");
36+
this.specTitleByShortname = buildSpecTitleMap(this.specmap);
2837
this.version = Date.now();
2938
}
39+
40+
/**
 * Look up a heading by spec shortname and fragment id.
 *
 * @param spec Spec shortname; matched case-insensitively (it is lower-cased
 *   before lookup).
 * @param id Fragment id of the heading; matched exactly.
 * @returns The heading plus `specTitle`, or `null` when the spec has no
 *   headings data or the id is not one of its headings. `specTitle` is the
 *   title registered for the shortname, then for the shortname with a
 *   trailing `-<digits>` removed, and finally the `spec` argument as given.
 */
getHeading(
  spec: string,
  id: string,
): (HeadingEntry & { specTitle: string }) | null {
  const normalizedSpec = spec.toLowerCase();
  const specHeadings = this.resolveHeadings(normalizedSpec);
  if (!specHeadings) return null;

  // The index is a plain object parsed from JSON and `id` comes from the
  // caller, so only own keys count. Otherwise inherited names such as
  // "constructor" or "toString" would be returned as if they were headings.
  if (!Object.prototype.hasOwnProperty.call(specHeadings, id)) return null;
  const heading = specHeadings[id];
  if (!heading) return null;

  const specTitle =
    this.specTitleByShortname.get(normalizedSpec) ||
    this.specTitleByShortname.get(normalizedSpec.replace(/-\d+$/, "")) ||
    spec;

  return { ...heading, specTitle };
}
59+
60+
/**
 * Find the `id → heading` index for a spec, trying in order:
 *  1. the name as given;
 *  2. the name with a trailing `-<digits>` removed (cssom-view-1 → cssom-view);
 *  3. any specmap entry whose shortname equals either of those, using that
 *     entry's spec id as the headings key.
 *
 * @param spec Spec shortname, expected to be lower-cased already (getHeading
 *   does this), matching the lower-cased keys the scraper writes.
 * @returns The index, or `null` when nothing matches.
 */
private resolveHeadings(spec: string): Record<string, HeadingEntry> | null {
  // `this.headings` is a plain object parsed from JSON and `spec` comes from
  // the caller. Restrict lookups to own keys so inherited names such as
  // "constructor" are never mistaken for a spec.
  const ownHeadings = (key: string): Record<string, HeadingEntry> | null => {
    if (!Object.prototype.hasOwnProperty.call(this.headings, key)) return null;
    return this.headings[key] || null;
  };

  const direct = ownHeadings(spec);
  if (direct) return direct;

  // Try stripping version suffix (e.g., cssom-view-1 → cssom-view)
  const stripped = spec.replace(/-\d+$/, "");
  if (stripped !== spec) {
    const unversioned = ownHeadings(stripped);
    if (unversioned) return unversioned;
  }

  // Try resolving series shortname to versioned (or vice versa) via specmap.
  // Specmap names keep their original casing while `spec` and the headings
  // keys are lower-cased, so compare and index case-insensitively.
  for (const group of Object.values(this.specmap)) {
    for (const [specId, entry] of Object.entries(group)) {
      // Optional chaining: the specmap is untyped JSON at runtime.
      const shortname = entry.shortname?.toLowerCase();
      if (shortname === spec || shortname === stripped) {
        // Fall back to the raw spec id in case the key was not lower-cased.
        const resolved = ownHeadings(specId.toLowerCase()) || ownHeadings(specId);
        if (resolved) return resolved;
      }
    }
  }

  return null;
}
3083
}
3184

85+
/**
 * Load and parse a required JSON file from the `xref` folder under DATA_DIR.
 * Any read or parse failure (including a missing file) propagates as an
 * exception.
 */
function readJson(filename: string) {
  const dataFile = path.resolve(env("DATA_DIR"), `./xref/${filename}`);
  return JSON.parse(readFileSync(dataFile, "utf8"));
}
}
92+
93+
/**
 * Read an optional JSON data file.
 *
 * @returns The parsed content, or `{}` (with a warning) when the file does
 *   not exist.
 * @throws Any error other than ENOENT — e.g. a permission problem or invalid
 *   JSON — is rethrown unchanged so real failures are not hidden.
 */
function readJsonOptional(filename: string) {
  try {
    return readJson(filename);
  } catch (err: unknown) {
    // Narrow from `unknown` instead of trusting `any`: only object-like
    // errors can carry a `code`.
    const code =
      typeof err === "object" && err !== null
        ? (err as { code?: unknown }).code
        : undefined;
    if (code === "ENOENT") {
      console.warn(`Optional data file not found: ${filename}`);
      return {};
    }
    throw err;
  }
}
105+
106+
/**
 * Build a shortname → title map from the specmap for O(1) title lookup.
 *
 * Keys are lower-cased because the lookup side lower-cases the requested
 * spec name; storing the raw casing would make mixed-case shortnames
 * unreachable. When several entries share a shortname, the last one visited
 * wins.
 *
 * @param specmap Nested as `{ current: { [specid]: entry }, snapshot: { ... } }`.
 * @returns Map from lower-cased shortname to spec title.
 */
function buildSpecTitleMap(
  specmap: Record<string, Record<string, { shortname: string; title: string }>>,
): Map<string, string> {
  const result = new Map<string, string>();
  for (const group of Object.values(specmap)) {
    for (const entry of Object.values(group)) {
      // The specmap is untyped JSON at runtime; skip malformed entries
      // rather than throwing or storing an `undefined` key.
      if (typeof entry?.shortname !== "string") continue;
      result.set(entry.shortname.toLowerCase(), entry.title);
    }
  }
  return result;
}

routes/xref/update.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,5 +58,7 @@ function hasRelevantUpdate(commits: Commit[]) {
5858
const changedFiles = commits
5959
.map(commit => [commit.added, commit.removed, commit.modified])
6060
.flat(2);
61-
return changedFiles.some(file => file?.startsWith("ed/dfns/"));
61+
return changedFiles.some(
62+
file => file?.startsWith("ed/dfns/") || file?.startsWith("ed/headings/"),
63+
);
6264
}

0 commit comments

Comments
 (0)