Skip to content

Commit 3b30747

Browse files
committed
2
1 parent 04a8a11 commit 3b30747

1 file changed

Lines changed: 175 additions & 83 deletions

File tree

scripts/docs-governance/lint-links.js

Lines changed: 175 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,57 @@ function isGovernedMarkdownPath(filePath) {
113113
return isMarkdownFile(normalized) && GOVERNED_MARKDOWN_PREFIXES.some((prefix) => normalized.startsWith(prefix));
114114
}
115115

116-
function buildIndexes(rootDir, manifest) {
116+
/**
 * Report whether progress logging is switched on.
 *
 * Progress output is enabled by default; it is turned off only when the
 * DOCS_LINT_PROGRESS environment variable is exactly the string '0'.
 *
 * @returns {boolean} false when DOCS_LINT_PROGRESS === '0', true otherwise.
 */
function progressEnabled() {
  return process.env.DOCS_LINT_PROGRESS !== '0';
}
119+
120+
/**
 * Build a progress logger bound to the moment of creation.
 *
 * The logger writes to stderr so JSON results on stdout / --output stay clean.
 * Each line has the form `[docs-lint-links] +<elapsed>s <stage> key=value ...`,
 * where <elapsed> is the number of seconds since the logger was created,
 * printed with one decimal place.
 *
 * @param {boolean} enabled - When falsy the returned function does nothing.
 * @returns {(stage: string, info?: object) => void} Logger; `info` is an
 *   optional flat object whose own enumerable entries are appended as
 *   space-separated `key=value` pairs.
 */
function createProgressLogger(enabled) {
  const start = Date.now();
  return (stage, info) => {
    if (!enabled) return;
    const elapsed = ((Date.now() - start) / 1000).toFixed(1);
    // Omit the detail suffix entirely when info is missing or has no keys.
    const details = info && Object.keys(info).length
      ? ' ' + Object.entries(info).map(([key, value]) => `${key}=${value}`).join(' ')
      : '';
    process.stderr.write(`[docs-lint-links] +${elapsed}s ${stage}${details}\n`);
  };
}
132+
133+
/**
 * List the file paths a link pathname could refer to.
 *
 * The pathname itself is always the first candidate. When it has no file
 * extension, the Markdown variants are added as well: `<p>.md`, `<p>.mdx`,
 * `<p>/index.md` and `<p>/index.mdx`.
 *
 * Trailing slashes are stripped before the variants are derived, so a
 * directory-style link such as `docs/guide/` yields `docs/guide/index.md`
 * rather than `docs/guide//index.md` (which could never equal a real path).
 *
 * @param {string} pathname - Already-resolved, normalized link pathname.
 * @returns {string[]} Candidate paths, most literal first.
 */
function relativePathCandidates(pathname) {
  const candidates = [pathname];
  if (!path.extname(pathname)) {
    // Fall back to the raw pathname when stripping would leave nothing (e.g. '/').
    const stem = pathname.replace(/\/+$/, '') || pathname;
    candidates.push(`${stem}.md`, `${stem}.mdx`, `${stem}/index.md`, `${stem}/index.mdx`);
  }
  return candidates;
}
140+
141+
/**
 * Read each manifest entry's source file once and pre-extract its links.
 *
 * Downstream stages share this cache so the same file is not re-read or
 * re-tokenized across sub-lints. Entries whose file does not exist under
 * rootDir are skipped silently.
 *
 * @param {string} rootDir - Directory that entry.source_path is relative to.
 * @param {{entries: Array<{source_path: string}>}} manifest - Docs manifest.
 * @returns {Array<{entry: object, raw: string, links: Array}>} One item per
 *   existing file, in manifest order; `links` is whatever
 *   extractMarkdownLinks returns for the file's text.
 */
function collectFileCache(rootDir, manifest) {
  const cache = [];
  for (const entry of manifest.entries) {
    const absPath = path.join(rootDir, entry.source_path);
    if (!fs.existsSync(absPath)) {
      continue;
    }
    const raw = fs.readFileSync(absPath, 'utf8');
    cache.push({ entry, raw, links: extractMarkdownLinks(raw) });
  }
  return cache;
}
155+
156+
function buildIndexes(rootDir, manifest, fileCache) {
117157
const bySource = new Map();
118158
const byRoute = new Map();
119159
const anchorsBySource = new Map();
160+
const rawBySource = new Map();
161+
162+
if (fileCache) {
163+
for (const item of fileCache) {
164+
rawBySource.set(item.entry.source_path, item.raw);
165+
}
166+
}
120167

121168
for (const entry of manifest.entries) {
122169
bySource.set(entry.source_path, entry);
@@ -125,9 +172,15 @@ function buildIndexes(rootDir, manifest) {
125172
byRoute.set(route.replace(/\/+$/, ''), entry);
126173
}
127174
}
128-
const absPath = path.join(rootDir, entry.source_path);
129-
if (fs.existsSync(absPath)) {
130-
anchorsBySource.set(entry.source_path, extractHeadingAnchors(fs.readFileSync(absPath, 'utf8')));
175+
let raw = rawBySource.get(entry.source_path);
176+
if (raw === undefined) {
177+
const absPath = path.join(rootDir, entry.source_path);
178+
if (fs.existsSync(absPath)) {
179+
raw = fs.readFileSync(absPath, 'utf8');
180+
}
181+
}
182+
if (raw !== undefined) {
183+
anchorsBySource.set(entry.source_path, extractHeadingAnchors(raw));
131184
}
132185
}
133186

@@ -137,11 +190,9 @@ function buildIndexes(rootDir, manifest) {
137190
/**
 * Resolve a relative link to the candidate files that actually exist.
 *
 * The pathname is resolved against the directory of the linking file,
 * normalized, expanded through relativePathCandidates, and then filtered
 * down to candidates that stay inside the root (do not start with '../')
 * and exist on disk under rootDir.
 *
 * @param {string} rootDir - Root directory the candidates are relative to.
 * @param {string} sourcePath - Root-relative path of the file containing the link.
 * @param {string} pathname - Path portion of the link target.
 * @returns {string[]} Existing root-relative candidate paths, in candidate order.
 */
function candidateFiles(rootDir, sourcePath, pathname) {
  const sourceDir = path.dirname(sourcePath);
  const normalized = normalizePath(path.normalize(path.join(sourceDir, pathname)));
  return relativePathCandidates(normalized).filter(
    // The prefix check runs first so paths escaping the root never touch the filesystem.
    (candidate) => !candidate.startsWith('../') && fs.existsSync(path.join(rootDir, candidate)),
  );
}
146197

147198
function resolveInternalTarget(rootDir, sourcePath, rawTarget, indexes) {
@@ -175,21 +226,12 @@ function resolveInternalTarget(rootDir, sourcePath, rawTarget, indexes) {
175226
return { kind: 'missing-file', pathname, hash };
176227
}
177228

178-
function scanMarkdownFiles(rootDir, manifest) {
179-
return manifest.entries
180-
.filter((entry) => fs.existsSync(path.join(rootDir, entry.source_path)))
181-
.map((entry) => ({
182-
entry,
183-
raw: fs.readFileSync(path.join(rootDir, entry.source_path), 'utf8'),
184-
}));
185-
}
186-
187-
function lintCurrentLinks(rootDir, manifest) {
229+
function lintCurrentLinks(rootDir, manifest, fileCache) {
188230
const findings = [];
189-
const indexes = buildIndexes(rootDir, manifest);
231+
const indexes = buildIndexes(rootDir, manifest, fileCache);
190232

191-
for (const { entry, raw } of scanMarkdownFiles(rootDir, manifest)) {
192-
for (const link of extractMarkdownLinks(raw)) {
233+
for (const { entry, links } of fileCache) {
234+
for (const link of links) {
193235
const resolved = resolveInternalTarget(rootDir, entry.source_path, link.target, indexes);
194236
if (resolved.kind === 'skip') {
195237
continue;
@@ -253,73 +295,87 @@ function recordOldPaths(records) {
253295
.filter((record) => isGovernedMarkdownPath(record.oldPath));
254296
}
255297

256-
function targetMatchesOldPath(sourcePath, target, oldPath) {
257-
if (isExternal(target) || isSkippable(target)) {
258-
return false;
259-
}
260-
const { pathname } = splitTarget(target);
261-
if (!pathname || pathname.startsWith('/')) {
262-
return false;
263-
}
264-
const sourceDir = path.dirname(sourcePath);
265-
const resolved = normalizePath(path.normalize(path.join(sourceDir, pathname)));
266-
const candidates = [resolved];
267-
if (!path.extname(resolved)) {
268-
candidates.push(`${resolved}.md`, `${resolved}.mdx`, `${resolved}/index.md`, `${resolved}/index.mdx`);
298+
/**
 * Index inbound Markdown links by every path they could resolve to.
 *
 * Walks every (file, link) pair once and buckets a reference under each
 * old-path candidate the link could resolve to, so looking up the inbound
 * references of a deleted/renamed path is a single Map lookup instead of a
 * re-scan of all files.
 *
 * External targets, skippable targets, targets with an empty pathname and
 * root-absolute targets (pathname starting with '/') are not indexed.
 *
 * @param {Array<{entry: {source_path: string, owner: *}, links: Array<{target: string, line: number}>}>} fileCache
 *   Pre-read files with their extracted links.
 * @returns {Map<string, Array<{path: string, line: number, owner: *}>>}
 *   Candidate path -> references, in file order then link order. A link with
 *   several candidates contributes the same reference object to each bucket.
 */
function buildInboundMarkdownIndex(fileCache) {
  const index = new Map();
  for (const { entry, links } of fileCache) {
    const sourceDir = path.dirname(entry.source_path);
    for (const link of links) {
      if (isExternal(link.target) || isSkippable(link.target)) {
        continue;
      }
      const { pathname } = splitTarget(link.target);
      if (!pathname || pathname.startsWith('/')) {
        continue;
      }
      const resolved = normalizePath(path.normalize(path.join(sourceDir, pathname)));
      const ref = { path: entry.source_path, line: link.line, owner: entry.owner };
      for (const candidate of relativePathCandidates(resolved)) {
        let bucket = index.get(candidate);
        if (!bucket) {
          bucket = [];
          index.set(candidate, bucket);
        }
        bucket.push(ref);
      }
    }
  }
  return index;
}
272327

273-
function findInboundMarkdownReferences(rootDir, manifest, oldPath) {
274-
const refs = [];
275-
for (const { entry, raw } of scanMarkdownFiles(rootDir, manifest)) {
276-
for (const link of extractMarkdownLinks(raw)) {
277-
if (targetMatchesOldPath(entry.source_path, link.target, oldPath)) {
278-
refs.push({ path: entry.source_path, line: link.line, owner: entry.owner });
328+
/**
 * Index sidebar references by doc id.
 *
 * Each distinct, truthy `sidebar_source` named by the manifest is loaded
 * once through loadSidebarRefs. Sidebars reported as missing are skipped.
 * Every doc id a sidebar references is mapped to a reference pointing at
 * that sidebar file.
 *
 * @param {string} rootDir - Root directory passed through to loadSidebarRefs.
 * @param {{entries: Array<{sidebar_source?: string}>}} manifest - Docs manifest.
 * @returns {Map<string, Array<{path: string, line: number}>>} Doc id ->
 *   sidebar references. `line` is always 1: no per-reference line number is
 *   available from the loaded refs.
 */
function buildInboundSidebarIndex(rootDir, manifest) {
  const index = new Map();
  // A Set de-duplicates sidebars shared by many entries so each is loaded once.
  const sidebars = new Set(manifest.entries.map((entry) => entry.sidebar_source).filter(Boolean));
  for (const sidebarSource of sidebars) {
    const loaded = loadSidebarRefs(rootDir, sidebarSource);
    if (loaded.missing) {
      continue;
    }
    for (const docId of loaded.refs) {
      let bucket = index.get(docId);
      if (!bucket) {
        bucket = [];
        index.set(docId, bucket);
      }
      bucket.push({ path: sidebarSource, line: 1 });
    }
  }
  return index;
}
284347

285-
function findInboundSidebarReferences(rootDir, manifest, oldPath) {
286-
const oldDocId = stripMarkdownExtension(oldPath)
348+
/**
 * Convert a root-relative Markdown path into the doc id used in sidebars.
 *
 * The Markdown extension is stripped, then the content-root prefix is
 * removed (docs, versioned docs, zh-CN translations) or, for community
 * content, replaced with the `community:` namespace prefix.
 *
 * Only the FIRST matching prefix rule is applied. Chaining the rules would
 * let a later rule fire on what an earlier one left behind, turning
 * `docs/community/x.md` into `community:x` instead of `community/x`.
 *
 * @param {string} oldPath - Root-relative path of the changed Markdown file.
 * @returns {string} Doc id; the extension-less path unchanged when no rule matches.
 */
function oldPathToDocId(oldPath) {
  // [prefix pattern, replacement] pairs, checked in order.
  const prefixRules = [
    [/^docs\//, ''],
    [/^versioned_docs\/version-[^/]+\//, ''],
    [/^i18n\/zh-CN\/docusaurus-plugin-content-docs\/(?:current|version-[^/]+)\//, ''],
    [/^community\//, 'community:'],
    [/^i18n\/zh-CN\/docusaurus-plugin-content-docs-community\/current\//, 'community:'],
  ];
  const stem = stripMarkdownExtension(oldPath);
  for (const [pattern, replacement] of prefixRules) {
    if (pattern.test(stem)) {
      return stem.replace(pattern, replacement);
    }
  }
  return stem;
}
302356

303-
function lintMovedOrDeletedLinks(rootDir, manifest, changedRecords) {
357+
function lintMovedOrDeletedLinks(rootDir, manifest, changedRecords, fileCache) {
358+
const records = recordOldPaths(changedRecords || []);
359+
if (records.length === 0) {
360+
return [];
361+
}
362+
363+
const markdownIndex = buildInboundMarkdownIndex(fileCache);
364+
const sidebarIndex = buildInboundSidebarIndex(rootDir, manifest);
304365
const findings = [];
305-
for (const record of recordOldPaths(changedRecords || [])) {
366+
367+
for (const record of records) {
306368
const rule = record.status === 'R' ? 'link-moved-file-inbound-reference' : 'link-deleted-file-inbound-reference';
307-
const refs = [
308-
...findInboundMarkdownReferences(rootDir, manifest, record.oldPath),
309-
...findInboundSidebarReferences(rootDir, manifest, record.oldPath),
310-
];
311-
for (const ref of refs) {
312-
findings.push(
313-
makeFinding(
314-
'error',
315-
rule,
316-
ref.path,
317-
ref.line,
318-
`Inbound link still points to changed path ${record.oldPath}; review and update target ${record.path || ''}.`.trim(),
319-
ref.owner,
320-
[record.oldPath, record.path].filter(Boolean),
321-
),
322-
);
369+
const markdownRefs = markdownIndex.get(record.oldPath) || [];
370+
const sidebarRefs = sidebarIndex.get(oldPathToDocId(record.oldPath)) || [];
371+
const inboundMessage = `Inbound link still points to changed path ${record.oldPath}; review and update target ${record.path || ''}.`.trim();
372+
const relatedPaths = [record.oldPath, record.path].filter(Boolean);
373+
374+
for (const ref of markdownRefs) {
375+
findings.push(makeFinding('error', rule, ref.path, ref.line, inboundMessage, ref.owner, relatedPaths));
376+
}
377+
for (const ref of sidebarRefs) {
378+
findings.push(makeFinding('error', rule, ref.path, ref.line, inboundMessage, undefined, relatedPaths));
323379
}
324380
findings.push(
325381
makeFinding(
@@ -329,7 +385,7 @@ function lintMovedOrDeletedLinks(rootDir, manifest, changedRecords) {
329385
1,
330386
`Markdown path changed from ${record.oldPath}; review redirects and inbound links before merging.`,
331387
undefined,
332-
[record.oldPath, record.path].filter(Boolean),
388+
relatedPaths,
333389
),
334390
);
335391
}
@@ -395,11 +451,29 @@ function lintLinks(options = {}) {
395451
const manifest = options.manifest || buildManifest({ rootDir });
396452
const changedFiles = options.changedFiles || [];
397453
const changedRecords = options.changedRecords || [];
398-
return [
399-
...lintCurrentLinks(rootDir, manifest),
400-
...lintMovedOrDeletedLinks(rootDir, manifest, changedRecords),
401-
...lintSlugChanges(rootDir, changedFiles),
402-
];
454+
const progress = options.progress || createProgressLogger(progressEnabled());
455+
456+
progress('lintLinks start', {
457+
entries: manifest.entries.length,
458+
changedFiles: changedFiles.length,
459+
changedRecords: changedRecords.length,
460+
});
461+
462+
const fileCache = collectFileCache(rootDir, manifest);
463+
progress('file cache built', { files: fileCache.length });
464+
465+
const currentFindings = lintCurrentLinks(rootDir, manifest, fileCache);
466+
progress('lintCurrentLinks done', { findings: currentFindings.length });
467+
468+
const movedFindings = lintMovedOrDeletedLinks(rootDir, manifest, changedRecords, fileCache);
469+
progress('lintMovedOrDeletedLinks done', { findings: movedFindings.length });
470+
471+
const slugFindings = lintSlugChanges(rootDir, changedFiles);
472+
progress('lintSlugChanges done', { findings: slugFindings.length });
473+
474+
const all = [...currentFindings, ...movedFindings, ...slugFindings];
475+
progress('lintLinks total', { findings: all.length });
476+
return all;
403477
}
404478

405479
function filterLinkFindings(findings, changedFiles) {
@@ -425,22 +499,40 @@ function hasLinkErrors(findings) {
425499
/**
 * Command-line entry point for the link lint.
 *
 * Reads options from process.argv via parseArgs:
 *   - root:           directory to lint (defaults to the current working directory)
 *   - changed:        lint against the changed files/records reported for rootDir
 *   - files:          comma-separated explicit file list (used when `changed` is not set)
 *   - output:         path (resolved against rootDir) to write the JSON report to;
 *                     without it the report goes to stdout
 *   - fail-on-errors: set process.exitCode to 1 when the findings contain errors
 *
 * Progress lines go through the progress logger, so they never mix into the
 * JSON report on stdout / --output.
 *
 * @returns {void}
 */
function runCli() {
  const args = parseArgs(process.argv.slice(2));
  const rootDir = args.root ? path.resolve(args.root) : process.cwd();
  const progress = createProgressLogger(progressEnabled());

  progress('CLI start', { mode: args.changed ? 'changed' : args.files ? 'files' : 'full' });
  // null means "full run": no changed-file scope is applied to the findings.
  const changedFiles = args.changed ? getChangedFiles(rootDir) : args.files ? args.files.split(',') : null;
  const changedRecords = args.changed ? getChangedRecords(rootDir) : [];
  if (args.changed || args.files) {
    progress('changed inputs resolved', {
      changedFiles: (changedFiles || []).length,
      changedRecords: changedRecords.length,
    });
  }

  const manifest = buildManifest({ rootDir });
  progress('manifest built', { entries: manifest.entries.length });

  const rawFindings = lintLinks({ rootDir, manifest, changedFiles: changedFiles || [], changedRecords, progress });
  const findings = filterLinkFindings(rawFindings, changedFiles);
  if (changedFiles) {
    progress('filtered to changed scope', { kept: findings.length, dropped: rawFindings.length - findings.length });
  }

  const output = JSON.stringify({ schema_version: 1, findings }, null, 2);
  const payload = `${output}\n`;
  if (args.output) {
    const outputPath = path.resolve(rootDir, args.output);
    ensureDirForFile(outputPath);
    fs.writeFileSync(outputPath, payload, 'utf8');
    // Report the encoded size of what was written; String.length counts
    // UTF-16 code units and would under-report non-ASCII content.
    progress('output written', { path: args.output, bytes: Buffer.byteLength(payload, 'utf8') });
  } else {
    process.stdout.write(payload);
  }

  const hasErrors = hasLinkErrors(findings);
  progress('CLI done', { findings: findings.length, errors: hasErrors });
  if (args['fail-on-errors'] && hasErrors) {
    process.exitCode = 1;
  }
}

0 commit comments

Comments
 (0)