@@ -113,10 +113,57 @@ function isGovernedMarkdownPath(filePath) {
113113 return isMarkdownFile ( normalized ) && GOVERNED_MARKDOWN_PREFIXES . some ( ( prefix ) => normalized . startsWith ( prefix ) ) ;
114114}
115115
116- function buildIndexes ( rootDir , manifest ) {
// Report whether progress logging is switched on.
//
// Progress is on unless the DOCS_LINT_PROGRESS variable is exactly the string
// '0'; an unset variable or any other value leaves it enabled.
//
// `env` defaults to process.env and only exists so callers (and tests) can
// evaluate the switch against an explicit environment object.
function progressEnabled(env = process.env) {
  return env.DOCS_LINT_PROGRESS !== '0';
}
119+
// Build a progress logger: a function `(stage, info)` that emits one line per
// call in the form `[docs-lint-links] +<seconds>s <stage> key=value ...`.
//
// - `enabled`: when falsy, the returned function does nothing.
// - `write`:   optional sink receiving each formatted line (newline included).
//              Defaults to stderr so JSON results on stdout / --output stay clean.
//
// The elapsed time is measured from the moment the logger is created, not
// from the previous call.
function createProgressLogger(enabled, write = (line) => process.stderr.write(line)) {
  const start = Date.now();
  return (stage, info) => {
    if (!enabled) return;
    const elapsed = ((Date.now() - start) / 1000).toFixed(1);
    // A missing or empty `info` contributes nothing, not even a separator.
    const pairs = info ? Object.entries(info) : [];
    const details = pairs.length
      ? ` ${pairs.map(([key, value]) => `${key}=${value}`).join(' ')}`
      : '';
    write(`[docs-lint-links] +${elapsed}s ${stage}${details}\n`);
  };
}
132+
// List the paths a link target may stand for.
//
// The first candidate is always `pathname` itself. When path.extname() finds
// no extension on it, four more candidates follow: `<base>.md`, `<base>.mdx`,
// `<base>/index.md` and `<base>/index.mdx`.
//
// `<base>` is `pathname` with trailing slashes removed. path.join() and
// path.normalize() preserve a trailing slash, so without this step a link
// written as `guide/` would yield `guide/.md` and `guide//index.md`, which
// never compare equal to the real `guide/index.md` when used as lookup keys.
function relativePathCandidates(pathname) {
  const candidates = [pathname];
  if (!path.extname(pathname)) {
    // Fall back to the raw value when stripping would leave nothing (e.g. '/').
    const base = pathname.replace(/\/+$/, '') || pathname;
    candidates.push(`${base}.md`, `${base}.mdx`, `${base}/index.md`, `${base}/index.mdx`);
  }
  return candidates;
}
140+
// Load the source file of every manifest entry a single time and keep the
// result for the later lint stages, so none of them has to hit the disk or
// re-extract links again.
//
// Returns an array of `{ entry, raw, links }` records in manifest order, where
// `raw` is the UTF-8 file content and `links` is what extractMarkdownLinks()
// produced for it. Entries whose file does not exist under `rootDir` are
// left out.
function collectFileCache(rootDir, manifest) {
  return manifest.entries.reduce((collected, entry) => {
    const absPath = path.join(rootDir, entry.source_path);
    if (fs.existsSync(absPath)) {
      const raw = fs.readFileSync(absPath, 'utf8');
      collected.push({ entry, raw, links: extractMarkdownLinks(raw) });
    }
    return collected;
  }, []);
}
155+
156+ function buildIndexes ( rootDir , manifest , fileCache ) {
117157 const bySource = new Map ( ) ;
118158 const byRoute = new Map ( ) ;
119159 const anchorsBySource = new Map ( ) ;
160+ const rawBySource = new Map ( ) ;
161+
162+ if ( fileCache ) {
163+ for ( const item of fileCache ) {
164+ rawBySource . set ( item . entry . source_path , item . raw ) ;
165+ }
166+ }
120167
121168 for ( const entry of manifest . entries ) {
122169 bySource . set ( entry . source_path , entry ) ;
@@ -125,9 +172,15 @@ function buildIndexes(rootDir, manifest) {
125172 byRoute . set ( route . replace ( / \/ + $ / , '' ) , entry ) ;
126173 }
127174 }
128- const absPath = path . join ( rootDir , entry . source_path ) ;
129- if ( fs . existsSync ( absPath ) ) {
130- anchorsBySource . set ( entry . source_path , extractHeadingAnchors ( fs . readFileSync ( absPath , 'utf8' ) ) ) ;
175+ let raw = rawBySource . get ( entry . source_path ) ;
176+ if ( raw === undefined ) {
177+ const absPath = path . join ( rootDir , entry . source_path ) ;
178+ if ( fs . existsSync ( absPath ) ) {
179+ raw = fs . readFileSync ( absPath , 'utf8' ) ;
180+ }
181+ }
182+ if ( raw !== undefined ) {
183+ anchorsBySource . set ( entry . source_path , extractHeadingAnchors ( raw ) ) ;
131184 }
132185 }
133186
@@ -137,11 +190,9 @@ function buildIndexes(rootDir, manifest) {
// Resolve a relative link `pathname`, written inside `sourcePath`, to the
// candidate files that actually exist under `rootDir`.
//
// The link is joined onto the directory of `sourcePath`, normalized, and
// expanded through relativePathCandidates(). A candidate is kept only when it
// stays inside the root and exists on disk. Returned paths are relative to
// `rootDir`.
function candidateFiles(rootDir, sourcePath, pathname) {
  const sourceDir = path.dirname(sourcePath);
  const normalized = normalizePath(path.normalize(path.join(sourceDir, pathname)));
  // After normalization a path that escapes the root is either exactly '..'
  // or starts with '../'. Both must be rejected; checking only the prefix
  // would let a bare '..' through, and it always exists on disk.
  const escapesRoot = (candidate) => candidate === '..' || candidate.startsWith('../');
  return relativePathCandidates(normalized).filter(
    (candidate) => !escapesRoot(candidate) && fs.existsSync(path.join(rootDir, candidate)),
  );
}
146197
147198function resolveInternalTarget ( rootDir , sourcePath , rawTarget , indexes ) {
@@ -175,21 +226,12 @@ function resolveInternalTarget(rootDir, sourcePath, rawTarget, indexes) {
175226 return { kind : 'missing-file' , pathname, hash } ;
176227}
177228
178- function scanMarkdownFiles ( rootDir , manifest ) {
179- return manifest . entries
180- . filter ( ( entry ) => fs . existsSync ( path . join ( rootDir , entry . source_path ) ) )
181- . map ( ( entry ) => ( {
182- entry,
183- raw : fs . readFileSync ( path . join ( rootDir , entry . source_path ) , 'utf8' ) ,
184- } ) ) ;
185- }
186-
187- function lintCurrentLinks ( rootDir , manifest ) {
229+ function lintCurrentLinks ( rootDir , manifest , fileCache ) {
188230 const findings = [ ] ;
189- const indexes = buildIndexes ( rootDir , manifest ) ;
231+ const indexes = buildIndexes ( rootDir , manifest , fileCache ) ;
190232
191- for ( const { entry, raw } of scanMarkdownFiles ( rootDir , manifest ) ) {
192- for ( const link of extractMarkdownLinks ( raw ) ) {
233+ for ( const { entry, links } of fileCache ) {
234+ for ( const link of links ) {
193235 const resolved = resolveInternalTarget ( rootDir , entry . source_path , link . target , indexes ) ;
194236 if ( resolved . kind === 'skip' ) {
195237 continue ;
@@ -253,73 +295,87 @@ function recordOldPaths(records) {
253295 . filter ( ( record ) => isGovernedMarkdownPath ( record . oldPath ) ) ;
254296}
255297
256- function targetMatchesOldPath ( sourcePath , target , oldPath ) {
257- if ( isExternal ( target ) || isSkippable ( target ) ) {
258- return false ;
259- }
260- const { pathname } = splitTarget ( target ) ;
261- if ( ! pathname || pathname . startsWith ( '/' ) ) {
262- return false ;
263- }
264- const sourceDir = path . dirname ( sourcePath ) ;
265- const resolved = normalizePath ( path . normalize ( path . join ( sourceDir , pathname ) ) ) ;
266- const candidates = [ resolved ] ;
267- if ( ! path . extname ( resolved ) ) {
268- candidates . push ( `${ resolved } .md` , `${ resolved } .mdx` , `${ resolved } /index.md` , `${ resolved } /index.mdx` ) ;
// Build a reverse lookup from "path a link could point at" to the links that
// point there, visiting every (file, link) pair in `fileCache` exactly once.
//
// External links, skippable links, links without a pathname and links whose
// pathname starts with '/' are ignored. Every remaining link is resolved
// against the directory of its source file and registered under each entry of
// relativePathCandidates() for that resolved path.
//
// Returns a Map keyed by candidate path; each value is an array of
// `{ path, line, owner }` references. One reference object is shared by all
// the candidate buckets of the same link.
function buildInboundMarkdownIndex(fileCache) {
  const index = new Map();

  // Append `ref` under `key`, creating the bucket on first use.
  const register = (key, ref) => {
    const existing = index.get(key);
    if (existing) {
      existing.push(ref);
    } else {
      index.set(key, [ref]);
    }
  };

  for (const { entry, links } of fileCache) {
    const sourceDir = path.dirname(entry.source_path);
    for (const link of links) {
      const ignorable = isExternal(link.target) || isSkippable(link.target);
      if (ignorable) {
        continue;
      }
      const { pathname } = splitTarget(link.target);
      const isRelative = Boolean(pathname) && !pathname.startsWith('/');
      if (!isRelative) {
        continue;
      }
      const resolved = normalizePath(path.normalize(path.join(sourceDir, pathname)));
      const ref = { path: entry.source_path, line: link.line, owner: entry.owner };
      relativePathCandidates(resolved).forEach((candidate) => register(candidate, ref));
    }
  }

  return index;
}
272327
273- function findInboundMarkdownReferences ( rootDir , manifest , oldPath ) {
274- const refs = [ ] ;
275- for ( const { entry, raw } of scanMarkdownFiles ( rootDir , manifest ) ) {
276- for ( const link of extractMarkdownLinks ( raw ) ) {
277- if ( targetMatchesOldPath ( entry . source_path , link . target , oldPath ) ) {
278- refs . push ( { path : entry . source_path , line : link . line , owner : entry . owner } ) ;
// Build a reverse lookup from doc id to the sidebar files that mention it.
//
// The distinct, truthy `sidebar_source` values of the manifest entries are
// each loaded once through loadSidebarRefs(); sidebars reported as missing
// are skipped. Returns a Map keyed by doc id whose values are arrays of
// `{ path, line }` references, with `line` always 1.
function buildInboundSidebarIndex(rootDir, manifest) {
  const index = new Map();

  const sidebarSources = new Set();
  for (const entry of manifest.entries) {
    if (entry.sidebar_source) {
      sidebarSources.add(entry.sidebar_source);
    }
  }

  for (const sidebarSource of sidebarSources) {
    const loaded = loadSidebarRefs(rootDir, sidebarSource);
    if (!loaded.missing) {
      for (const docId of loaded.refs) {
        if (!index.has(docId)) {
          index.set(docId, []);
        }
        index.get(docId).push({ path: sidebarSource, line: 1 });
      }
    }
  }

  return index;
}
284347
285- function findInboundSidebarReferences ( rootDir , manifest , oldPath ) {
286- const oldDocId = stripMarkdownExtension ( oldPath )
// Convert the repository path of a changed Markdown file into the id used to
// look it up in the sidebar index.
//
// The Markdown extension is removed by stripMarkdownExtension(), then the
// replacements below run in order, each anchored at the start of the string:
//   docs/                                                   -> removed
//   versioned_docs/version-<x>/                             -> removed
//   i18n/zh-CN/docusaurus-plugin-content-docs/<current|version-<x>>/ -> removed
//   community/                                              -> 'community:'
//   i18n/zh-CN/docusaurus-plugin-content-docs-community/current/ -> 'community:'
//
// NOTE(review): because the steps are chained, a path such as
// `docs/community/x.md` ends up as `community:x`. Presumably intended or
// unreachable in this repo layout; confirm.
function oldPathToDocId(oldPath) {
  return stripMarkdownExtension(oldPath)
    .replace(/^docs\//, '')
    .replace(/^versioned_docs\/version-[^/]+\//, '')
    .replace(/^i18n\/zh-CN\/docusaurus-plugin-content-docs\/(?:current|version-[^/]+)\//, '')
    .replace(/^community\//, 'community:')
    .replace(/^i18n\/zh-CN\/docusaurus-plugin-content-docs-community\/current\//, 'community:');
}
302356
303- function lintMovedOrDeletedLinks ( rootDir , manifest , changedRecords ) {
357+ function lintMovedOrDeletedLinks ( rootDir , manifest , changedRecords , fileCache ) {
358+ const records = recordOldPaths ( changedRecords || [ ] ) ;
359+ if ( records . length === 0 ) {
360+ return [ ] ;
361+ }
362+
363+ const markdownIndex = buildInboundMarkdownIndex ( fileCache ) ;
364+ const sidebarIndex = buildInboundSidebarIndex ( rootDir , manifest ) ;
304365 const findings = [ ] ;
305- for ( const record of recordOldPaths ( changedRecords || [ ] ) ) {
366+
367+ for ( const record of records ) {
306368 const rule = record . status === 'R' ? 'link-moved-file-inbound-reference' : 'link-deleted-file-inbound-reference' ;
307- const refs = [
308- ...findInboundMarkdownReferences ( rootDir , manifest , record . oldPath ) ,
309- ...findInboundSidebarReferences ( rootDir , manifest , record . oldPath ) ,
310- ] ;
311- for ( const ref of refs ) {
312- findings . push (
313- makeFinding (
314- 'error' ,
315- rule ,
316- ref . path ,
317- ref . line ,
318- `Inbound link still points to changed path ${ record . oldPath } ; review and update target ${ record . path || '' } .` . trim ( ) ,
319- ref . owner ,
320- [ record . oldPath , record . path ] . filter ( Boolean ) ,
321- ) ,
322- ) ;
369+ const markdownRefs = markdownIndex . get ( record . oldPath ) || [ ] ;
370+ const sidebarRefs = sidebarIndex . get ( oldPathToDocId ( record . oldPath ) ) || [ ] ;
371+ const inboundMessage = `Inbound link still points to changed path ${ record . oldPath } ; review and update target ${ record . path || '' } .` . trim ( ) ;
372+ const relatedPaths = [ record . oldPath , record . path ] . filter ( Boolean ) ;
373+
374+ for ( const ref of markdownRefs ) {
375+ findings . push ( makeFinding ( 'error' , rule , ref . path , ref . line , inboundMessage , ref . owner , relatedPaths ) ) ;
376+ }
377+ for ( const ref of sidebarRefs ) {
378+ findings . push ( makeFinding ( 'error' , rule , ref . path , ref . line , inboundMessage , undefined , relatedPaths ) ) ;
323379 }
324380 findings . push (
325381 makeFinding (
@@ -329,7 +385,7 @@ function lintMovedOrDeletedLinks(rootDir, manifest, changedRecords) {
329385 1 ,
330386 `Markdown path changed from ${ record . oldPath } ; review redirects and inbound links before merging.` ,
331387 undefined ,
332- [ record . oldPath , record . path ] . filter ( Boolean ) ,
388+ relatedPaths ,
333389 ) ,
334390 ) ;
335391 }
@@ -395,11 +451,29 @@ function lintLinks(options = {}) {
395451 const manifest = options . manifest || buildManifest ( { rootDir } ) ;
396452 const changedFiles = options . changedFiles || [ ] ;
397453 const changedRecords = options . changedRecords || [ ] ;
398- return [
399- ...lintCurrentLinks ( rootDir , manifest ) ,
400- ...lintMovedOrDeletedLinks ( rootDir , manifest , changedRecords ) ,
401- ...lintSlugChanges ( rootDir , changedFiles ) ,
402- ] ;
454+ const progress = options . progress || createProgressLogger ( progressEnabled ( ) ) ;
455+
456+ progress ( 'lintLinks start' , {
457+ entries : manifest . entries . length ,
458+ changedFiles : changedFiles . length ,
459+ changedRecords : changedRecords . length ,
460+ } ) ;
461+
462+ const fileCache = collectFileCache ( rootDir , manifest ) ;
463+ progress ( 'file cache built' , { files : fileCache . length } ) ;
464+
465+ const currentFindings = lintCurrentLinks ( rootDir , manifest , fileCache ) ;
466+ progress ( 'lintCurrentLinks done' , { findings : currentFindings . length } ) ;
467+
468+ const movedFindings = lintMovedOrDeletedLinks ( rootDir , manifest , changedRecords , fileCache ) ;
469+ progress ( 'lintMovedOrDeletedLinks done' , { findings : movedFindings . length } ) ;
470+
471+ const slugFindings = lintSlugChanges ( rootDir , changedFiles ) ;
472+ progress ( 'lintSlugChanges done' , { findings : slugFindings . length } ) ;
473+
474+ const all = [ ...currentFindings , ...movedFindings , ...slugFindings ] ;
475+ progress ( 'lintLinks total' , { findings : all . length } ) ;
476+ return all ;
403477}
404478
405479function filterLinkFindings ( findings , changedFiles ) {
@@ -425,22 +499,40 @@ function hasLinkErrors(findings) {
// Command-line entry point: lint links and emit the findings as JSON.
//
// Options read from the parsed arguments:
//   root           directory to lint (defaults to the current working directory)
//   changed        limit scope to the files/records reported by
//                  getChangedFiles() / getChangedRecords()
//   files          comma-separated list of files to limit scope to
//   output         file (resolved against the root) to write the JSON to;
//                  without it the JSON goes to stdout
//   fail-on-errors set process.exitCode to 1 when hasLinkErrors() is true
//
// Progress lines go through the progress logger, not stdout, so the JSON
// payload is never mixed with diagnostics.
function runCli() {
  const args = parseArgs(process.argv.slice(2));
  const rootDir = args.root ? path.resolve(args.root) : process.cwd();
  const progress = createProgressLogger(progressEnabled());

  progress('CLI start', { mode: args.changed ? 'changed' : args.files ? 'files' : 'full' });
  // `null` means "no scope restriction"; filterLinkFindings receives it as-is.
  const changedFiles = args.changed ? getChangedFiles(rootDir) : args.files ? args.files.split(',') : null;
  const changedRecords = args.changed ? getChangedRecords(rootDir) : [];
  if (args.changed || args.files) {
    progress('changed inputs resolved', {
      changedFiles: (changedFiles || []).length,
      changedRecords: changedRecords.length,
    });
  }

  const manifest = buildManifest({ rootDir });
  progress('manifest built', { entries: manifest.entries.length });

  const rawFindings = lintLinks({ rootDir, manifest, changedFiles: changedFiles || [], changedRecords, progress });
  const findings = filterLinkFindings(rawFindings, changedFiles);
  if (changedFiles) {
    progress('filtered to changed scope', { kept: findings.length, dropped: rawFindings.length - findings.length });
  }

  const output = JSON.stringify({ schema_version: 1, findings }, null, 2);
  const payload = `${output}\n`;
  if (args.output) {
    const outputPath = path.resolve(rootDir, args.output);
    ensureDirForFile(outputPath);
    fs.writeFileSync(outputPath, payload, 'utf8');
    // Report the size actually written: string length counts UTF-16 code
    // units and would under-report any non-ASCII content.
    progress('output written', { path: args.output, bytes: Buffer.byteLength(payload, 'utf8') });
  } else {
    process.stdout.write(payload);
  }

  const hasErrors = hasLinkErrors(findings);
  progress('CLI done', { findings: findings.length, errors: hasErrors });
  if (args['fail-on-errors'] && hasErrors) {
    process.exitCode = 1;
  }
}
0 commit comments