1818 * CHECK_ANCHORS - Whether to check anchor links (default: true)
1919 */
2020
21+ import fs from 'fs'
22+
2123import { program } from 'commander'
2224import chalk from 'chalk'
2325import { load } from 'cheerio'
@@ -31,6 +33,8 @@ import {
3133 checkInternalLink ,
3234 checkAssetLink ,
3335 isAssetLink ,
36+ extractLinksWithLiquid ,
37+ extractLinksFromMarkdown ,
3438} from '@/links/lib/extract-links'
3539import {
3640 type BrokenLink ,
@@ -62,34 +66,134 @@ interface CheckResult {
6266}
6367
/**
 * Cache of front-matter line offsets keyed by absolute file path, so each
 * source file is read from disk at most once per process even though both
 * getLinksFromMarkdown() and checkAnchorsOnPage() ask for the offset.
 */
const frontmatterLineOffsetCache = new Map<string, number>()

/**
 * Count how many lines the front-matter block occupies in the raw source file.
 *
 * `page.markdown` has front matter stripped, so line numbers produced by
 * markdown parsing are relative to the body. Adding the value returned here
 * converts them to line numbers in the file on disk.
 *
 * @param fullPath - Path of the markdown file to inspect.
 * @returns The 1-based line number of the closing `---` delimiter (which is
 *   also the number of lines the front matter occupies), or 0 when the file
 *   has no terminated front-matter block or cannot be read.
 */
function getFrontmatterLineOffset(fullPath: string): number {
  const cached = frontmatterLineOffsetCache.get(fullPath)
  if (cached !== undefined) return cached

  let offset = 0
  try {
    let raw = fs.readFileSync(fullPath, 'utf8')

    // A leading UTF-8 BOM would otherwise hide the opening fence.
    // NOTE(review): assumes the front-matter parser also ignores a BOM — confirm.
    if (raw.charCodeAt(0) === 0xfeff) raw = raw.slice(1)

    // An opening run of four or more dashes is a thematic break, not a fence,
    // so it must not trigger a search for a "closing" delimiter further down.
    if (raw.startsWith('---') && raw.charAt(3) !== '-') {
      // trimEnd() tolerates CRLF line endings and trailing spaces on the fence.
      const closingIndex = raw
        .split('\n')
        .findIndex((line, index) => index > 0 && line.trimEnd() === '---')

      // closingIndex is 0-based; +1 yields the 1-based line number of the
      // closing delimiter. Body content starts on the following line.
      if (closingIndex > 0) offset = closingIndex + 1
    }
  } catch {
    // Best effort: an unreadable file falls back to "no offset" rather than
    // aborting the link check.
  }

  frontmatterLineOffsetCache.set(fullPath, offset)
  return offset
}
83105
84- if ( href && href . startsWith ( '/' ) ) {
85- links . push ( { href, text } )
/**
 * Collect the internal links of a page, each tagged with its line number in
 * the source file.
 *
 * Which links are returned is decided by the Liquid-rendered content (the
 * result of extractLinksWithLiquid), so reusables are expanded and version
 * gating for the current context is respected. Line numbers, however, are
 * taken from the raw markdown so they do not drift with Liquid
 * post-processing. A link with no matching raw occurrence left to consume
 * (for example one that comes from a reusable) is reported at line 0.
 *
 * @param page - Page whose `markdown` and `fullPath` are inspected.
 * @param context - Rendering context passed to the Liquid helpers.
 * @returns One entry per rendered internal link, in rendered order.
 */
async function getLinksFromMarkdown(
  page: Page,
  context: Context,
): Promise<{ href: string; text: string | undefined; line: number }[]> {
  type RenderLiquidFn = (template: string, context: unknown) => Promise<string>
  type LocatedLink = { href: string; text: string | undefined; line: number }

  const containsLiquid = (value: string): boolean => value.includes('{%') || value.includes('{{')

  const fmOffset = getFrontmatterLineOffset(page.fullPath)
  const rawResult = extractLinksFromMarkdown(page.markdown)

  // The Liquid renderer is only imported when some raw href actually needs
  // it: either an internal href embeds Liquid, or a href starts with a tag.
  const needsLiquidHrefResolution =
    rawResult.internalLinks.some((l) => containsLiquid(l.href)) ||
    rawResult.liquidPrefixedLinks.length > 0
  const renderLiquid: RenderLiquidFn | null = needsLiquidHrefResolution
    ? (await import('@/content-render/liquid/index')).renderLiquid
    : null

  // href (in rendered form) -> file line numbers, in the order encountered.
  const rawLinesByHref = new Map<string, number[]>()
  const recordLine = (href: string, bodyLine: number): void => {
    const fileLine = bodyLine + fmOffset
    const known = rawLinesByHref.get(href)
    if (known) {
      known.push(fileLine)
    } else {
      rawLinesByHref.set(href, [fileLine])
    }
  }

  // Pass 1: ordinary internal links. A href that embeds Liquid is rendered on
  // its own so that its key equals what the rendered extraction will yield.
  for (const link of rawResult.internalLinks) {
    let canonicalHref = link.href
    if (renderLiquid && containsLiquid(canonicalHref)) {
      try {
        canonicalHref = (await renderLiquid(canonicalHref, context)).trim()
      } catch {
        // fall back to raw href if rendering fails
      }
    }
    recordLine(canonicalHref, link.line)
  }

  // Pass 2: hrefs that begin with a Liquid tag are not part of internalLinks.
  // Render each one and keep it only when the result is an internal path.
  if (renderLiquid) {
    for (const link of rawResult.liquidPrefixedLinks) {
      try {
        const rendered = (await renderLiquid(link.href, context)).trim().split('#')[0]
        if (rendered.startsWith('/')) {
          recordLine(rendered, link.line)
        }
      } catch {
        // skip — can't resolve line number for this link
      }
    }
  }

  // Per-href cursor: how many recorded line numbers were already handed out.
  const consumedByHref = new Map<string, number>()

  // The rendered extraction drives which links get checked. No try/catch is
  // placed around it here; the original code notes that extractLinksWithLiquid
  // handles Liquid render failures itself.
  const renderedResult = await extractLinksWithLiquid(page.markdown, context)

  const located: LocatedLink[] = []
  for (const { href, text } of renderedResult.internalLinks) {
    const cursor = consumedByHref.get(href) ?? 0
    consumedByHref.set(href, cursor + 1)

    const candidates = rawLinesByHref.get(href)
    const line = candidates && cursor < candidates.length ? candidates[cursor] : 0
    located.push({ href, text, line })
  }
  return located
}
94198
95199/**
@@ -111,6 +215,17 @@ async function checkAnchorsOnPage(
111215 }
112216
113217 try {
218+ // Extract anchor links from markdown first to get accurate line numbers
219+ const mdResult = extractLinksFromMarkdown ( page . markdown )
220+ const fmOffset = getFrontmatterLineOffset ( page . fullPath )
221+ const anchorLineMap = new Map < string , number > ( )
222+ for ( const link of mdResult . anchorLinks ) {
223+ // Store the first occurrence of each anchor href
224+ if ( ! anchorLineMap . has ( link . href ) ) {
225+ anchorLineMap . set ( link . href , link . line + fmOffset )
226+ }
227+ }
228+
114229 const html = await renderContent ( page . markdown , context )
115230 const $ = load ( html )
116231
@@ -126,10 +241,12 @@ async function checkAnchorsOnPage(
126241 const targetExists = $ ( `#${ escapedId } ` ) . length > 0 || $ ( `[name="${ targetId } "]` ) . length > 0
127242
128243 if ( ! targetExists ) {
244+ // Look up the line number from the markdown source
245+ const line = anchorLineMap . get ( href ) ?? 0
129246 brokenAnchors . push ( {
130247 href,
131248 file : page . relativePath ,
132- lines : [ 0 ] , // Line number not available from rendered HTML
249+ lines : [ line ] ,
133250 text : $ ( el ) . text ( ) ,
134251 isAutotitle : false ,
135252 } )
@@ -194,8 +311,8 @@ async function checkVersion(
194311 // awaits before the next begins), so there is no concurrent access to baseContext.
195312 baseContext . page = page
196313
197- // Get links from rendered page
198- const links = await getLinksFromRenderedPage ( page , permalink , baseContext )
314+ // Get links from markdown source (preserves accurate line numbers)
315+ const links = await getLinksFromMarkdown ( page , baseContext )
199316 totalLinksChecked += links . length
200317
201318 // Check each link
@@ -208,7 +325,7 @@ async function checkVersion(
208325 brokenLinks . push ( {
209326 href : link . href ,
210327 file : page . relativePath ,
211- lines : [ 0 ] ,
328+ lines : [ link . line ] ,
212329 text : link . text ,
213330 } )
214331 }
@@ -222,14 +339,14 @@ async function checkVersion(
222339 brokenLinks . push ( {
223340 href : link . href ,
224341 file : page . relativePath ,
225- lines : [ 0 ] ,
342+ lines : [ link . line ] ,
226343 text : link . text ,
227344 } )
228345 } else if ( result . isRedirect ) {
229346 redirectLinks . push ( {
230347 href : link . href ,
231348 file : page . relativePath ,
232- lines : [ 0 ] ,
349+ lines : [ link . line ] ,
233350 text : link . text ,
234351 isRedirect : true ,
235352 redirectTarget : result . redirectTarget ,
0 commit comments