@@ -64,6 +64,43 @@ interface CacheData {
6464 urls : Record < string , CacheEntry >
6565}
6666
/**
 * One place where an external link appears in a content file.
 */
interface LinkOccurrence {
  /** Path of the file in which the link was found. */
  file: string
  /** Line number of the link within that file. */
  line: number
  /** The link target exactly as written, before any normalization. */
  href: string
}
72+
/**
 * Normalize a URL for deduplication purposes:
 * - Remove the URL fragment (#anchor)
 * - Remove the trailing slash only for origin/root URLs
 *
 * For example, https://www.githubstatus.com and https://www.githubstatus.com/
 * are treated as the same URL.
 *
 * The root-URL collapse is skipped when it would change what the URL refers
 * to: when the scheme has an opaque origin (which serializes as the string
 * "null") or when the URL carries a username/password.
 *
 * @param href - The link target as written in the source file.
 * @returns The href without its fragment; a root URL is collapsed to its bare
 *   origin. Input that cannot be parsed as a URL is returned with only the
 *   fragment removed.
 */
function normalizeUrl(href: string): string {
  // Remove fragment
  const hashIndex = href.indexOf('#')
  const withoutFragment = hashIndex === -1 ? href : href.slice(0, hashIndex)

  // Remove trailing slash only for origin/root URLs
  try {
    const parsed = new URL(withoutFragment)
    const isRoot = parsed.pathname === '/' && !parsed.search
    // Schemes such as file: have an opaque origin, exposed as the literal
    // string "null"; returning that would replace the URL with garbage.
    const hasTupleOrigin = parsed.origin !== 'null'
    // `origin` never includes userinfo, so collapsing would silently drop it.
    const hasCredentials = parsed.username !== '' || parsed.password !== ''
    if (isRoot && hasTupleOrigin && !hasCredentials) {
      return parsed.origin
    }
  } catch {
    // Keep original if URL parsing fails.
  }
  return withoutFragment
}
95+
/**
 * Report whether a URL points at the docs.github.com host.
 *
 * @param url - The URL string to inspect.
 * @returns `true` when the URL parses and its hostname is exactly
 *   `docs.github.com`; `false` otherwise, including when parsing fails.
 */
function isDocsGithubUrl(url: string): boolean {
  let host: string
  try {
    host = new URL(url).hostname
  } catch {
    // Anything that is not a parseable URL cannot be a docs.github.com link.
    return false
  }
  return host === 'docs.github.com'
}
103+
67104/**
68105 * Sleep for a given number of milliseconds
69106 */
@@ -150,11 +187,11 @@ async function fetchWithTimeout(
150187/**
151188 * Extract all external links from content files
152189 */
153- async function extractAllExternalLinks ( ) : Promise < Map < string , { file : string ; line : number } [ ] > > {
154- const links = new Map < string , { file : string ; line : number } [ ] > ( )
190+ async function extractAllExternalLinks ( ) : Promise < Map < string , LinkOccurrence [ ] > > {
191+ const links = new Map < string , LinkOccurrence [ ] > ( )
155192
156193 // Find all Markdown files
157- const files = await glob ( 'content/**/*.md' )
194+ const files = await glob ( 'content/**/*.md' , { ignore : '**/README.md' } )
158195 console . log ( `Found ${ files . length } Markdown files to scan` )
159196
160197 const extractStart = Date . now ( )
@@ -175,13 +212,13 @@ async function extractAllExternalLinks(): Promise<Map<string, { file: string; li
175212 if ( ! link . href . startsWith ( 'https://' ) ) continue
176213 if ( isExcludedLink ( link . href ) ) continue
177214
178- // Normalize URL (remove anchors for checking)
179- const url = link . href . split ( '#' ) [ 0 ]
215+ // Normalize URL (remove anchors and trailing slashes for checking)
216+ const url = normalizeUrl ( link . href )
180217
181218 if ( ! links . has ( url ) ) {
182219 links . set ( url , [ ] )
183220 }
184- links . get ( url ) ! . push ( { file, line : link . line } )
221+ links . get ( url ) ! . push ( { file, line : link . line , href : link . href } )
185222 }
186223
187224 if ( ( i + 1 ) % 500 === 0 ) {
@@ -236,7 +273,19 @@ async function main() {
236273 // Extract all external links
237274 console . log ( 'Extracting external links from content files...' )
238275 const allLinks = await extractAllExternalLinks ( )
276+
277+ // Separate docs.github.com links — they're self-referential (this repo IS the docs site)
278+ // and will be reported separately as candidates for conversion to internal links.
279+ const selfReferentialLinks = new Map < string , LinkOccurrence [ ] > ( )
280+ for ( const [ url , occurrences ] of allLinks ) {
281+ if ( isDocsGithubUrl ( url ) ) {
282+ selfReferentialLinks . set ( url , occurrences )
283+ allLinks . delete ( url )
284+ }
285+ }
286+
239287 console . log ( `Found ${ allLinks . size } unique external URLs` )
288+ console . log ( `Found ${ selfReferentialLinks . size } self-referential docs.github.com URLs` )
240289 console . log ( '' )
241290
242291 if ( options . dryRun ) {
@@ -276,7 +325,7 @@ async function main() {
276325 if ( ! result . ok ) {
277326 for ( const occ of occurrences ) {
278327 brokenLinks . push ( {
279- href : url ,
328+ href : occ . href ,
280329 file : occ . file ,
281330 lines : [ occ . line ] ,
282331 statusCode : result . statusCode ,
@@ -318,28 +367,46 @@ async function main() {
318367 chalk . blue ( `Checked ${ checkedCount } URLs in ${ duration } s (${ cachedCount } from cache)` ) ,
319368 )
320369
321- if ( brokenLinks . length === 0 ) {
370+ // Build self-referential BrokenLink list for the report
371+ const selfReferentialBrokenLinks : BrokenLink [ ] = [ ]
372+ for ( const occurrences of selfReferentialLinks . values ( ) ) {
373+ for ( const occ of occurrences ) {
374+ selfReferentialBrokenLinks . push ( { href : occ . href , file : occ . file , lines : [ occ . line ] } )
375+ }
376+ }
377+
378+ if ( brokenLinks . length === 0 && selfReferentialBrokenLinks . length === 0 ) {
322379 console . log ( chalk . green ( '✅ All external links valid!' ) )
323380 process . exit ( 0 )
324381 }
325382
326383 // Generate report
327384 const report = generateExternalLinkReport ( brokenLinks , {
328385 actionUrl : process . env . ACTION_RUN_URL ,
386+ selfReferentialLinks : selfReferentialBrokenLinks ,
329387 } )
330388
331- console . log ( '' )
332- console . log ( chalk . red ( `❌ ${ report . uniqueTargets } domain(s) with broken links` ) )
333- console . log ( chalk . red ( ` ${ report . totalOccurrences } total occurrence(s)` ) )
389+ if ( brokenLinks . length === 0 ) {
390+ console . log ( chalk . green ( '✅ All external links valid!' ) )
391+ console . log (
392+ chalk . blue (
393+ `ℹ️ Found ${ selfReferentialBrokenLinks . length } docs.github.com absolute link occurrence(s) to convert.` ,
394+ ) ,
395+ )
396+ } else {
397+ console . log ( '' )
398+ console . log ( chalk . red ( `❌ ${ report . uniqueTargets } domain(s) with broken links` ) )
399+ console . log ( chalk . red ( ` ${ report . totalOccurrences } total occurrence(s)` ) )
334400
335- // Show summary by domain
336- console . log ( '' )
337- console . log ( 'Broken links by domain:' )
338- for ( const group of report . groups . slice ( 0 , 10 ) ) {
339- console . log ( ` ${ group . target } : ${ group . occurrences . length } occurrence(s)` )
340- }
341- if ( report . groups . length > 10 ) {
342- console . log ( ` ... and ${ report . groups . length - 10 } more domains` )
401+ // Show summary by domain
402+ console . log ( '' )
403+ console . log ( 'Broken links by domain:' )
404+ for ( const group of report . groups . slice ( 0 , 10 ) ) {
405+ console . log ( ` ${ group . target } : ${ group . occurrences . length } occurrence(s)` )
406+ }
407+ if ( report . groups . length > 10 ) {
408+ console . log ( ` ... and ${ report . groups . length - 10 } more domains` )
409+ }
343410 }
344411
345412 // Write artifact
@@ -351,7 +418,7 @@ async function main() {
351418 const createReport = process . env . CREATE_REPORT === 'true'
352419 const reportRepository = process . env . REPORT_REPOSITORY || 'github/docs-content'
353420
354- if ( createReport && process . env . GITHUB_TOKEN ) {
421+ if ( brokenLinks . length > 0 && createReport && process . env . GITHUB_TOKEN ) {
355422 console . log ( '' )
356423 console . log ( 'Creating issue report...' )
357424
0 commit comments