@@ -84,6 +84,12 @@ class MarkdownLink:
8484 suffix : str
8585
8686
@dataclass
class Segment:
    """A slice of document content tagged with how it should be treated."""

    # Segment category. ``iter_segments`` emits "text", "inline_code" and
    # "code_block"; ``rewrite_links`` only rewrites links in "text" segments.
    kind: str
    # The exact source text covered by this segment.
    text: str
91+
92+
def repo_root() -> Path:
    """Return the parent of the directory that contains this module.

    The path is derived from the resolved (absolute, symlink-free) location
    of this file.

    NOTE(review): this assumes the module lives exactly one directory below
    the repository root -- confirm if the file is ever moved.
    """
    return Path(__file__).resolve().parents[1]
8995
@@ -217,19 +223,6 @@ def parse_doc_target(target: str) -> tuple[str, str] | None:
217223 return base_target , anchor
218224
219225
220- def find_candidates_by_suffix (
221- language : str , suffix : str , source_pages : tuple [str , ...]
222- ) -> list [str ]:
223- prefix = f"{ language } /"
224- full_suffix = f"{ language } /{ suffix } "
225- return [
226- page
227- for page in source_pages
228- if page .startswith (prefix )
229- and (page == full_suffix or page .endswith (f"/{ suffix } " ))
230- ]
231-
232-
233226def find_existing_source_path (
234227 candidate : PurePosixPath ,
235228 source_root : Path ,
@@ -248,7 +241,14 @@ def find_existing_source_path(
248241 if not suffix :
249242 return ResolutionResult (resolved_path = None )
250243
251- matches = find_candidates_by_suffix (language , suffix , source_pages )
244+ prefix = f"{ language } /"
245+ full_suffix = f"{ language } /{ suffix } "
246+ matches = [
247+ page
248+ for page in source_pages
249+ if page .startswith (prefix )
250+ and (page == full_suffix or page .endswith (f"/{ suffix } " ))
251+ ]
252252 if len (matches ) == 1 :
253253 return ResolutionResult (resolved_path = matches [0 ])
254254 if len (matches ) > 1 :
@@ -292,30 +292,30 @@ def __init__(self, source_root: Path):
292292 self .source_root = Path (source_root )
293293 self .source_pages = discover_source_pages (str (self .source_root ))
294294
295- def resolve (self , target : str , source_path : str ) -> ResolutionResult :
296- parsed_target = parse_doc_target (target )
297- if parsed_target is None :
298- return ResolutionResult (resolved_path = None )
299-
300- base_target , _ = parsed_target
def resolve_base_target(
    self, base_target: str, source_path: str
) -> ResolutionResult:
    """Resolve an already-parsed link target (anchor stripped) to a page.

    Args:
        base_target: The path portion of a link target, without its anchor.
        source_path: The page the link appears in.

    Returns:
        The ``ResolutionResult`` produced by ``resolve_link_path`` using this
        resolver's source root and discovered source pages.
    """
    return resolve_link_path(
        base_target=base_target,
        source_path=source_path,
        source_root=self.source_root,
        source_pages=self.source_pages,
    )
307304
308- def resolve_path (self , target : str , source_path : str ) -> str | None :
309- return self .resolve (target , source_path ).resolved_path
def resolve_markdown_target(
    self, target: str, source_path: str
) -> tuple[str | None, str]:
    """Resolve a raw Markdown link target to ``(resolved_path, anchor)``.

    Args:
        target: The link target exactly as written in the Markdown source.
        source_path: The page the link appears in.

    Returns:
        ``(None, "")`` when ``parse_doc_target`` rejects the target.
        Otherwise the resolved page path (``None`` if resolution failed)
        together with the anchor parsed from the target.
    """
    parsed_target = parse_doc_target(target)
    if parsed_target is None:
        return None, ""

    base_target, anchor = parsed_target
    result = self.resolve_base_target(base_target, source_path)
    return result.resolved_path, anchor
311315
312- def rewrite_link_target (target : str , source_path : str , resolver : LinkResolver ) -> str :
313- parsed_target = parse_doc_target (target )
314- if parsed_target is None :
315- return target
316316
317- base_target , anchor = parsed_target
318- resolved = resolver .resolve_path ( base_target , source_path )
317+ def rewrite_link_target ( target : str , source_path : str , resolver : LinkResolver ) -> str :
318+ resolved , anchor = resolver .resolve_markdown_target ( target , source_path )
319319 if resolved is None :
320320 return target
321321
@@ -343,55 +343,55 @@ def rewrite_links_in_segment(
343343 return "" .join (result )
344344
345345
346- def rewrite_links (
347- content : str ,
348- source_path : str ,
349- resolver : LinkResolver ,
350- ) -> str :
351- parts : list [tuple [str , str ]] = []
def _iter_inline_segments(chunk: str):
    """Split text that contains no fenced block into text/inline-code segments."""
    cursor = 0
    for inline in INLINE_CODE_RE.finditer(chunk):
        # Skip zero-length text runs (chunk start or adjacent code spans).
        if inline.start() > cursor:
            yield Segment("text", chunk[cursor : inline.start()])
        yield Segment("inline_code", inline.group(0))
        cursor = inline.end()
    if cursor < len(chunk):
        yield Segment("text", chunk[cursor:])


def iter_segments(content: str):
    """Yield ``Segment`` objects covering *content* in document order.

    Fenced blocks matched by ``FENCED_BLOCK_RE`` are yielded whole as
    "code_block" segments. The text between and after them is split further
    into "inline_code" segments (matches of ``INLINE_CODE_RE``) and "text"
    segments. Empty text runs are never yielded, so concatenating the
    ``text`` of every segment reproduces *content*.
    """
    last_end = 0
    for fenced in FENCED_BLOCK_RE.finditer(content):
        # An empty slice yields nothing, so no explicit emptiness guard is needed.
        yield from _iter_inline_segments(content[last_end : fenced.start()])
        yield Segment("code_block", fenced.group(0))
        last_end = fenced.end()

    yield from _iter_inline_segments(content[last_end:])
382375
383- output .append (inline_match .group (0 ))
384- last_inline_end = inline_match .end ()
385376
386- after_inline = chunk [last_inline_end :]
387- if after_inline :
def rewrite_links(
    content: str,
    source_path: str,
    resolver: LinkResolver,
) -> str:
    """Return *content* with links rewritten outside of code.

    Only segments of kind "text" are passed through
    ``rewrite_links_in_segment``; every other segment kind produced by
    ``iter_segments`` is emitted verbatim.

    Args:
        content: Full Markdown text of one page.
        source_path: The page the content belongs to.
        resolver: Resolver handed to ``rewrite_links_in_segment``.
    """
    return "".join(
        rewrite_links_in_segment(
            segment.text,
            source_path=source_path,
            resolver=resolver,
        )
        if segment.kind == "text"
        else segment.text
        for segment in iter_segments(content)
    )
397397
@@ -404,12 +404,16 @@ def find_unresolved_doc_links(source_root: Path) -> list[str]:
404404 for source_path in resolver .source_pages :
405405 content = (root / source_path ).read_text (encoding = "utf-8" )
406406 for link in iter_markdown_links (content ):
407+ resolved_path , _ = resolver .resolve_markdown_target (
408+ link .target , source_path
409+ )
410+ if resolved_path is not None :
411+ continue
407412 parsed_target = parse_doc_target (link .target )
408413 if parsed_target is None :
409414 continue
410- resolution = resolver .resolve (link .target , source_path )
411- if resolution .resolved_path is not None :
412- continue
415+ base_target , _ = parsed_target
416+ resolution = resolver .resolve_base_target (base_target , source_path )
413417 if resolution .ambiguous_matches :
414418 unresolved .append (
415419 f"{ source_path } -> { link .target } (ambiguous: { ', ' .join (resolution .ambiguous_matches )} )" ,
0 commit comments