22import sys
33from pathlib import Path
44from urllib .parse import quote
5+ from typing import Dict , List
56
6- def build_note_index (root ):
7+ WikiTarget = str
8+ RelPathStr = str
9+
10+
def build_note_indices(root: Path):
    """
    Build the lookup tables used to resolve wiki links under *root*.

    Every ``*.md`` file below *root* is indexed by its path relative to
    *root* with the extension removed.

    Args:
        root: Directory that is searched recursively for Markdown notes.

    Returns:
        A ``(stem_to_paths, all_paths, path_to_url)`` tuple:

        - stem_to_paths: note stem (filename without extension) -> list of
          relative paths (no extension) of every note with that stem.
        - all_paths: POSIX string of the relative path (no extension) -> the
          same path as a ``Path``; used for explicit path matches.
        - path_to_url: POSIX string of the relative path (no extension) ->
          site URL like '/notes/.../File.html', percent-encoded with the
          slashes kept.
    """
    stem_to_paths: Dict[WikiTarget, List[Path]] = {}
    all_paths: Dict[RelPathStr, Path] = {}
    path_to_url: Dict[RelPathStr, str] = {}

    # rglob yields entries in an OS-dependent order. Sorting makes the
    # per-stem candidate lists (and any tie-break that relies on their
    # order) reproducible across machines and runs.
    for md_file in sorted(root.rglob("*.md")):
        rel = md_file.relative_to(root)
        rel_no_ext = rel.with_suffix("")
        key = rel_no_ext.as_posix()

        stem_to_paths.setdefault(md_file.stem, []).append(rel_no_ext)
        all_paths[key] = rel_no_ext
        # Percent-encode each segment but keep '/' as the separator.
        path_to_url[key] = "/notes/" + quote(rel.with_suffix(".html").as_posix(), safe="/")

    return stem_to_paths, all_paths, path_to_url
31+
32+
def top_level_segment(rel_path: Path) -> str:
    """Return the first component of *rel_path*, or "" if it has no components."""
    return next(iter(rel_path.parts), "")
36+
37+
def path_distance(from_dir: Path, to_path_no_ext: Path) -> int:
    """
    Heuristic distance between *from_dir* and the directory holding a note.

    When the note's directory lies at or below *from_dir*, the distance is
    how many levels deeper it is. Otherwise it is the number of steps up
    from *from_dir* to the deepest shared ancestor plus the steps down to
    the note's directory.
    """
    target_dir = to_path_no_ext.parent

    try:
        # Inside the current subtree: depth below from_dir, no '..' needed.
        return len(target_dir.relative_to(from_dir).parts)
    except ValueError:
        pass

    # Different branches: measure against the length of the common prefix.
    src_parts = from_dir.parts
    dst_parts = target_dir.parts
    shared = 0
    for src_seg, dst_seg in zip(src_parts, dst_parts):
        if src_seg != dst_seg:
            break
        shared += 1

    steps_up = len(src_parts) - shared
    steps_down = len(dst_parts) - shared
    return steps_up + steps_down
54+
55+
def resolve_target(current_file: Path, target: str, root: Path, stem_to_paths, all_paths) -> RelPathStr:
    """
    Resolve a wiki-link target to a root-relative note path without extension.

    Resolution order:
      1. A target containing '/' is tried as an explicit path, first as
         written (trailing slashes removed), then with spaces replaced by
         dashes. If neither is indexed, only its last segment is used below.
      2. Among notes whose stem equals the target: one in the same directory
         as *current_file*, else the closest one sharing the same top-level
         folder, else the closest one overall (closeness per path_distance).

    If no note has that stem, the target with spaces replaced by dashes is
    returned as a best-effort string that will likely not resolve.
    """
    if "/" in target:
        trimmed = target.rstrip("/")
        # Literal form first, then the dash-normalized form.
        for variant in (trimmed, trimmed.replace(" ", "-")):
            if variant in all_paths:
                return variant
        # No explicit match: continue with stem resolution on the last segment.
        target = Path(target).name

    matches: List[Path] = stem_to_paths.get(target, [])
    if not matches:
        return target.replace(" ", "-")

    here = current_file.parent.relative_to(root)

    def closeness(candidate: Path) -> int:
        return path_distance(here, candidate)

    # Same directory wins outright.
    for candidate in matches:
        if candidate.parent == here:
            return candidate.as_posix()

    # Otherwise prefer candidates under the same top-level folder, and fall
    # back to every candidate when there are none.
    own_top = top_level_segment(here)
    same_top = [c for c in matches if top_level_segment(c) == own_top]
    pool = same_top if same_top else matches
    return min(pool, key=closeness).as_posix()
93+
94+
95+ def convert_links (text : str , current_file : Path , root : Path , stem_to_paths , all_paths , path_to_url ) -> str :
96+ def url_for_rel_no_ext (rel_no_ext : RelPathStr ) -> str :
97+ url = path_to_url .get (rel_no_ext )
2598 if not url :
26- # Fallback: naive dash replacement, .html
27- url = "/notes/" + quote (target .replace (" " , "-" ) + ".html" )
99+ # fallback naive
100+ url = "/notes/" + quote (rel_no_ext + ".html" )
101+ return url
102+
103+ # [[Target|Text]] → - [Text]({{ '/notes/.../File.html' | relative_url }})
104+ def repl_wikilink_with_text (m ):
105+ target , link_text = m .group (1 ), m .group (2 )
106+ rel_no_ext = resolve_target (current_file , target , root , stem_to_paths , all_paths )
107+ url = url_for_rel_no_ext (rel_no_ext )
28108 return f"- [{ link_text } ]({{{{ '{ url } ' | relative_url }}}})"
29109
30110 text = re .sub (r"\[\[([^\]|]+)\|([^\]]+)\]\]" , repl_wikilink_with_text , text )
31111
32- # [[Target]] → - [Target]({{ '/notes/..../File.html' | relative_url }})
33- def repl_wikilink (match ):
34- target = match .group (1 )
35- url = note_index .get (target )
36- if not url :
37- url = "/notes/" + quote (target .replace (" " , "-" ) + ".html" )
112+ # [[Target]] → - [Target]({{ '/notes/.../File.html' | relative_url }})
113+ def repl_wikilink (m ):
114+ target = m .group (1 )
115+ rel_no_ext = resolve_target (current_file , target , root , stem_to_paths , all_paths )
116+ url = url_for_rel_no_ext (rel_no_ext )
38117 return f"- [{ target } ]({{{{ '{ url } ' | relative_url }}}})"
39118
40119 text = re .sub (r"\[\[([^\]]+)\]\]" , repl_wikilink , text )
41120
42121 # Ensure each list item is on its own line
43- # Insert a newline before any '- ' that doesn't already start a line
44122 text = re .sub (r"(?<!\n)- " , "\n - " , text )
45- # Trim leading newlines
46123 text = text .lstrip ("\n " )
47124 return text
48125
49- def process_file (path , note_index ):
50- with open (path , 'r' , encoding = 'utf-8' ) as f :
126+
def process_file(path: Path, root: Path, stem_to_paths, all_paths, path_to_url):
    """
    Convert the wiki links of one note in place.

    Reads *path* as UTF-8, runs ``convert_links`` on its content and, only
    when the result differs, writes it back as UTF-8 and prints a
    "Converted: ..." line. Unchanged files are not rewritten.

    Args:
        path: Markdown file to process.
        root: Notes root directory that *path* lives under.
        stem_to_paths: Stem -> candidate relative paths (no extension).
        all_paths: Mapping keyed by relative path string (no extension).
        path_to_url: Relative path string (no extension) -> site URL.
    """
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()

    new_content = convert_links(content, path, root, stem_to_paths, all_paths, path_to_url)
    if new_content == content:
        return

    # Use the same canonical codec name as the read above rather than a
    # padded variant that depends on codec-name normalization.
    with open(path, "w", encoding="utf-8") as f:
        f.write(new_content)
    print(f"Converted: {path} ")
57135
136+
if __name__ == "__main__":
    # Optional first CLI argument is the notes root; default is the current directory.
    cli_args = sys.argv[1:]
    notes_root = Path(cli_args[0]) if cli_args else Path(".")

    # Index every note once, then convert each Markdown file against that index.
    indices = build_note_indices(notes_root)
    for md_file in notes_root.rglob("*.md"):
        process_file(md_file, notes_root, *indices)
0 commit comments