Skip to content

Commit a11fd5b

Browse files
committed
update
1 parent fa0d103 commit a11fd5b

1 file changed

Lines changed: 113 additions & 34 deletions

File tree

convert_wikilinks.py

Lines changed: 113 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,61 +2,140 @@
22
import sys
33
from pathlib import Path
44
from urllib.parse import quote
5+
from typing import Dict, List
56

6-
def build_note_index(root):
7+
WikiTarget = str
8+
RelPathStr = str
9+
10+
11+
def build_note_indices(root: Path):
    """
    Build the lookup tables used to resolve wiki links under ``root``.

    Every ``*.md`` file found recursively below ``root`` is indexed by its path
    relative to ``root`` with the extension removed.

    Args:
        root: directory that is searched recursively for Markdown notes.

    Returns:
        A 3-tuple ``(stem_to_paths, all_paths, path_to_url)``:
        - stem_to_paths: note stem (filename without extension) -> list of
          relative ``Path`` objects (no extension) that share that stem
        - all_paths: relative POSIX path string (no extension) -> the same
          location as a ``Path``; used for explicit path matches
        - path_to_url: relative POSIX path string (no extension) -> site URL
          of the form '/notes/.../File.html', percent-encoded with slashes kept
    """
    stem_to_paths: Dict[str, List[Path]] = {}
    all_paths: Dict[str, Path] = {}
    path_to_url: Dict[str, str] = {}

    # Directory traversal order depends on the filesystem. Sorting keeps the
    # per-stem candidate lists (and any tie-break that depends on their order)
    # identical from run to run and from machine to machine.
    for md_file in sorted(root.rglob("*.md")):
        rel_path = md_file.relative_to(root)
        rel_no_ext = rel_path.with_suffix("")
        key = rel_no_ext.as_posix()

        stem_to_paths.setdefault(md_file.stem, []).append(rel_no_ext)
        all_paths[key] = rel_no_ext
        # Percent-encode each path segment but keep '/' as the separator.
        path_to_url[key] = "/notes/" + quote(rel_path.with_suffix(".html").as_posix(), safe="/")

    return stem_to_paths, all_paths, path_to_url
31+
32+
33+
def top_level_segment(rel_path: Path) -> str:
    """Return the first component of ``rel_path``, or "" when it has none."""
    return next(iter(rel_path.parts), "")
36+
37+
38+
def path_distance(from_dir: Path, to_path_no_ext: Path) -> int:
    """Score how far the directory of ``to_path_no_ext`` is from ``from_dir``.

    Lower is closer. When the note's directory lies inside ``from_dir`` the
    score is the number of levels below it; otherwise it is the number of
    steps up from ``from_dir`` to the shared prefix plus the steps down to
    the note's directory.
    """
    target_dir = to_path_no_ext.parent
    try:
        # Inside the current subtree: depth below from_dir is the score.
        return len(target_dir.relative_to(from_dir).parts)
    except ValueError:
        pass

    # Different branches: count the leading components both paths share.
    origin_parts = from_dir.parts
    target_parts = target_dir.parts
    shared = 0
    for ours, theirs in zip(origin_parts, target_parts):
        if ours != theirs:
            break
        shared += 1

    steps_up = len(origin_parts) - shared
    steps_down = len(target_parts) - shared
    return steps_up + steps_down
54+
55+
56+
def resolve_target(current_file: Path, target: str, root: Path, stem_to_paths, all_paths) -> RelPathStr:
    """Resolve a wiki-link target to a note path relative to ``root``.

    Resolution order:
      1. A target containing '/' is looked up verbatim in ``all_paths`` (after
         stripping trailing slashes), then with spaces replaced by dashes.
         If neither matches, only its last segment is kept and resolved as a
         stem.
      2. Among the notes sharing that stem: a note in the same directory as
         ``current_file`` wins; otherwise the closest note (by
         ``path_distance``) inside the same top-level folder; otherwise the
         closest note overall.

    Args:
        current_file: path of the note containing the link; its parent must
            be located under ``root``.
        target: raw link target as written between ``[[`` and ``]]``.
        root: notes root that all indexed paths are relative to.
        stem_to_paths: mapping of note stem -> list of relative ``Path``
            objects without extension.
        all_paths: mapping keyed by relative POSIX path strings without
            extension.

    Returns:
        The relative POSIX path (no extension) of the chosen note. When no
        note matches, the target with spaces replaced by dashes is returned
        as a best-effort fallback (the resulting link will likely not exist).

    Raises:
        ValueError: if ``current_file``'s directory is not under ``root``.
    """
    # 1) Explicit path target: try exact match, then the dash-normalized form.
    if "/" in target:
        explicit = target.rstrip("/")
        if explicit in all_paths:
            return explicit
        dashed = explicit.replace(" ", "-")
        if dashed in all_paths:
            return dashed
        # Fall through to stem resolution using the last segment only.
        target = Path(target).name

    # 2) Resolve by stem.
    candidates: List[Path] = stem_to_paths.get(target, [])
    if not candidates:
        return target.replace(" ", "-")

    current_rel_dir = current_file.parent.relative_to(root)

    same_dir = [p for p in candidates if p.parent == current_rel_dir]
    if same_dir:
        return same_dir[0].as_posix()

    # Compare the top-level folder of the candidate's *directory*. Using the
    # candidate path itself would make a root-level note named like the
    # current top-level folder look as if it lived inside that folder.
    top_current = top_level_segment(current_rel_dir)
    same_top = [p for p in candidates if top_level_segment(p.parent) == top_current]

    # Prefer the same top-level folder; otherwise consider every candidate.
    pool = same_top or candidates
    best = min(pool, key=lambda p: path_distance(current_rel_dir, p))
    return best.as_posix()
93+
94+
95+
def convert_links(text: str, current_file: Path, root: Path, stem_to_paths, all_paths, path_to_url) -> str:
    """Rewrite wiki links in ``text`` as Markdown list items with Liquid URLs.

    ``[[Target|Text]]`` becomes ``- [Text]({{ '<url>' | relative_url }})`` and
    ``[[Target]]`` becomes ``- [Target]({{ '<url>' | relative_url }})``. The
    URL comes from ``path_to_url`` for the note chosen by ``resolve_target``;
    if that note is not indexed, a '/notes/<path>.html' URL is built from the
    resolved string instead. Afterwards a newline is inserted before every
    '- ' that is not already preceded by one, and leading newlines are removed.
    """

    def as_list_item(target: str, label: str) -> str:
        # Shared rendering for both link forms; only the label differs.
        rel_no_ext = resolve_target(current_file, target, root, stem_to_paths, all_paths)
        url = path_to_url.get(rel_no_ext)
        if not url:
            # Naive fallback when the resolved path is not an indexed note.
            url = "/notes/" + quote(rel_no_ext + ".html")
        return f"- [{label}]({{{{ '{url}' | relative_url }}}})"

    # Links with explicit text go first so the plain pattern below cannot
    # swallow the 'Target|Text' form.
    text = re.sub(
        r"\[\[([^\]|]+)\|([^\]]+)\]\]",
        lambda m: as_list_item(m.group(1), m.group(2)),
        text,
    )
    text = re.sub(
        r"\[\[([^\]]+)\]\]",
        lambda m: as_list_item(m.group(1), m.group(1)),
        text,
    )

    # Ensure each list item is on its own line.
    text = re.sub(r"(?<!\n)- ", "\n- ", text)
    return text.lstrip("\n")
48125

49-
def process_file(path, note_index):
50-
with open(path, 'r', encoding='utf-8') as f:
126+
127+
def process_file(path: Path, root: Path, stem_to_paths, all_paths, path_to_url):
    """Convert the wiki links of one note in place.

    Reads ``path`` as UTF-8, runs ``convert_links`` on its content and, only
    when the result differs, writes it back and prints a "Converted" line.
    """
    with open(path, "r", encoding="utf-8") as src:
        original = src.read()

    converted = convert_links(original, path, root, stem_to_paths, all_paths, path_to_url)
    if converted == original:
        # Nothing to rewrite; leave the file untouched.
        return

    with open(path, "w", encoding="utf-8") as dst:
        dst.write(converted)
    print(f"Converted: {path}")
57135

136+
58137
if __name__ == "__main__":
    # The notes root is the first CLI argument; default to the current directory.
    cli_args = sys.argv[1:]
    notes_root = Path(cli_args[0]) if cli_args else Path(".")

    # Index every note once, then rewrite each Markdown file against that index.
    stem_to_paths, all_paths, path_to_url = build_note_indices(notes_root)
    for md_file in notes_root.rglob("*.md"):
        process_file(md_file, notes_root, stem_to_paths, all_paths, path_to_url)

0 commit comments

Comments
 (0)