@@ -119,7 +119,42 @@ def extract_fragment_ids(html_path):
119119 return ids
120120
121121
122- def resolve (href , source_dir_str , source_str , root_str ):
122+ def _normalize_base_path (s ):
123+ """Coerce a base-path arg into the canonical '/prefix' form (leading
124+ slash, no trailing slash). Empty input maps to empty string."""
125+ if not s :
126+ return ""
127+ s = s .strip ().rstrip ("/" )
128+ if not s :
129+ return ""
130+ if not s .startswith ("/" ):
131+ s = "/" + s
132+ return s
133+
134+
135+ def _strip_base_path (path_str , base_path ):
136+ """Lop a base-path prefix off an absolute URL path, if it matches.
137+
138+ A Jekyll build with `--baseurl /twinBASIC-docs` produces hrefs like
139+ '/twinBASIC-docs/foo' that resolve, in the deployed site, to '/foo'
140+ under the actual root. This mirrors lychee's `--remap` regex but as
141+ a clean prefix strip:
142+
143+ '/twinBASIC-docs/foo' -> '/foo' (prefix + /...)
144+ '/twinBASIC-docs' -> '/' (bare prefix, treat as root)
145+ '/twinBASIC-docs-other' -> unchanged (only strip on '/' or end-of-string)
146+ '/foo' -> unchanged (no prefix match)
147+ """
148+ if not base_path :
149+ return path_str
150+ if path_str == base_path :
151+ return "/"
152+ if path_str .startswith (base_path + "/" ):
153+ return path_str [len (base_path ):]
154+ return path_str
155+
156+
157+ def resolve (href , source_dir_str , source_str , root_str , base_path = "" ):
123158 """Lexically resolve href -> (normalized_target_str, is_dir_link, fragment).
124159 Returns None for schemes/netlocs we skip. Uses only string ops — no
125160 filesystem syscalls (Path.resolve is ~110us per call on Windows).
@@ -129,6 +164,10 @@ def resolve(href, source_dir_str, source_str, root_str):
129164 for resolution: 'foo/' must resolve as a directory (try index files),
130165 while 'foo' falls through to fallback extensions ('foo.html') if no
131166 file/dir 'foo' exists.
167+
168+ base_path is an absolute-URL prefix to strip before resolving against
169+ root_str -- e.g. '/twinBASIC-docs' to handle a Jekyll --baseurl build.
170+ Only applied to absolute URLs; relative paths are unaffected.
132171 """
133172 if "#" in href :
134173 path_part , frag = href .split ("#" , 1 )
@@ -151,6 +190,7 @@ def resolve(href, source_dir_str, source_str, root_str):
151190 is_dir_link = path_str .endswith ("/" ) or path_str .endswith ("/." )
152191
153192 if path_str .startswith ("/" ):
193+ path_str = _strip_base_path (path_str , base_path )
154194 target = os .path .normpath (os .path .join (root_str , path_str .lstrip ("/" )))
155195 else :
156196 target = os .path .normpath (os .path .join (source_dir_str , path_str ))
@@ -242,6 +282,15 @@ def _build_parser():
242282 "as broken."
243283 ),
244284 )
285+ ap .add_argument (
286+ "--base-path" , default = "" , metavar = "PREFIX" ,
287+ help = (
288+ "URL-path prefix to strip from absolute URLs before resolving "
289+ "against --root-dir. Matches a Jekyll build's --baseurl, e.g. "
290+ "'/twinBASIC-docs'. Equivalent to a constrained form of "
291+ "lychee's --remap. Empty by default (no stripping)."
292+ ),
293+ )
245294 ap .add_argument (
246295 "--threads" , type = int , default = os .cpu_count () or 4 , metavar = "N" ,
247296 help = "Worker threads for HTML parsing. Default: CPU count." ,
@@ -292,6 +341,7 @@ def main():
292341 root_str = str (args .root_dir .resolve ()) if args .root_dir else ""
293342 fallback_exts = [e for e in args .fallback_extensions .split ("," ) if e ]
294343 index_files = [e for e in args .index_files .split ("," ) if e ]
344+ base_path = _normalize_base_path (args .base_path )
295345
296346 t0 = time .perf_counter ()
297347 html_files = _collect_html_files (args .inputs )
@@ -317,7 +367,7 @@ def main():
317367 rk = (src_dir , href )
318368 r = resolution_cache .get (rk , ...)
319369 if r is ...:
320- r = resolve (href , src_dir , src_str , root_str )
370+ r = resolve (href , src_dir , src_str , root_str , base_path )
321371 resolution_cache [rk ] = r
322372 if r is None :
323373 continue
@@ -346,24 +396,28 @@ def main():
346396 fragment_cache [f ] = ids
347397 t_fragments = time .perf_counter ()
348398
349- broken = []
350- for (target_str , is_dir , frag ), sources in unique_checks .items ():
399+ broken = [] # one entry per occurrence; for human-readable report
400+ broken_keys = set () # unique broken (target, is_dir, frag) keys
401+ for key , sources in unique_checks .items ():
402+ target_str , is_dir , frag = key
351403 resolved = target_resolution .get ((target_str , is_dir ))
352404 if resolved is None :
405+ broken_keys .add (key )
353406 for src_str , href in sources :
354407 broken .append ((src_str , href , "target not found" ))
355408 continue
356409 if frag and args .include_fragments :
357410 ids = fragment_cache .get (resolved , set ())
358411 if frag not in ids :
412+ broken_keys .add (key )
359413 for src_str , href in sources :
360414 broken .append ((src_str , href , f"fragment #{ frag } not found" ))
361415 t_done = time .perf_counter ()
362416
363417 total = len (occurrences )
364418 unique = len (unique_checks )
365- errors = len (broken )
366- ok = unique - errors
419+ errors_unique = len (broken_keys )
420+ ok_unique = unique - errors_unique
367421
368422 if broken :
369423 # Group by source file, lychee-style.
@@ -378,8 +432,8 @@ def main():
378432
379433 elapsed = t_done - t0
380434 print (
381- f"Checked { total } links ({ unique } unique) in { elapsed :.3f} s "
382- f"-- { ok } OK, { errors } errors "
435+ f"Checked { total } occurrences ({ unique } unique) in { elapsed :.3f} s "
436+ f"-- { ok_unique } OK, { errors_unique } broken "
383437 )
384438
385439 if args .verbose :
0 commit comments