11# frozen_string_literal: true
22
33# Patch for jekyll-relative-links (>=0.7.0): replace the O(N) linear
4- # scan in `url_for_path` with an O(1) hash lookup.
4+ # scan in `url_for_path` with an O(1) hash lookup, and extend lookup
5+ # to consult `permalink:` frontmatter and `redirect_from:` aliases
6+ # when a file-path match misses.
57#
6- # === The bug ===
8+ # === The perf bug ===
79#
810# `JekyllRelativeLinks::Generator#url_for_path` is invoked once for
911# every markdown link match (both inline `[X](Y)` and reference-style
2931# bulk of GENERATE on a build that otherwise takes ~600ms in that
3032# phase.
3133#
32- # === The fix ===
34+ # The perf fix builds a hash from `relative_path` (leading slash
35+ # stripped, matching the unpatched comparison) to the target object
36+ # once, and looks up by key thereafter. O(M*N) -> O(M+N). First-wins
37+ # semantics (`unless h.key?(key)`) match the unpatched `.find`.
3338#
34- # Build a hash from `relative_path` (with the leading slash stripped,
35- # to match the unpatched comparison) to the target object once, and
36- # look up by key thereafter. Hash construction is O(N) once; each
37- # subsequent lookup is O(1). Total cost drops from O(M*N) to O(M+N),
38- # and the GENERATE phase shrinks accordingly.
39+ # === The semantic gap ===
3940#
40- # The hash is built with first-wins semantics (`unless h.key?(key)`)
41- # to match the unpatched `.find`, which returns the first matching
42- # target. In practice `relative_path` is unique across pages, static
43- # files, and docs, so this only matters as defence against an
44- # unexpected duplicate -- but matching the upstream behaviour exactly
45- # keeps the patch a safe drop-in.
41+ # Upstream only matches the link path against `relative_path` (the
42+ # file's on-disk path). Pages that use `permalink:` frontmatter to
43+ # rename their URL slug are invisible to the gem -- e.g. source
44+ # `[twinBASIC Videos](Videos/tB)` targets `docs/Videos/twinBASIC.md`
45+ # (`permalink: /Videos/tB`), but the gem looks for `Videos/tB.md`,
46+ # doesn't find one, and leaves the link unrewritten. The rendered
47+ # HTML keeps the relative path, which works online only by accident
48+ # of relative-path math, and falls back further on `redirect_from:`
49+ # stubs as an undocumented safety net. In the PDF book (where chapter
50+ # bodies get concatenated under `/book.html`) the same relative path
51+ # can no longer reach the target page, and the rewriter that turns
52+ # in-book hrefs into chapter anchors can't match the unresolved form
53+ # either -- so cross-references break.
54+ #
55+ # The fix adds two fallback hashes after the file-path table:
56+ #
57+ # potential_targets_by_url keys: leading-slash-stripped
58+ # `page.url`. Both with- and
59+ # without-trailing-slash forms
60+ # are indexed for folder-style
61+ # index pages whose permalinks
62+ # end in `/`, so
63+ # `[X](Tutorials/CEF)` and
64+ # `[X](Tutorials/CEF/)` both
65+ # resolve.
66+ #
67+ # potential_targets_by_redirect_from keys: leading-slash-stripped,
68+ # trailing-slash-trimmed
69+ # `redirect_from` aliases.
70+ # Returns the target page
71+ # whose canonical permalink is
72+ # `page.url`, so url_for_path
73+ # emits the canonical form
74+ # rather than relying on the
75+ # redirect stub at runtime.
76+ #
77+ # `url_for_path` chains all three: file-path first (upstream behaviour
78+ # -- author-intended file references always win), then permalink, then
79+ # redirect_from. First hit wins. Misses still return nil and the gem
80+ # leaves the link unrewritten, matching upstream's fail-open contract.
4681#
4782# === Compatibility ===
4883#
4984# Targets the upstream gem version pinned by Gemfile.lock (0.7.0). The
50- # patch overrides only `url_for_path` and adds one new memoiser
51- # (`potential_targets_by_path`); every other method is untouched. The
52- # `unless method_defined?` guard makes the patch idempotent against
53- # accidental double-load.
85+ # patch overrides only `url_for_path` and adds three new memoisers
86+ # (`potential_targets_by_path`, `..._by_url`, `..._by_redirect_from`);
87+ # every other method is untouched. The `unless method_defined?` guard
88+ # makes the patch idempotent against accidental double-load.
5489#
5590# If a future release rewrites `url_for_path`, re-verify that the
5691# replacement still resolves a path to a target by scanning
57- # `potential_targets` (or an equivalent) and that swapping in a hash
58- # lookup remains a faithful drop-in. If the upstream project takes a
59- # PR for this, delete this file.
92+ # `potential_targets` (or an equivalent) and that swapping in the
93+ # three-tier hash lookup remains a faithful extension. If the upstream
94+ # project takes a PR for this, delete this file.
6095
6196require "jekyll-relative-links"
6297
@@ -70,9 +105,66 @@ def potential_targets_by_path
70105 end
71106 end
72107
108+ # Pages indexed by their rendered URL (permalink), leading slash
109+ # stripped to match the form `path_from_root` produces. Folder-
110+ # style permalinks (URL ending in `/`) are also indexed under
111+ # their trimmed form so source markdown can drop the trailing
112+ # slash. Restricted to pages and writable docs -- static files
113+ # have a `url` but it's just the file path, which the by_path
114+ # table already covers.
115+ #
116+ # `JekyllRedirectFrom::RedirectPage` instances are excluded:
117+ # the jekyll-redirect-from plugin synthesizes a stub page for
118+ # every `redirect_from` alias, each with `url` equal to the
119+ # alias itself. Indexing those would route source links through
120+ # the redirect stub (a one-hop intermediate that only works in
121+ # a browser) instead of resolving straight to the canonical
122+ # target. The `by_redirect_from` table below indexes the same
123+ # aliases but points at the canonical page, which is what we
124+ # want.
def potential_targets_by_url
  @potential_targets_by_url ||= begin
    # Resolve the redirect-stub class once; stays nil when the
    # jekyll-redirect-from constant is not loaded, in which case no
    # page is ever treated as a stub.
    stub_class = JekyllRedirectFrom::RedirectPage if defined?(JekyllRedirectFrom::RedirectPage)

    index = {}
    (site.pages + site.docs_to_write).each do |page|
      next if stub_class && page.is_a?(stub_class)

      url = page.url.to_s
      # Blank URLs and the site root would collapse to an empty key.
      next if url.empty? || url == "/"

      # First writer wins for every key, so an earlier page is never
      # displaced by a later one that renders to the same URL.
      slug = url.sub(%r!\A/!, "")
      index[slug] ||= page
      next unless slug.end_with?("/")

      # Folder-style URL: also register the form without the trailing
      # slash so either spelling of the link finds the page.
      index[slug.chomp("/")] ||= page
    end
    index
  end
end
143+
144+ # Pages indexed by their `redirect_from` aliases (set by the
145+ # jekyll-redirect-from plugin). Each alias is normalised to the
146+ # leading-slash-stripped, trailing-slash-trimmed form so source
147+ # markdown using a historical URL (e.g. a moved page's old slug)
148+ # resolves to the page's current canonical URL.
def potential_targets_by_redirect_from
  @potential_targets_by_redirect_from ||= begin
    index = {}
    (site.pages + site.docs_to_write).each do |page|
      # `redirect_from` may be absent, a single value, or a list;
      # Array() folds all three shapes into something iterable.
      Array(page.data["redirect_from"]).each do |raw_alias|
        # Normalise: drop one leading slash and one trailing slash.
        slug = raw_alias.to_s.sub(%r!\A/!, "").chomp("/")
        # Blank aliases and a bare "/" both normalise to "" -- skip.
        next if slug.empty?

        # First page to claim an alias keeps it.
        index[slug] ||= page
      end
    end
    index
  end
end
162+
def url_for_path(path)
  decoded = CGI.unescape(path)
  # The permalink and redirect tables are probed with the trailing
  # slash removed; the file-path table gets the path as written.
  slashless = decoded.chomp("/")

  # Three-tier lookup, first hit wins: on-disk path, then rendered
  # URL, then redirect_from alias. Later tables are only consulted
  # (and therefore only built) when the earlier ones miss.
  hit = potential_targets_by_path[decoded]
  hit ||= potential_targets_by_url[slashless]
  hit ||= potential_targets_by_redirect_from[slashless]
  return unless hit

  # A target without a URL yields nil, same as a miss.
  relative_url(hit.url) if hit.url
end
78170 end
0 commit comments