Skip to content

Commit 9aa01f2

Browse files
committed
Fix broken legacy rdoc-ref labels and duplicate heading IDs
Decode legacy CGI-encoded labels (e.g., `What-27s+Here`) in rdoc-ref links so they resolve to the correct GitHub-style anchors. Also deduplicate heading IDs by appending -1, -2, etc. when multiple headings produce the same anchor (e.g., "Method match" and "Method match?" both becoming `method-match`). Fixes #1590
1 parent 23bccee commit 9aa01f2

6 files changed

Lines changed: 150 additions & 8 deletions

File tree

lib/rdoc/markup/to_html.rb

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@ def start_accepting
282282
@res = []
283283
@in_list_entry = []
284284
@list = []
285+
@heading_ids = {}
285286
end
286287

287288
##
@@ -412,8 +413,8 @@ def accept_blank_line(blank_line)
412413
def accept_heading(heading)
413414
level = [6, heading.level].min
414415

415-
label = heading.label @code_object
416-
legacy_label = heading.legacy_label @code_object
416+
label = deduplicate_heading_id(heading.label(@code_object))
417+
legacy_label = deduplicate_heading_id(heading.legacy_label(@code_object))
417418

418419
# Add legacy anchor before the heading for backward compatibility.
419420
# This allows old links with label- prefix to still work.
@@ -468,6 +469,20 @@ def accept_table(header, body, aligns)
468469

469470
# :section: Utilities
470471

472+
##
# Returns a heading ID that is unique among all IDs handed out since
# +@heading_ids+ was last reset. The first occurrence of +id+ is returned
# unchanged; later occurrences get -1, -2, etc. appended.
#
# Every ID that is returned is also recorded, so a later heading whose
# natural ID equals an already generated one (e.g. "Hello", "Hello",
# "Hello 1") still receives a fresh suffix instead of producing a
# duplicate anchor.

def deduplicate_heading_id(id)
  # Tolerate being called before the registry has been initialized.
  @heading_ids ||= {}

  unique_id = id

  # Bump the counter of the base ID until the candidate is unused.
  while @heading_ids.key?(unique_id)
    @heading_ids[id] += 1
    unique_id = "#{id}-#{@heading_ids[id]}"
  end

  # Register the emitted ID so it can never be handed out twice.
  @heading_ids[unique_id] = 0
  unique_id
end
485+
471486
##
472487
# CGI-escapes +text+
473488

lib/rdoc/markup/to_html_crossref.rb

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,11 @@ def cross_reference(name, text = nil, code = true, rdoc_ref: false)
6161

6262
name = name[1..-1] unless @show_hash if name[0, 1] == '#'
6363

64-
if !(name.end_with?('+@', '-@')) and name =~ /(.*[^#:])?@/
65-
text ||= [CGI.unescape($'), (" at <code>#{$1}</code>" if $~.begin(1))].join("")
64+
if !name.end_with?('+@', '-@') && match = name.match(/(.*[^#:])?@(.*)/)
65+
context_name = match[1]
66+
label = RDoc::Text.decode_legacy_label(match[2])
67+
text ||= "#{label} at <code>#{context_name}</code>" if context_name
68+
text ||= label
6669
code = false
6770
else
6871
text ||= name
@@ -168,9 +171,10 @@ def link(name, text, code = true, rdoc_ref: false)
168171
end
169172

170173
if label
171-
# Convert label to GitHub-style anchor format
172-
# First convert + to space (URL encoding), then apply GitHub-style rules
173-
formatted_label = RDoc::Text.to_anchor(label.tr('+', ' '))
174+
# Decode legacy labels (e.g., "What-27s+Here" -> "What's Here")
175+
# then convert to GitHub-style anchor format
176+
decoded_label = RDoc::Text.decode_legacy_label(label)
177+
formatted_label = RDoc::Text.to_anchor(decoded_label)
174178

175179
# Case 1: Path already has an anchor (e.g., method link)
176180
# Input: C1#method@label -> path="C1.html#method-i-m"
@@ -181,7 +185,7 @@ def link(name, text, code = true, rdoc_ref: false)
181185
# Case 2: Label matches a section title
182186
# Input: C1@Section -> path="C1.html", section "Section" exists
183187
# Output: C1.html#section (uses section.aref for GitHub-style)
184-
elsif (section = ref&.sections&.find { |s| label.tr('+', ' ') == s.title })
188+
elsif (section = ref&.sections&.find { |s| decoded_label == s.title })
185189
path << "##{section.aref}"
186190

187191
# Case 3: Ref has an aref (class/module context)

lib/rdoc/text.rb

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,4 +335,27 @@ def wrap(txt, line_len = 76)
335335
text.downcase.gsub(/[^a-z0-9 \-]/, '').gsub(' ', '-')
336336
end
337337

338+
##
# Decodes +label+ from the legacy RDoc label format, i.e. the result of
# CGI.escape with every '%' replaced by '-'.
#
# Decoding happens in two steps:
#
# 1. Every '+' becomes a space.
# 2. Every "-XX" sequence, where XX is an uppercase hex byte in the range
#    00-7F, is replaced by the character it encodes, unless that character
#    is an ASCII letter or digit. CGI.escape never encodes alphanumerics,
#    so such a sequence cannot come from the legacy format and is kept
#    literally.
#
# Note that this is a heuristic: a new-format label that happens to contain
# '-' followed by two uppercase hex digits encoding punctuation (such as
# "-2B") is decoded as well.
#
# Examples:
#   "What-27s+Here" -> "What's Here"  (legacy: -27 is apostrophe)
#   "Foo-3A-3ABar"  -> "Foo::Bar"     (legacy: -3A is colon)
#   "Whats-Here"    -> "Whats-Here"   (new format, unchanged)
#   "class-4Fther"  -> "class-4Fther" (-4F is 'O', kept literally)

module_function def decode_legacy_label(label)
  label.tr('+', ' ').gsub(/-([0-7][0-9A-F])/) do |encoded|
    decoded = [Regexp.last_match(1).hex].pack('C')

    if decoded.match?(/[a-zA-Z0-9]/)
      encoded
    else
      decoded
    end
  end
end
360+
338361
end

test/rdoc/markup/to_html_crossref_test.rb

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,23 @@ def test_convert_CROSSREF_section_with_spaces
111111
assert_equal para("<a href=\"C1.html#public-methods\">Public Methods at <code>C1</code></a>"), result
112112
end
113113

114+
# A legacy-encoded label (-27 for the apostrophe, + for the space) must link
# to the GitHub-style anchor and show the decoded text.
def test_convert_CROSSREF_legacy_label
  converted = @to.convert 'C1@What-27s+Here'
  expected = para("<a href=\"C1.html#class-c1-whats-here\">What\u2019s Here at <code>C1</code></a>")

  assert_equal expected, converted
end
118+
119+
# Legacy-encoded colons (-3A) are decoded for the link text and dropped from
# the generated anchor.
def test_convert_CROSSREF_legacy_label_colon
  converted = @to.convert 'C1@Foo-3A-3ABar'
  expected = para("<a href=\"C1.html#class-c1-foobar\">Foo::Bar at <code>C1</code></a>")

  assert_equal expected, converted
end
123+
124+
# When the decoded legacy label equals a section title, the link must point
# at that section's anchor.
def test_convert_CROSSREF_legacy_section
  @c1.add_section "What's Here"

  converted = @to.convert "C1@What-27s+Here"
  expected = para("<a href=\"C1.html#whats-here\">What\u2019s Here at <code>C1</code></a>")

  assert_equal expected, converted
end
130+
114131
def test_convert_CROSSREF_constant
115132
result = @to.convert 'C1::CONST'
116133

test/rdoc/markup/to_html_test.rb

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,56 @@ def test_accept_heading_pipe
360360
assert_equal "\n<h1 id=\"hello\">Hello</h1>\n", @to.res.join
361361
end
362362

363+
# Two identical headings: the second one gets a "-1" suffix on both its
# heading ID and its legacy anchor.
def test_accept_heading_duplicate
  @to.start_accepting

  2.times { @to.accept_heading @RM::Heading.new(2, 'Hello') }

  html = @to.res.join

  [
    /<h2 id="hello">/,
    /<h2 id="hello-1">/,
    /id="label-Hello" class="legacy-anchor"/,
    /id="label-Hello-1" class="legacy-anchor"/,
  ].each do |pattern|
    assert_match(pattern, html)
  end
end
375+
376+
# Headings that differ only in punctuation produce the same anchor, so the
# second one must be suffixed.
def test_accept_heading_duplicate_punctuation_collision
  @to.start_accepting

  ['Method match', 'Method match?'].each do |title|
    @to.accept_heading @RM::Heading.new(2, title)
  end

  html = @to.res.join

  assert_match(/<h2 id="method-match">/, html)
  assert_match(/<h2 id="method-match-1">/, html)
end
386+
387+
# Three identical headings: the suffix counter keeps increasing (-1, -2).
def test_accept_heading_three_duplicates
  @to.start_accepting

  3.times { @to.accept_heading @RM::Heading.new(2, 'Hello') }

  html = @to.res.join

  [
    /<h2 id="hello">/,
    /<h2 id="hello-1">/,
    /<h2 id="hello-2">/,
  ].each do |pattern|
    assert_match(pattern, html)
  end
end
399+
400+
# Calling start_accepting again begins a fresh document: the heading seen
# afterwards must get the plain ID, not a suffixed one.
def test_accept_heading_dedup_resets_on_start_accepting
  @to.start_accepting
  2.times { @to.accept_heading @RM::Heading.new(2, 'Hello') }

  @to.start_accepting
  @to.accept_heading @RM::Heading.new(2, 'Hello')

  html = @to.res.join

  assert_match(/<h2 id="hello">/, html)
  refute_match(/id="hello-1"/, html)
end
412+
363413
def test_accept_paragraph_newline
364414
hellos = ["hello", "\u{393 3b5 3b9 3ac} \u{3c3 3bf 3c5}"]
365415
worlds = ["world", "\u{3ba 3cc 3c3 3bc 3bf 3c2}"]

test/rdoc/markup/to_label_test.rb

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,4 +111,37 @@ def test_convert_tt
111111
assert_equal 'tt', @to.convert('<tt>tt</tt>')
112112
end
113113

114+
# Table-driven check of RDoc::Text.decode_legacy_label: each key is the
# input label and each value is the expected decoded label.
def test_decode_legacy_label
  cases = {
    # Legacy encoded characters are decoded
    "What-27s+Here" => "What's Here", # -27 = apostrophe
    "Foo-3A-3ABar"  => "Foo::Bar",    # -3A = colon
    "a-2Bb"         => "a+b",         # -2B = plus sign
    "foo+-25W+bar"  => "foo %W bar",  # -25 = percent, + = space
    "foo+bar"       => "foo bar",     # + = space

    # New-format labels pass through unchanged
    "Whats-Here"    => "Whats-Here",

    # -4F matches the pattern (first digit 0-7) but decodes to 'O', an
    # alphanumeric, so it is kept literally
    "class-4Fther"  => "class-4Fther",

    # Lowercase hex is not decoded (CGI.escape only produces uppercase)
    "a-3a-test"     => "a-3a-test",

    # -FE is outside the ASCII range (0x00-0x7F); first digit must be 0-7
    "x-FEy"         => "x-FEy",
  }

  cases.each do |input, expected|
    actual = RDoc::Text.decode_legacy_label(input)

    assert_equal expected, actual, "decode_legacy_label(#{input.inspect})"
  end
end
137+
138+
# Encoding a heading the legacy way and decoding it again must lead to the
# same anchor as converting the heading directly.
def test_decode_legacy_label_round_trip
  headings = ["What's Here", "Foo::Bar", "a + b", "Hello World"]

  headings.each do |heading|
    # Legacy format: CGI-escape, swap '%' for '-', drop a leading '-'.
    legacy_label = CGI.escape(heading).tr('%', '-').sub(/^-/, '')

    direct_anchor = RDoc::Text.to_anchor(heading)
    round_trip_anchor = RDoc::Text.to_anchor(RDoc::Text.decode_legacy_label(legacy_label))

    assert_equal direct_anchor, round_trip_anchor,
      "Round-trip failed for heading: #{heading.inspect}"
  end
end
114147
end

0 commit comments

Comments
 (0)