Skip to content

Commit c59a7a8

Browse files
authored
Fix markdown table parser consuming lines without pipes as table rows (#1626)
The TableRow rule in the PEG grammar allowed rows with zero pipe characters (`TableItem2*`). This caused lines like `<br>` immediately following a table to be parsed as single-cell table rows, producing spurious `<td><br></td>` in the rendered HTML. Change `TableItem2*` to `TableItem2+` so that rows not starting with `|` must contain at least one `|` to be recognized as table rows.
1 parent 92b07d6 commit c59a7a8

File tree

3 files changed

+69
-18
lines changed

3 files changed

+69
-18
lines changed

lib/rdoc/markdown.kpeg

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,13 +1270,13 @@ Table = &{ github? }
12701270
TableHead = TableItem2+:items "|"? @Newline
12711271
{ items }
12721272

1273-
TableRow = ( ( TableItem:item1 TableItem2*:items { [item1, *items] } ):row | TableItem2+:row ) "|"? @Newline
1273+
TableRow = ( ( TableItem:item1 TableItem2+:items { [item1, *items] } ):row | TableItem2+:row ) "|"? @Newline
12741274
{ row }
12751275
TableItem2 = "|" TableItem
12761276
TableItem = < /(?:\\.|[^|\n])+/ >
12771277
{ text.strip.gsub(/\\([|])/, '\1') }
12781278

1279-
TableLine = ( ( TableAlign:align1 TableAlign2*:aligns {[align1, *aligns] } ):line | TableAlign2+:line ) "|"? @Newline
1279+
TableLine = ( ( TableAlign:align1 TableAlign2+:aligns {[align1, *aligns] } ):line | TableAlign2+:line ) "|"? @Newline
12801280
{ line }
12811281
TableAlign2 = "|" @Sp TableAlign
12821282
TableAlign = < /:?-+:?/ > @Sp

lib/rdoc/markdown.rb

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15937,7 +15937,7 @@ def _TableHead
1593715937
return _tmp
1593815938
end
1593915939

15940-
# TableRow = ((TableItem:item1 TableItem2*:items { [item1, *items] }):row | TableItem2+:row) "|"? @Newline { row }
15940+
# TableRow = ((TableItem:item1 TableItem2+:items { [item1, *items] }):row | TableItem2+:row) "|"? @Newline { row }
1594115941
def _TableRow
1594215942

1594315943
_save = self.pos
@@ -15954,14 +15954,21 @@ def _TableRow
1595415954
self.pos = _save2
1595515955
break
1595615956
end
15957+
_save3 = self.pos
1595715958
_ary = []
15958-
while true
15959-
_tmp = apply(:_TableItem2)
15960-
_ary << @result if _tmp
15961-
break unless _tmp
15959+
_tmp = apply(:_TableItem2)
15960+
if _tmp
15961+
_ary << @result
15962+
while true
15963+
_tmp = apply(:_TableItem2)
15964+
_ary << @result if _tmp
15965+
break unless _tmp
15966+
end
15967+
_tmp = true
15968+
@result = _ary
15969+
else
15970+
self.pos = _save3
1596215971
end
15963-
_tmp = true
15964-
@result = _ary
1596515972
items = @result
1596615973
unless _tmp
1596715974
self.pos = _save2
@@ -16077,7 +16084,7 @@ def _TableItem
1607716084
return _tmp
1607816085
end
1607916086

16080-
# TableLine = ((TableAlign:align1 TableAlign2*:aligns {[align1, *aligns] }):line | TableAlign2+:line) "|"? @Newline { line }
16087+
# TableLine = ((TableAlign:align1 TableAlign2+:aligns {[align1, *aligns] }):line | TableAlign2+:line) "|"? @Newline { line }
1608116088
def _TableLine
1608216089

1608316090
_save = self.pos
@@ -16094,14 +16101,21 @@ def _TableLine
1609416101
self.pos = _save2
1609516102
break
1609616103
end
16104+
_save3 = self.pos
1609716105
_ary = []
16098-
while true
16099-
_tmp = apply(:_TableAlign2)
16100-
_ary << @result if _tmp
16101-
break unless _tmp
16106+
_tmp = apply(:_TableAlign2)
16107+
if _tmp
16108+
_ary << @result
16109+
while true
16110+
_tmp = apply(:_TableAlign2)
16111+
_ary << @result if _tmp
16112+
break unless _tmp
16113+
end
16114+
_tmp = true
16115+
@result = _ary
16116+
else
16117+
self.pos = _save3
1610216118
end
16103-
_tmp = true
16104-
@result = _ary
1610516119
aligns = @result
1610616120
unless _tmp
1610716121
self.pos = _save2
@@ -16666,10 +16680,10 @@ def _DefinitionListDefinition
1666616680
Rules[:_CodeFence] = rule_info("CodeFence", "&{ github? } Ticks3 (@Sp StrChunk:format)? Spnl < ((!\"`\" Nonspacechar)+ | !Ticks3 /`+/ | Spacechar | @Newline)+ > Ticks3 @Sp @Newline* { verbatim = RDoc::Markup::Verbatim.new text verbatim.format = format.intern if format.instance_of?(String) verbatim }")
1666716681
Rules[:_Table] = rule_info("Table", "&{ github? } TableHead:header TableLine:line TableRow+:body { table = RDoc::Markup::Table.new(header, line, body) parse_table_cells(table) }")
1666816682
Rules[:_TableHead] = rule_info("TableHead", "TableItem2+:items \"|\"? @Newline { items }")
16669-
Rules[:_TableRow] = rule_info("TableRow", "((TableItem:item1 TableItem2*:items { [item1, *items] }):row | TableItem2+:row) \"|\"? @Newline { row }")
16683+
Rules[:_TableRow] = rule_info("TableRow", "((TableItem:item1 TableItem2+:items { [item1, *items] }):row | TableItem2+:row) \"|\"? @Newline { row }")
1667016684
Rules[:_TableItem2] = rule_info("TableItem2", "\"|\" TableItem")
1667116685
Rules[:_TableItem] = rule_info("TableItem", "< /(?:\\\\.|[^|\\n])+/ > { text.strip.gsub(/\\\\([|])/, '\\1') }")
16672-
Rules[:_TableLine] = rule_info("TableLine", "((TableAlign:align1 TableAlign2*:aligns {[align1, *aligns] }):line | TableAlign2+:line) \"|\"? @Newline { line }")
16686+
Rules[:_TableLine] = rule_info("TableLine", "((TableAlign:align1 TableAlign2+:aligns {[align1, *aligns] }):line | TableAlign2+:line) \"|\"? @Newline { line }")
1667316687
Rules[:_TableAlign2] = rule_info("TableAlign2", "\"|\" @Sp TableAlign")
1667416688
Rules[:_TableAlign] = rule_info("TableAlign", "< /:?-+:?/ > @Sp { text.start_with?(\":\") ? (text.end_with?(\":\") ? :center : :left) : (text.end_with?(\":\") ? :right : nil) }")
1667516689
Rules[:_DefinitionList] = rule_info("DefinitionList", "&{ definition_lists? } DefinitionListItem+:list { RDoc::Markup::List.new :NOTE, *list.flatten }")

test/rdoc/rdoc_markdown_test.rb

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,6 +1310,43 @@ def test_gfm_table_with_links_and_code
13101310
assert_equal expected, doc
13111311
end
13121312

1313+
def test_gfm_table_does_not_consume_following_line_without_pipe
1314+
doc = parse <<~MD
1315+
| Command | Shows |
1316+
|---------|-------|
1317+
| ri File | Document for Ruby class File. |
1318+
<br>
1319+
1320+
Following paragraph.
1321+
MD
1322+
1323+
head = %w[Command Shows]
1324+
align = [nil, nil]
1325+
body = [
1326+
['ri File', 'Document for Ruby class File.'],
1327+
]
1328+
expected = doc(
1329+
@RM::Table.new(head, align, body),
1330+
para('<br>'),
1331+
para('Following paragraph.')
1332+
)
1333+
1334+
assert_equal expected, doc
1335+
end
1336+
1337+
def test_gfm_table_separator_without_pipe_does_not_form_table
1338+
doc = parse <<~MD
1339+
| Command | Shows |
1340+
---------
1341+
| ri File | Document for Ruby class File. |
1342+
MD
1343+
1344+
# The separator line has no pipe, so this should NOT parse as a table.
1345+
# "| Command | Shows |" becomes a paragraph, "---" becomes a paragraph,
1346+
# and "| ri File | ..." becomes a paragraph.
1347+
assert(doc.parts.none? { |part| part.is_a?(@RM::Table) })
1348+
end
1349+
13131350
def test_gfm_table_with_backslashes_in_code_spans
13141351
doc = parse <<~MD
13151352
Plain text: `$\\\\` and `$\\\\ ` should work.

0 commit comments

Comments
 (0)