Skip to content

Commit cf762e7

Browse files
Merge pull request #408 from Yorwba/empty-first-table-cell
Correctly parse table with first cell of a row being empty
2 parents a050b89 + 44d1268 commit cf762e7

2 files changed

Lines changed: 42 additions & 25 deletions

File tree

src/wikitextprocessor/parser.py

Lines changed: 16 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1607,23 +1607,23 @@ def table_cell_fn(ctx: "Wtp", token: str) -> None:
16071607
# data cells
16081608
_parser_merge_str_children(ctx)
16091609
node = ctx.parser_stack[-1]
1610-
if (
1611-
not node.attrs
1612-
and len(node.children) == 1
1613-
and isinstance(attrs := node.children[0], str)
1610+
if node.kind in (
1611+
NodeKind.TABLE_CAPTION,
1612+
NodeKind.TABLE_HEADER_CELL,
1613+
NodeKind.TABLE_CELL,
16141614
):
1615-
if node.kind in (
1616-
NodeKind.TABLE_CAPTION,
1617-
NodeKind.TABLE_HEADER_CELL,
1618-
NodeKind.TABLE_CELL,
1619-
):
1620-
node.children.pop()
1621-
# Using the walrus operator and pop()ing without return
1622-
# is just to make the type-checker happy without using
1623-
# an assert that attrs is definitely a str...
1624-
parse_attrs(node, attrs)
1615+
if len(node.attrs) == 0:
1616+
if len(node.children) == 1 and isinstance(
1617+
attrs := node.children[0], str
1618+
):
1619+
node.children.pop()
1620+
# Using the walrus operator and pop()ing without return
1621+
# is just to make the type-checker happy without using
1622+
# an assert that attrs is definitely a str...
1623+
parse_attrs(node, attrs)
16251624
return
1626-
return text_fn(ctx, token)
1625+
else:
1626+
return text_fn(ctx, token)
16271627

16281628
while True:
16291629
node = ctx.parser_stack[-1]
@@ -1676,16 +1676,6 @@ def double_vbar_fn(ctx: "Wtp", token: str) -> None:
16761676
vbar_fn(ctx, "|")
16771677
return
16781678

1679-
# If it is at the beginning of a line, interpret it as starting a new
1680-
# cell, without any HTML attributes. We do this by emitting one vbar.
1681-
if ctx.beginning_of_line and ctx.begline_enabled:
1682-
if _parser_have(ctx, NodeKind.TABLE):
1683-
vbar_fn(ctx, "|")
1684-
else:
1685-
vbar_fn(ctx, "|")
1686-
vbar_fn(ctx, "|")
1687-
return
1688-
16891679
while True:
16901680
node = ctx.parser_stack[-1]
16911681
if node.kind == NodeKind.TABLE_ROW:
@@ -2147,6 +2137,7 @@ def magicword_fn(ctx: "Wtp", token: str) -> None:
21472137
r"!!",
21482138
r"\s*https?://[\w.-]+(/[^][{}<>|\s]*)?",
21492139
r"^[ \t]*!",
2140+
r"^\|",
21502141
r"\|\|",
21512142
r"\|",
21522143
r"^----+",

tests/test_parser.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2101,6 +2101,9 @@ def test_table_hdr4(self):
21012101
)
21022102
row = tree.children[0].children[0]
21032103
self.assertEqual(len(row.children), 3)
2104+
self.assertEqual(row.children[0].children, ["bar\n"])
2105+
self.assertEqual(row.children[1].children, ["baz\n"])
2106+
self.assertEqual(row.children[2].children, [" zap\n"])
21042107

21052108
def test_table_bang1(self):
21062109
# Testing that the single exclamation mark in the middle of a table
@@ -2115,6 +2118,29 @@ def test_table_bang1(self):
21152118
self.assertEqual(self.ctx.warnings, [])
21162119
self.assertEqual(self.ctx.debugs, [])
21172120

2121+
def test_table_triple_vbar(self):
2122+
# Testing parsing for an empty cell at the beginning of the row.
2123+
# en edition page "돌아가시다", Template:ko-conj/verb
2124+
tree = self.parse(
2125+
"test",
2126+
"""{|
2127+
|-
2128+
|||foo
2129+
|}""",
2130+
)
2131+
self.assertEqual(len(tree.children), 1)
2132+
t = tree.children[0]
2133+
self.assertEqual(t.kind, NodeKind.TABLE)
2134+
self.assertEqual(len(t.children), 1)
2135+
row = t.children[0]
2136+
self.assertEqual(row.kind, NodeKind.TABLE_ROW)
2137+
self.assertEqual(len(row.children), 2)
2138+
a, b = row.children
2139+
self.assertEqual(a.kind, NodeKind.TABLE_CELL)
2140+
self.assertEqual(a.children, [])
2141+
self.assertEqual(b.kind, NodeKind.TABLE_CELL)
2142+
self.assertEqual(b.children, ["foo\n"])
2143+
21182144
def test_error1(self):
21192145
self.parse("test", "'''")
21202146
self.assertEqual(self.ctx.errors, [])

0 commit comments

Comments (0)