diff --git a/src/wikitextprocessor/node_expand.py b/src/wikitextprocessor/node_expand.py index 419932ec..825b7af0 100644 --- a/src/wikitextprocessor/node_expand.py +++ b/src/wikitextprocessor/node_expand.py @@ -139,7 +139,10 @@ def recurse(node: Union[GeneralNode, WikiNodeListArgs]) -> str: parts.append(recurse(node.children)) parts.append("\n|}\n") elif kind == NodeKind.TABLE_CAPTION: - parts.append("\n|+ {}\n".format(to_attrs(node))) + if tc_attrs := to_attrs(node): + parts.append("\n|+ {} |\n".format(tc_attrs)) + else: + parts.append("\n|+\n") parts.append(recurse(node.children)) elif kind == NodeKind.TABLE_ROW: parts.append("\n|- {}\n".format(to_attrs(node))) diff --git a/src/wikitextprocessor/parser.py b/src/wikitextprocessor/parser.py index 59b84dfd..a8fac10c 100644 --- a/src/wikitextprocessor/parser.py +++ b/src/wikitextprocessor/parser.py @@ -1414,7 +1414,8 @@ def table_start_fn(ctx: "Wtp", token: str) -> None: # something=other, something="other", something = 'other' attr_assignment_pair = ( - r"""\s*[^"'>/=\0-\037\s]+""" r"""\s*=\s*("[^"]*"|'[^']*'|[^"'<>`\s]+)""" + r"""\s*[^"'>/=\0-\037\s]+""" + r"""\s*=\s*("[^"]*"|'[^']*'|[^"'<>`\s]+)""" ) attr_assignments_re = re.compile( @@ -1428,7 +1429,7 @@ def check_for_attributes(ctx: "Wtp", node: WikiNode) -> tuple[bool, str]: # Old behavior added here to return earlier without needing # to use regex matching; if the old version worked, why not? - # If this fail, then resort to the reverse parsing + regex. + # If this fails, then resort to the reverse parsing + regex. _parser_merge_str_children(ctx) if len(node.children) == 1 and isinstance(node.children[0], str): ret = node.children.pop() @@ -1623,7 +1624,10 @@ def table_cell_fn(ctx: "Wtp", token: str) -> None: if len(node.children) == 1 and isinstance( attrs := node.children[0], str ): - node.children.pop() + # At this point of parsing, we're just behind the start + # of one of the above node types; if they are followed + # by a `|`, that means the first child is an attr section + node.children.pop(0) # Using the walrus operator and pop()ing without return # is just to make the type-checker happy without using # an assert that attrs is definitely a str... diff --git a/tests/test_node_expand.py b/tests/test_node_expand.py index 3fa1ba50..5b5f38db 100644 --- a/tests/test_node_expand.py +++ b/tests/test_node_expand.py @@ -154,7 +154,13 @@ def test_table2(self): self.backcvt('{| class="x"\n|}', '\n{| class="x"\n\n|}\n') def test_tablecaption1(self): - self.backcvt("{|\n|+\ncapt\n|}", "\n{| \n\n|+ \n\ncapt\n\n|}\n") + self.backcvt("{|\n|+\ncapt\n|}", "\n{| \n\n|+\n\ncapt\n\n|}\n") + + def test_tablecaption2(self): + self.backcvt( + "{|\n |+ class='foo' |\ncapt\n|}", + '\n{| \n\n|+ class="foo" |\n\ncapt\n\n|}\n', + ) def test_tablerowcell1(self): self.backcvt(