Skip to content

Commit 78f8aec

Browse files
authored
Fix block content leaking out of marker-line nested-list items (#251)
A nested list opened on its parent item's marker line (`- - A`) was treated as line-scoped: the inner list was materialized from an isolated single-line slice, so its item closed immediately. A block placed in that item then leaked out to the outer item, and a following same-indent marker fragmented the list into two.

The reference djot.js 0.3.2 keeps the inner item open: it absorbs blocks indented to its content column and continues the list across following same-indent markers. djot-php now matches that.

When an item's content itself begins with a list marker, collect the whole nested region (lines indented past the inner marker column, plus markers at it, across blank lines) into the item's lines and parse them as blocks. The existing recursive list parser then builds a persistent inner list - reusing the nested-list handling that already works for a sublist appearing on a following line, rather than adding a parallel path. A non-marker line at the inner marker column, or anything less indented, stays outer-item content.

Cases now matching the reference:

- `- - A\n\n block for A\n - B` -> inner item A keeps the block; B stays in the same inner list.
- `- - A\n\n block under A` -> block stays inside inner item A (no leak to the outer item).
- `- - A\n - B\n - C` -> single tight inner list [A, B, C] (unchanged).

The paragraph-interrupt rule and bare-marker rejection are untouched. carve-php inherits this parser and was affected by the same bug.
1 parent 58a181a commit 78f8aec

2 files changed

Lines changed: 166 additions & 2 deletions

File tree

src/Parser/BlockParser.php

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2052,6 +2052,18 @@ protected function tryParseList(Node $parent, array $lines, int $start): ?int
20522052
/** @var string $itemContent */
20532053
$itemContent = $itemInfo['content'];
20542054

2055+
// A sublist opened on this item's marker line (`- - A`): the content
2056+
// itself begins with a list marker. Such an inner list must behave like
2057+
// any other nested container - its item stays open and absorbs blocks
2058+
// indented to the inner content column, and a following same-indent
2059+
// marker continues it. We achieve that by collecting the whole nested
2060+
// region into the item's lines below and parsing it as blocks, so the
2061+
// recursive list parser builds a persistent inner list (matching the
2062+
// reference djot.js). Definition lists keep their dedicated handling.
2063+
$markerLineSublist = $itemContent !== ''
2064+
&& ($subMarker = $this->listParser->parseListItemMarker($itemContent)) !== null
2065+
&& $subMarker['type'] !== ListBlock::TYPE_DEFINITION;
2066+
20552067
// Collect item content lines (without blank line = tight continuation)
20562068
/** @var array<string> $itemLines */
20572069
$itemLines = [$itemContent];
@@ -2141,6 +2153,73 @@ protected function tryParseList(Node $parent, array $lines, int $start): ?int
21412153
$i++;
21422154
}
21432155

2156+
// Marker-line sublist (`- - A`): pull the whole nested region into
2157+
// this item's lines so the recursive parse builds a persistent inner
2158+
// list. A line that belongs to the inner list is either indented past
2159+
// the inner marker column (`> $contentIndent` - inner item content or
2160+
// a deeper sublist), or a list marker sitting exactly at the inner
2161+
// marker column (`== $contentIndent` - a following/sibling item). A
2162+
// non-marker line at the inner marker column, or anything less
2163+
// indented, is outer-item (or higher-level) content and ends the
2164+
// region, left for the outer loop to attach correctly. Without this
2165+
// the outer "indented continuation" path would steal the inner item's
2166+
// blocks and attach them beside the inner list, on the outer item.
2167+
$markerLineSublistParsed = false;
2168+
if ($markerLineSublist) {
2169+
$j = $i;
2170+
$gathered = [];
2171+
$consumedThrough = $i;
2172+
while ($j < $count) {
2173+
$subLine = $lines[$j];
2174+
if (IndentationHelper::isBlankLine($subLine)) {
2175+
$gathered[] = '';
2176+
$j++;
2177+
2178+
continue;
2179+
}
2180+
$lineIndent = IndentationHelper::getLeadingSpaces($subLine);
2181+
if ($lineIndent < $contentIndent) {
2182+
break;
2183+
}
2184+
if (
2185+
$lineIndent === $contentIndent
2186+
&& $this->listParser->parseListItemMarker(ltrim($subLine)) === null
2187+
) {
2188+
// A non-marker line sitting exactly at the inner marker
2189+
// column is the inner item's content only when it lazily
2190+
// continues an open paragraph (the previously gathered line
2191+
// is non-blank). After a blank line, or with nothing open
2192+
// before it, it is outer-item content and ends the region.
2193+
$previousIsOpenParagraph = $gathered !== []
2194+
&& $gathered[array_key_last($gathered)] !== '';
2195+
if (!$previousIsOpenParagraph) {
2196+
break;
2197+
}
2198+
}
2199+
$gathered[] = IndentationHelper::stripLeadingIndent($subLine, $contentIndent);
2200+
$j++;
2201+
$consumedThrough = $j;
2202+
}
2203+
// Drop trailing blank lines collected past the last real content.
2204+
while ($gathered !== [] && $gathered[array_key_last($gathered)] === '') {
2205+
array_pop($gathered);
2206+
}
2207+
if ($gathered !== []) {
2208+
if ($itemLines !== [] && $itemLines[array_key_last($itemLines)] !== '') {
2209+
$itemLines[] = '';
2210+
}
2211+
foreach ($gathered as $gatheredLine) {
2212+
$itemLines[] = $gatheredLine;
2213+
}
2214+
// Resume after the last consumed content line, not past blank
2215+
// lines that precede outer content (the outer loop needs them
2216+
// to detect the loose-list boundary).
2217+
$i = $consumedThrough;
2218+
$hasNonMarkerContinuation = true;
2219+
$markerLineSublistParsed = true;
2220+
}
2221+
}
2222+
21442223
// Check for list item attributes on the next line.
21452224
//
21462225
// Rule: a standalone {...} line attaches to the <li> ONLY when it
@@ -2158,8 +2237,8 @@ protected function tryParseList(Node $parent, array $lines, int $start): ?int
21582237
$markerAttrsRaw = $itemInfo['attrs'];
21592238
$itemAttributes = AttributeParser::parseOrdered($markerAttrsRaw);
21602239
}
2161-
$parseItemLinesAsBlocks = false;
2162-
if ($i < $count) {
2240+
$parseItemLinesAsBlocks = $markerLineSublistParsed;
2241+
if (!$markerLineSublistParsed && $i < $count) {
21632242
$potentialAttrLine = $lines[$i];
21642243
$trimmedAttrLine = ltrim($potentialAttrLine);
21652244
if (

tests/TestCase/NestedListEdgeCasesTest.php

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,4 +772,89 @@ public function testListItemOverIndentedContinuationWithoutNestedMarker(): void
772772
$this->assertStringContainsString("a\nc", $result);
773773
$this->assertStringNotContainsString(' c', $result);
774774
}
775+
776+
// ============ Sublist opened on the parent item's marker line ============
777+
//
778+
// `- - A` opens a nested list on the OUTER item's marker line. That inner
779+
// list must behave like any other nested container: its item stays open,
780+
// absorbs a block indented to the inner content column, and a following
781+
// same-indent marker continues it. Output is pinned to the reference
782+
// djot.js 0.3.2. (carve-php inherits this parser, so it was affected too.)
783+
784+
public function testMarkerLineSublistItemAbsorbsBlockAndKeepsFollowingMarker(): void
785+
{
786+
// Case 1: the inner item A keeps the block indented to its content
787+
// column, and a following same-indent marker stays in the same list.
788+
$result = $this->converter->convert("- - A\n\n block for A\n - B");
789+
790+
$expected = <<<'HTML'
791+
<ul>
792+
<li>
793+
<ul>
794+
<li>
795+
<p>A</p>
796+
<p>block for A</p>
797+
</li>
798+
<li>
799+
<p>B</p>
800+
</li>
801+
</ul>
802+
</li>
803+
</ul>
804+
805+
HTML;
806+
807+
$this->assertSame($expected, $result);
808+
}
809+
810+
public function testMarkerLineSublistItemAbsorbsBlockWithoutLeak(): void
811+
{
812+
// Case 2: the block stays inside inner item A; it must not leak out to
813+
// the outer item.
814+
$result = $this->converter->convert("- - A\n\n block under A");
815+
816+
$expected = <<<'HTML'
817+
<ul>
818+
<li>
819+
<ul>
820+
<li>
821+
<p>A</p>
822+
<p>block under A</p>
823+
</li>
824+
</ul>
825+
</li>
826+
</ul>
827+
828+
HTML;
829+
830+
$this->assertSame($expected, $result);
831+
}
832+
833+
public function testMarkerLineSublistTightItemsStayOneList(): void
834+
{
835+
// Case 3 (regression guard): without an interleaved block, the markers
836+
// form a single tight inner list [A, B, C].
837+
$result = $this->converter->convert("- - A\n - B\n - C");
838+
839+
$expected = <<<'HTML'
840+
<ul>
841+
<li>
842+
<ul>
843+
<li>
844+
A
845+
</li>
846+
<li>
847+
B
848+
</li>
849+
<li>
850+
C
851+
</li>
852+
</ul>
853+
</li>
854+
</ul>
855+
856+
HTML;
857+
858+
$this->assertSame($expected, $result);
859+
}
775860
}

0 commit comments

Comments
 (0)