Skip to content

Commit 39c56cc

Browse files
authored
Fix renderer content loss in non-HTML renderers (markdown/plain/ansi) (#246)
Content vanished in the markdown / plain / ansi renderers (HTML was fine): - EscapedText (e.g. an escaped literal like a backslash-star) was dropped in all three: the node is not a Text subclass, so it fell through to renderChildren() and produced nothing. Added an explicit arm in each renderer. Markdown keeps the leading backslash so the literal stays literal when re-parsed; plain/ansi emit the raw content. - MarkdownRenderer had no Figure / Caption arms, so a captioned image glued the caption to the image; and no Abbreviation arm, so the title was lost. Added renderFigure (caption on its own block), renderCaption, and renderAbbreviation (inline abbr HTML, mirroring how sub/sup fall back to inline HTML). - A Div's title attribute (Djot attribute syntax title="...") was dropped in all three non-HTML renderers' renderDiv. Preserve it as a leading bold/plain line. Ported from carve-php commit 217b72f.
1 parent 72c4591 commit 39c56cc

4 files changed

Lines changed: 148 additions & 3 deletions

File tree

src/Renderer/AnsiRenderer.php

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
use Djot\Node\Inline\Code;
3131
use Djot\Node\Inline\Delete;
3232
use Djot\Node\Inline\Emphasis;
33+
use Djot\Node\Inline\EscapedText;
3334
use Djot\Node\Inline\FootnoteRef;
3435
use Djot\Node\Inline\HardBreak;
3536
use Djot\Node\Inline\Highlight;
@@ -403,6 +404,7 @@ protected function renderNode(Node $node): string
403404
$node instanceof LineBlock => $this->renderLineBlock($node),
404405
$node instanceof Footnote => $this->renderFootnote($node),
405406
$node instanceof Text => $node->getContent(),
407+
$node instanceof EscapedText => $node->getContent(),
406408
$node instanceof Abbreviation => $this->renderAbbreviation($node),
407409
$node instanceof Emphasis => $this->renderEmphasis($node),
408410
$node instanceof Strong => $this->renderStrong($node),
@@ -608,7 +610,16 @@ protected function renderThematicBreak(): string
608610

609611
/**
 * Render a Div as its children, prefixed by its `title` attribute when set.
 *
 * A non-empty string title (e.g. an admonition title carried as the
 * `title` attribute) is emitted as a leading bold line followed by a blank
 * line, so it is not dropped from the ANSI output.
 *
 * @param Div $node The div to render.
 * @return string The rendered children, optionally preceded by the styled title.
 */
protected function renderDiv(Div $node): string
{
    $content = $this->renderChildren($node);
    $heading = $node->getAttribute('title');

    // No usable title: the div is transparent and only its content shows.
    if (!is_string($heading) || $heading === '') {
        return $content;
    }

    return $this->style($heading, self::BOLD) . "\n\n" . $content;
}
613624

614625
protected function renderTable(Table $node): string

src/Renderer/MarkdownRenderer.php

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@
66

77
use Djot\Event\RenderEvent;
88
use Djot\Node\Block\BlockQuote;
9+
use Djot\Node\Block\Caption;
910
use Djot\Node\Block\CodeBlock;
1011
use Djot\Node\Block\Comment;
1112
use Djot\Node\Block\DefinitionDescription;
1213
use Djot\Node\Block\DefinitionList;
1314
use Djot\Node\Block\DefinitionTerm;
1415
use Djot\Node\Block\Div;
16+
use Djot\Node\Block\Figure;
1517
use Djot\Node\Block\Footnote;
1618
use Djot\Node\Block\Heading;
1719
use Djot\Node\Block\LineBlock;
@@ -24,9 +26,11 @@
2426
use Djot\Node\Block\TableRow;
2527
use Djot\Node\Block\ThematicBreak;
2628
use Djot\Node\Document;
29+
use Djot\Node\Inline\Abbreviation;
2730
use Djot\Node\Inline\Code;
2831
use Djot\Node\Inline\Delete;
2932
use Djot\Node\Inline\Emphasis;
33+
use Djot\Node\Inline\EscapedText;
3034
use Djot\Node\Inline\FootnoteRef;
3135
use Djot\Node\Inline\HardBreak;
3236
use Djot\Node\Inline\Highlight;
@@ -140,6 +144,14 @@ protected function renderNode(Node $node): string
140144
$node instanceof LineBlock => $this->renderLineBlock($node),
141145
$node instanceof Footnote => $this->renderFootnote($node),
142146
$node instanceof Text => $this->escapeText($node->getContent()),
147+
// Keep the backslash so the literal stays literal when re-parsed as
148+
// Markdown: a bare `.` from `\.` would turn `1\. x` back into an
149+
// ordered list. EscapedText only ever holds escaped ASCII
150+
// punctuation, all of which CommonMark allows a `\` before.
151+
$node instanceof EscapedText => '\\' . $node->getContent(),
152+
$node instanceof Figure => $this->renderFigure($node),
153+
$node instanceof Caption => $this->renderCaption($node),
154+
$node instanceof Abbreviation => $this->renderAbbreviation($node),
143155
$node instanceof Emphasis => $this->renderEmphasis($node),
144156
$node instanceof Strong => $this->renderStrong($node),
145157
$node instanceof Code => $this->renderCode($node),
@@ -287,8 +299,17 @@ protected function renderDefinitionDescription(DefinitionDescription $node): str
287299

288300
/**
 * Render a Div as its children, prefixed by its `title` attribute when set.
 *
 * Divs/admonitions don't exist in Markdown, so only the content is
 * rendered. A Div's quoted title (e.g. an admonition title carried as the
 * `title` attribute) would otherwise be lost, so it is preserved as a
 * leading bold line followed by a blank line.
 *
 * @param Div $node The div to render.
 * @return string The rendered children, optionally preceded by the bold title.
 */
protected function renderDiv(Div $node): string
{
    $body = $this->renderChildren($node);

    $title = $node->getAttribute('title');
    if (!is_string($title)) {
        return $body;
    }

    // CommonMark only opens strong emphasis when `**` is NOT followed by
    // whitespace (and only closes it when not preceded by whitespace), so
    // a padded title such as " Heads up " would come out as literal
    // asterisks. Trim before wrapping; a whitespace-only title is treated
    // as absent rather than emitted as an empty bold run.
    $title = trim($title);
    if ($title === '') {
        return $body;
    }

    return '**' . $this->escapeText($title) . "**\n\n" . $body;
}
293314

294315
protected function renderTable(Table $node): string
@@ -475,6 +496,48 @@ protected function renderRawInline(RawInline $node): string
475496
return '';
476497
}
477498

499+
/**
 * Render a figure as its target followed by its caption in a separate
 * block, since Markdown has no figure element.
 *
 * The caption is separated from whatever precedes it by a BLANK line, not
 * merely a newline: after a block-quote target a single newline would turn
 * the caption into a lazy continuation of the quote and swallow it.
 *
 * @param Figure $node The figure to render.
 * @return string The rendered target(s) and caption.
 */
protected function renderFigure(Figure $node): string
{
    $rendered = '';

    foreach ($node->getChildren() as $part) {
        if (!$part instanceof Caption) {
            $rendered .= $this->renderNode($part);

            continue;
        }

        // Drop trailing whitespace of the target so exactly one blank line
        // sits between it and the caption.
        $rendered = rtrim($rendered) . "\n\n" . $this->renderCaption($part);
    }

    return $rendered;
}
519+
520+
/**
 * Render a caption as a standalone block: its children with surrounding
 * whitespace trimmed, terminated by a blank line.
 *
 * @param Caption $node The caption to render.
 * @return string The trimmed caption text followed by "\n\n".
 */
protected function renderCaption(Caption $node): string
{
    $captionText = trim($this->renderChildren($node));

    return $captionText . "\n\n";
}
524+
525+
/**
 * Markdown has no abbreviation syntax; emit inline abbr HTML so the title
 * is preserved (mirrors how subscript/superscript fall back to inline HTML).
 *
 * @param Abbreviation $node The abbreviation to render.
 * @return string An `<abbr title="...">...</abbr>` element.
 */
protected function renderAbbreviation(Abbreviation $node): string
{
    // ENT_SUBSTITUTE matters: passing explicit flags replaces PHP's
    // defaults, and without it htmlspecialchars() returns an EMPTY string
    // for any input containing an invalid UTF-8 sequence - silently
    // dropping the title or the text. With it, bad bytes become U+FFFD.
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    // The title lands inside a double-quoted HTML attribute, so a `"` in
    // it must be entity-escaped or it would terminate the attribute.
    $title = htmlspecialchars($node->getTitle(), $flags, 'UTF-8');

    // The children are rendered through renderChildren() first (so they
    // carry whatever escaping the node renderers apply) and are then
    // HTML-escaped, so a `<` or `&` in the text cannot be misparsed as
    // markup downstream.
    $text = htmlspecialchars($this->renderChildren($node), $flags, 'UTF-8');

    return '<abbr title="' . $title . '">' . $text . '</abbr>';
}
540+
478541
protected function escapeText(string $text): string
479542
{
480543
// Escape special Markdown characters in text

src/Renderer/PlainTextRenderer.php

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
use Djot\Node\Block\DefinitionDescription;
1212
use Djot\Node\Block\DefinitionList;
1313
use Djot\Node\Block\DefinitionTerm;
14+
use Djot\Node\Block\Div;
1415
use Djot\Node\Block\Footnote;
1516
use Djot\Node\Block\Heading;
1617
use Djot\Node\Block\LineBlock;
@@ -25,6 +26,7 @@
2526
use Djot\Node\Document;
2627
use Djot\Node\Inline\Code;
2728
use Djot\Node\Inline\Delete;
29+
use Djot\Node\Inline\EscapedText;
2830
use Djot\Node\Inline\FootnoteRef;
2931
use Djot\Node\Inline\HardBreak;
3032
use Djot\Node\Inline\Image;
@@ -114,6 +116,7 @@ protected function renderNode(Node $node): string
114116

115117
return match (true) {
116118
$node instanceof Document => $this->renderChildren($node),
119+
$node instanceof Div => $this->renderDiv($node),
117120
$node instanceof Paragraph => $this->renderParagraph($node),
118121
$node instanceof Heading => $this->renderHeading($node),
119122
$node instanceof CodeBlock => $this->renderCodeBlock($node),
@@ -132,6 +135,7 @@ protected function renderNode(Node $node): string
132135
$node instanceof LineBlock => $this->renderLineBlock($node),
133136
$node instanceof Footnote => $this->renderFootnote($node),
134137
$node instanceof Text => $node->getContent(),
138+
$node instanceof EscapedText => $node->getContent(),
135139
$node instanceof Code => $node->getContent(),
136140
$node instanceof Math => $node->getContent(),
137141
$node instanceof Image => $node->getAlt(),
@@ -161,6 +165,19 @@ protected function renderParagraph(Paragraph $node): string
161165
return $this->renderChildren($node) . "\n\n";
162166
}
163167

168+
/**
 * Render a Div as its children, prefixed by its `title` attribute when set.
 *
 * A non-empty string title (e.g. an admonition title carried as the
 * `title` attribute) is emitted verbatim as a leading line followed by a
 * blank line, instead of being dropped.
 *
 * @param Div $node The div to render.
 * @return string The rendered children, optionally preceded by the title line.
 */
protected function renderDiv(Div $node): string
{
    $content = $this->renderChildren($node);
    $heading = $node->getAttribute('title');

    // Missing, non-string or empty title: output the content unchanged.
    if (!is_string($heading) || $heading === '') {
        return $content;
    }

    return $heading . "\n\n" . $content;
}
180+
164181
protected function renderHeading(Heading $node): string
165182
{
166183
return $this->renderChildren($node) . "\n\n";
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Djot\Test\TestCase\Renderer;
6+
7+
use Djot\DjotConverter;
8+
use PHPUnit\Framework\TestCase;
9+
10+
/**
 * Renderer content loss: the non-HTML renderers (markdown/plain/ansi) must not
 * silently drop content the HTML renderer keeps.
 *
 * Ported from carve-php commit 217b72f.
 */
class RendererContentLossTest extends TestCase
{
    /**
     * An escaped literal (`\*`) must survive in every non-HTML renderer.
     * Markdown keeps the backslash so the output stays literal when
     * re-parsed; plain text and ANSI emit the bare character.
     */
    public function testEscapedTextIsNotDroppedInNonHtmlRenderers(): void
    {
        $md = DjotConverter::markdown()->convert('a \*lit\* b');
        $this->assertStringContainsString('lit', $md);
        $this->assertStringContainsString('*', $md);
        // Pin the backslash too: a bare `*lit*` would satisfy the two
        // assertions above yet re-parse as emphasis.
        $this->assertStringContainsString('\*lit\*', $md);

        $plain = DjotConverter::plainText()->convert('a \*lit\* b');
        $this->assertStringContainsString('*lit*', $plain);

        $ansi = DjotConverter::ansi()->convert('a \*lit\* b');
        $this->assertStringContainsString('*lit*', $ansi);
    }

    /**
     * The expansion given in an abbreviation definition must appear in the
     * Markdown output rather than being discarded.
     */
    public function testAbbreviationTitlePreservedInMarkdown(): void
    {
        $md = DjotConverter::markdown()->convert("The HTML spec.\n\n*[HTML]: HyperText Markup Language");
        $this->assertStringContainsString('HyperText Markup Language', $md);
    }

    /**
     * A captioned image must not have its caption glued directly onto the
     * image markup.
     */
    public function testFigureCaptionNotGluedInMarkdown(): void
    {
        $md = DjotConverter::markdown()->convert("![a](i.png)\n^ Cap text");
        // caption sits on its own line, not glued to the image
        $this->assertStringNotContainsString('i.png)Cap', $md);
        $this->assertStringContainsString('Cap text', $md);
    }

    /**
     * A Div carries a `title` attribute via Djot's attribute syntax; the
     * non-HTML renderers must surface it as a leading line, not drop it.
     */
    public function testDivTitlePreservedInNonHtmlRenderers(): void
    {
        $src = "{title=\"Heads up\"}\n:::\nbody\n:::";
        $this->assertStringContainsString('Heads up', DjotConverter::markdown()->convert($src));
        $this->assertStringContainsString('Heads up', DjotConverter::plainText()->convert($src));
        $this->assertStringContainsString('Heads up', DjotConverter::ansi()->convert($src));
    }
}
}

0 commit comments

Comments
 (0)