Skip to content

Commit f584070

Browse files
authored
security: harden URL schemes in non-HTML renderers (#255)
Follow-up to the always-on HTML hardening: the Markdown, plain-text and ANSI renderers emitted link destinations and image sources verbatim, so a dangerous scheme (javascript:/vbscript:/data:/file:) carried straight into a Markdown or terminal export - markup that gets rendered or clicked elsewhere. - Add Djot\Util\UrlSafety with the shared scheme denylist + sanitize helpers. - MarkdownRenderer / AnsiRenderer blank a dangerous link/image URL; PlainText falls back to the link text. Safe and relative URLs are untouched. - Refactor HtmlRenderer to delegate its baseline to UrlSafety (single source). This is the renderer-side fix for the carry-through; the HtmlToDjot importer is deliberately not a sanitizer (its output is re-rendered through these now-hardened renderers).
1 parent ff46886 commit f584070

6 files changed

Lines changed: 141 additions & 27 deletions

File tree

src/Renderer/AnsiRenderer.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
use Djot\Node\Inline\Text;
4949
use Djot\Node\Node;
5050
use Djot\Renderer\Utility\AbbreviationBudgetTrait;
51+
use Djot\Util\UrlSafety;
5152

5253
/**
5354
* Renders AST to ANSI-formatted terminal output
@@ -790,13 +791,13 @@ protected function renderCode(Code $node): string
790791
protected function renderLink(Link $node): string
791792
{
792793
$text = $this->renderChildren($node);
793-
$url = $node->getDestination();
794+
$url = UrlSafety::sanitize($node->getDestination() ?? '');
794795

795796
// OSC 8 hyperlink support (for terminals that support it)
796797
// Format: \033]8;;URL\033\\TEXT\033]8;;\033\\
797798
$styled = $this->style($text, self::UNDERLINE . self::FG_BLUE);
798799

799-
if ($url !== null && $url !== '' && $url !== $text) {
800+
if ($url !== '' && $url !== $text) {
800801
$styled .= $this->style(' (' . $url . ')', self::DIM);
801802
}
802803

src/Renderer/HtmlRenderer.php

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
use Djot\Renderer\Utility\EventDispatcherTrait;
5353
use Djot\SafeMode;
5454
use Djot\Util\StringUtil;
55+
use Djot\Util\UrlSafety;
5556

5657
/**
5758
* Renders AST to HTML
@@ -1111,14 +1112,6 @@ protected function getRenderableAttributes(Node $node, array $exclude = []): arr
11111112
return $attrs;
11121113
}
11131114

1114-
/**
1115-
* URL schemes that are always neutralized in attribute values and in
1116-
* `href` / `src`, regardless of safe mode.
1117-
*
1118-
* @var array<string>
1119-
*/
1120-
private const DANGEROUS_VALUE_SCHEMES = ['javascript', 'vbscript', 'data', 'file'];
1121-
11221115
/**
11231116
* Always-on attribute hardening, applied regardless of safe mode.
11241117
*
@@ -1152,12 +1145,8 @@ protected function sanitizeAttributes(array $attrs): array
11521145
*/
11531146
protected function sanitizeAttributeValue(string $name, string $value): string
11541147
{
1155-
$colon = strpos($value, ':');
1156-
if ($colon !== false) {
1157-
$scheme = strtolower((string)preg_replace('/[\x00-\x20]+/', '', substr($value, 0, $colon)));
1158-
if (in_array($scheme, self::DANGEROUS_VALUE_SCHEMES, true)) {
1159-
return '';
1160-
}
1148+
if (UrlSafety::hasDangerousScheme($value)) {
1149+
return '';
11611150
}
11621151
if ($name === 'style' && $this->hasDangerousCss($value)) {
11631152
return '';
@@ -1213,14 +1202,7 @@ static function (array $m): string {
12131202
*/
12141203
protected function sanitizeUrlBaseline(string $url): string
12151204
{
1216-
$probe = (string)preg_replace('/[\x00-\x20]+/', '', $url);
1217-
if (preg_match('/^([a-zA-Z][a-zA-Z0-9+.\-]*):/', $probe, $m) === 1) {
1218-
if (in_array(strtolower($m[1]), self::DANGEROUS_VALUE_SCHEMES, true)) {
1219-
return '';
1220-
}
1221-
}
1222-
1223-
return $url;
1205+
return UrlSafety::sanitize($url);
12241206
}
12251207

12261208
/**

src/Renderer/MarkdownRenderer.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
use Djot\Renderer\Utility\AbbreviationBudgetTrait;
5151
use Djot\Renderer\Utility\EventDispatcherTrait;
5252
use Djot\Util\StringUtil;
53+
use Djot\Util\UrlSafety;
5354

5455
/**
5556
* Renders AST to Markdown (CommonMark compatible where possible)
@@ -411,7 +412,7 @@ protected function renderCode(Code $node): string
411412
protected function renderLink(Link $node): string
412413
{
413414
$text = $this->renderChildren($node);
414-
$url = $node->getDestination();
415+
$url = UrlSafety::sanitize($node->getDestination() ?? '');
415416
$title = $node->getTitle();
416417

417418
if ($title !== null) {
@@ -424,7 +425,7 @@ protected function renderLink(Link $node): string
424425
protected function renderImage(Image $node): string
425426
{
426427
$alt = $node->getAlt();
427-
$src = $node->getSource();
428+
$src = UrlSafety::sanitize($node->getSource());
428429
$title = $node->getTitle();
429430

430431
if ($title !== null) {

src/Renderer/PlainTextRenderer.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
use Djot\Node\Inline\Text;
3939
use Djot\Node\Node;
4040
use Djot\Renderer\Utility\EventDispatcherTrait;
41+
use Djot\Util\UrlSafety;
4142

4243
/**
4344
* Renders AST to plain text
@@ -294,7 +295,12 @@ protected function renderFootnote(Footnote $node): string
294295

295296
protected function renderLink(Link $node): string
296297
{
297-
return $node->getDestination() ?? $this->renderChildren($node);
298+
$url = $node->getDestination();
299+
if ($url === null || UrlSafety::hasDangerousScheme($url)) {
300+
return $this->renderChildren($node);
301+
}
302+
303+
return $url;
298304
}
299305

300306
/**

src/Util/UrlSafety.php

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Djot\Util;
6+
7+
/**
8+
* Always-on URL scheme hardening shared by every renderer.
9+
*
10+
* A link destination or image source whose (normalized) scheme is on the
11+
* dangerous denylist (`javascript`, `vbscript`, `data`, `file`) is neutralized
12+
* regardless of safe mode or output format - there is no legitimate reason to
13+
* emit such a URL from untrusted markup, and a non-HTML export (Markdown, plain
14+
* text) carries it to wherever it is rendered next. Scheme detection strips C0
15+
* controls + spaces first to defeat `java\tscript:` style evasion.
16+
*/
17+
final class UrlSafety
18+
{
19+
/**
20+
* @var array<string>
21+
*/
22+
public const DANGEROUS_SCHEMES = ['javascript', 'vbscript', 'data', 'file'];
23+
24+
/**
25+
* True when the URL carries a dangerous denylist scheme. Scheme-less and
26+
* relative URLs, and every non-denylist scheme, return false.
27+
*/
28+
public static function hasDangerousScheme(string $url): bool
29+
{
30+
$probe = (string)preg_replace('/[\x00-\x20]+/', '', $url);
31+
if (preg_match('/^([a-zA-Z][a-zA-Z0-9+.\-]*):/', $probe, $m) === 1) {
32+
return in_array(strtolower($m[1]), self::DANGEROUS_SCHEMES, true);
33+
}
34+
35+
return false;
36+
}
37+
38+
/**
39+
* Blank a URL that carries a dangerous scheme; otherwise return it unchanged.
40+
*/
41+
public static function sanitize(string $url): string
42+
{
43+
return self::hasDangerousScheme($url) ? '' : $url;
44+
}
45+
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Djot\Test\TestCase\Renderer;
6+
7+
use Djot\DjotConverter;
8+
use Djot\Renderer\AnsiRenderer;
9+
use Djot\Renderer\MarkdownRenderer;
10+
use Djot\Renderer\PlainTextRenderer;
11+
use PHPUnit\Framework\Attributes\DataProvider;
12+
use PHPUnit\Framework\TestCase;
13+
14+
/**
15+
* Non-HTML renderers must not carry a dangerous URL scheme into their output.
16+
*
17+
* The HTML renderer blanks `javascript:` / `vbscript:` / `data:` / `file:` URLs,
18+
* but a Markdown / plain-text / ANSI export of the same document is markup that
19+
* gets rendered (or clicked) somewhere else - so the same denylist applies to
20+
* every output format, not just HTML.
21+
*/
22+
class NonHtmlUrlHardeningTest extends TestCase
23+
{
24+
/**
25+
* @return array<string, array{0: string}>
26+
*/
27+
public static function dangerousUrlProvider(): array
28+
{
29+
return [
30+
'javascript' => ['javascript:alert(1)'],
31+
'vbscript' => ['vbscript:msgbox(1)'],
32+
'data' => ['data:text/html,<script>alert(1)</script>'],
33+
'file' => ['file:///etc/passwd'],
34+
'evasion' => ["java\tscript:alert(1)"],
35+
];
36+
}
37+
38+
#[DataProvider('dangerousUrlProvider')]
39+
public function testMarkdownBlanksDangerousLinkUrl(string $url): void
40+
{
41+
$converter = DjotConverter::create(renderer: new MarkdownRenderer());
42+
$result = $converter->convert('[x](' . $url . ')');
43+
44+
$this->assertStringNotContainsString('script:', $result);
45+
$this->assertStringNotContainsString('data:', $result);
46+
$this->assertStringNotContainsString('file:', $result);
47+
$this->assertStringContainsString('[x]()', $result);
48+
}
49+
50+
public function testMarkdownBlanksDangerousImageUrl(): void
51+
{
52+
$converter = DjotConverter::create(renderer: new MarkdownRenderer());
53+
$this->assertStringContainsString('![a]()', $converter->convert('![a](data:text/html,x)'));
54+
}
55+
56+
public function testMarkdownPreservesSafeUrl(): void
57+
{
58+
$converter = DjotConverter::create(renderer: new MarkdownRenderer());
59+
$this->assertStringContainsString('[x](https://ok.example)', $converter->convert('[x](https://ok.example)'));
60+
}
61+
62+
public function testPlainTextDropsDangerousUrlToLinkText(): void
63+
{
64+
$converter = DjotConverter::create(renderer: new PlainTextRenderer());
65+
$result = $converter->convert('[label](javascript:alert(1))');
66+
67+
$this->assertStringNotContainsString('javascript', $result);
68+
$this->assertStringContainsString('label', $result);
69+
}
70+
71+
public function testAnsiDropsDangerousUrl(): void
72+
{
73+
$converter = DjotConverter::create(renderer: new AnsiRenderer());
74+
$plain = preg_replace('/\033\[[0-9;]*m/', '', $converter->convert('[label](vbscript:x)')) ?? '';
75+
76+
$this->assertStringNotContainsString('vbscript', $plain);
77+
$this->assertStringContainsString('label', $plain);
78+
}
79+
}

0 commit comments

Comments
 (0)