Skip to content

Commit 0f040fe

Browse files
committed
Content: Tuned HTML purifier for our use
Tested it with a range of supported content types, including uncommon ones, and added support or changed config where needed. Went through the docs for all HTMLPurifier options to assess which are relevant.
1 parent 10ebe53 commit 0f040fe

File tree

3 files changed

+104
-6
lines changed

3 files changed

+104
-6
lines changed

app/Entities/Tools/PageContent.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,12 +321,13 @@ public function render(bool $blankIncludes = false): string
321321
$cacheKey = $this->getContentCacheKey($doc->getBodyInnerHtml());
322322
$cached = cache()->get($cacheKey, null);
323323
if ($cached !== null) {
324-
return $cached;
324+
// return $cached;
325325
}
326326

327327
$filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'));
328328
$filter = new HtmlContentFilter($filterConfig);
329329
$filtered = $filter->filterDocument($doc);
330+
// $filtered = $doc->getBodyInnerHtml();
330331

331332
$cacheTime = 86400 * 7; // 1 week
332333
cache()->put($cacheKey, $filtered, $cacheTime);
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
<?php
2+
3+
namespace BookStack\Util;
4+
5+
use HTMLPurifier;
6+
use HTMLPurifier_Config;
7+
use HTMLPurifier_HTML5Config;
8+
use HTMLPurifier_HTMLDefinition;
9+
10+
/**
 * HTMLPurifier wrapper carrying the configuration used for content filtering.
 *
 * Starts from the HTML5 default config, applies a fixed set of directive
 * overrides and registers the extra elements (object, embed, checkbox input)
 * that should be accepted in addition to the defaults.
 */
class ConfiguredHtmlPurifier
{
    protected HTMLPurifier $purifier;

    /**
     * Build the config, apply our overrides and element additions,
     * then create the purifier instance used by purify().
     */
    public function __construct()
    {
        $purifierConfig = HTMLPurifier_HTML5Config::createDefault();
        $this->setConfig($purifierConfig);

        // Custom elements are only registered when an HTML definition instance is handed back.
        $rawDefinition = $purifierConfig->getDefinition('HTML', true, true);
        if ($rawDefinition instanceof HTMLPurifier_HTMLDefinition) {
            $this->configureDefinition($rawDefinition);
        }

        $this->purifier = new HTMLPurifier($purifierConfig);
    }

    /**
     * Apply our directive overrides to the given config.
     * Directives are set one by one, in the order listed.
     */
    protected function setConfig(HTMLPurifier_Config $config): void
    {
        $directives = [
            'Cache.SerializerPath' => storage_path('purifier'),
            'CSS.AllowTricky'      => true,
            'HTML.SafeIframe'      => true,
            'Attr.EnableID'        => true,
            'Attr.ID.HTML5'        => true,
            'Output.FixInnerHTML'  => false,
            // Iframe sources must start with an explicit http:// or https:// prefix.
            'URI.SafeIframeRegexp' => '%^(http://|https://)%',
            'URI.AllowedSchemes'   => [
                'http'   => true,
                'https'  => true,
                'mailto' => true,
                'ftp'    => true,
                'nntp'   => true,
                'news'   => true,
                'tel'    => true,
                'file'   => true,
            ],
            // NOTE(review): definition caching is switched off here; this was marked as
            // "Disable cache during testing" - confirm whether it should remain.
            'Cache.DefinitionImpl' => null,
        ];

        foreach ($directives as $directive => $value) {
            $config->set($directive, $value);
        }
    }

    /**
     * Register the additional elements we accept on the given HTML definition.
     *
     * Each entry below holds the addElement() arguments in order:
     * element name, type, contents, attribute collection, attributes.
     */
    public function configureDefinition(HTMLPurifier_HTMLDefinition $definition): void
    {
        $elementSpecs = [
            // The object element
            ['object', 'Inline', 'Flow', 'Common', [
                'data'   => 'URI',
                'type'   => 'Text',
                'width'  => 'Length',
                'height' => 'Length',
            ]],
            // The embed element
            ['embed', 'Inline', 'Empty', 'Common', [
                'src'    => 'URI',
                'type'   => 'Text',
                'width'  => 'Length',
                'height' => 'Length',
            ]],
            // Inputs, with the type attribute limited to "checkbox"
            ['input', 'Formctrl', 'Empty', 'Common', [
                'checked'  => 'Bool#checked',
                'disabled' => 'Bool#disabled',
                'name'     => 'Text',
                'readonly' => 'Bool#readonly',
                'type'     => 'Enum#checkbox',
                'value'    => 'Text',
            ]],
        ];

        foreach ($elementSpecs as $addElementArgs) {
            $definition->addElement(...$addElementArgs);
        }
    }

    /**
     * Run the given HTML through the configured purifier and return the result.
     */
    public function purify(string $html): string
    {
        $purified = $this->purifier->purify($html);

        return $purified;
    }
}

app/Util/HtmlContentFilter.php

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
use DOMAttr;
66
use DOMElement;
77
use DOMNodeList;
8-
use HTMLPurifier;
9-
use HTMLPurifier_HTML5Config;
108

119
class HtmlContentFilter
1210
{
@@ -45,9 +43,7 @@ public function filterString(string $html): string
4543

4644
protected function applyAllowListFiltering(string $html): string
4745
{
48-
$config = HTMLPurifier_HTML5Config::createDefault();
49-
$config->set('Cache.SerializerPath', storage_path('purifier'));
50-
$purifier = new HTMLPurifier($config);
46+
$purifier = new ConfiguredHtmlPurifier();
5147
return $purifier->purify($html);
5248
}
5349

0 commit comments

Comments
 (0)