Skip to content

Commit c7d3775

Browse files
committed
Plain text: Created a new HTML to plain text converter
Centralises the conversion logic so that it is more consistent, and adds smarter handling that no longer just follows the newline formatting of the input, preventing HTML elements (like list items) from being smushed next to each other.
1 parent 25790fd commit c7d3775

File tree

7 files changed

+125
-5
lines changed

7 files changed

+125
-5
lines changed

app/Activity/Models/Comment.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
use BookStack\Users\Models\OwnableInterface;
1010
use BookStack\Util\HtmlContentFilter;
1111
use BookStack\Util\HtmlContentFilterConfig;
12+
use BookStack\Util\HtmlToPlainText;
1213
use Illuminate\Database\Eloquent\Builder;
1314
use Illuminate\Database\Eloquent\Factories\HasFactory;
1415
use Illuminate\Database\Eloquent\Relations\BelongsTo;
@@ -87,6 +88,12 @@ public function safeHtml(): string
8788
return $filter->filterString($this->html ?? '');
8889
}
8990

91+
/**
 * Get the content of this comment as plain text.
 * The stored HTML is run through the HtmlToPlainText converter,
 * with a missing (null) HTML value treated as an empty string.
 */
public function getPlainText(): string
{
    return (new HtmlToPlainText())->convert($this->html ?? '');
}
96+
9097
public function jointPermissions(): HasMany
9198
{
9299
return $this->hasMany(JointPermission::class, 'entity_id', 'commentable_id')

app/Activity/Notifications/Messages/CommentCreationNotification.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public function toMail(User $notifiable): MailMessage
2424
$locale->trans('notifications.detail_page_name') => new EntityLinkMessageLine($page),
2525
$locale->trans('notifications.detail_page_path') => $this->buildPagePathLine($page, $notifiable),
2626
$locale->trans('notifications.detail_commenter') => $this->user->name,
27-
$locale->trans('notifications.detail_comment') => strip_tags($comment->html),
27+
$locale->trans('notifications.detail_comment') => $comment->getPlainText(),
2828
]);
2929

3030
return $this->newMailMessage($locale)

app/Activity/Notifications/Messages/CommentMentionNotification.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public function toMail(User $notifiable): MailMessage
2424
$locale->trans('notifications.detail_page_name') => new EntityLinkMessageLine($page),
2525
$locale->trans('notifications.detail_page_path') => $this->buildPagePathLine($page, $notifiable),
2626
$locale->trans('notifications.detail_commenter') => $this->user->name,
27-
$locale->trans('notifications.detail_comment') => strip_tags($comment->html),
27+
$locale->trans('notifications.detail_comment') => $comment->getPlainText(),
2828
]);
2929

3030
return $this->newMailMessage($locale)

app/Entities/Repos/BaseRepo.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
use BookStack\Sorting\BookSorter;
1717
use BookStack\Uploads\ImageRepo;
1818
use BookStack\Util\HtmlDescriptionFilter;
19+
use BookStack\Util\HtmlToPlainText;
1920
use Illuminate\Http\UploadedFile;
2021

2122
class BaseRepo
@@ -151,9 +152,10 @@ protected function updateDescription(Entity $entity, array $input): void
151152
}
152153

153154
if (isset($input['description_html'])) {
155+
$plainTextConverter = new HtmlToPlainText();
154156
$entity->descriptionInfo()->set(
155157
HtmlDescriptionFilter::filterFromString($input['description_html']),
156-
html_entity_decode(strip_tags($input['description_html']))
158+
$plainTextConverter->convert($input['description_html']),
157159
);
158160
} else if (isset($input['description'])) {
159161
$entity->descriptionInfo()->set('', $input['description']);

app/Entities/Tools/PageContent.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
use BookStack\Util\HtmlContentFilter;
1717
use BookStack\Util\HtmlContentFilterConfig;
1818
use BookStack\Util\HtmlDocument;
19+
use BookStack\Util\HtmlToPlainText;
1920
use BookStack\Util\WebSafeMimeSniffer;
2021
use Closure;
2122
use DOMElement;
@@ -303,8 +304,8 @@ protected function setUniqueId(DOMNode $element, array &$idMap): array
303304
public function toPlainText(): string
304305
{
305306
$html = $this->render(true);
306-
307-
return html_entity_decode(strip_tags($html));
307+
$converter = new HtmlToPlainText();
308+
return $converter->convert($html);
308309
}
309310

310311
/**

app/Util/HtmlToPlainText.php

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?php
2+
3+
namespace BookStack\Util;
4+
5+
class HtmlToPlainText
{
    /**
     * Inline tags types where the content should not be put on a new line.
     *
     * NOTE(review): 'div' is listed here even though it is normally a
     * block-level element, so the contents of adjacent <div> elements are
     * joined without a line break. Left unchanged; confirm this is intended.
     *
     * @var string[]
     */
    protected array $inlineTags = [
        'a', 'b', 'i', 'u', 'strong', 'em', 'small', 'sup', 'sub', 'span', 'div',
    ];

    /**
     * Convert the provided HTML to relatively clean plain text.
     *
     * Elements not listed in $inlineTags have their content placed on separate
     * lines, runs of newlines are collapsed down to a single newline, and
     * leading/trailing whitespace is trimmed from the overall result.
     */
    public function convert(string $html): string
    {
        $doc = new HtmlDocument($html);
        $text = $this->nodeToText($doc->getBody());

        // Remove repeated newlines. preg_replace() returns null upon a PCRE
        // failure, in which case we fall back to the un-collapsed text
        // instead of passing null on to trim().
        $text = preg_replace('/\n+/', "\n", $text) ?? $text;

        // Remove leading/trailing whitespace
        return trim($text);
    }

    /**
     * Recursively build the text for the given node and its descendants.
     *
     * Text nodes provide their text content as-is. Element nodes which are not
     * in $inlineTags are wrapped with a newline on both sides, so that their
     * content is separated from any content before and after them. Other node
     * types (such as comments) add no newline of their own.
     */
    protected function nodeToText(\DOMNode $node): string
    {
        if ($node->nodeType === XML_TEXT_NODE) {
            return $node->textContent;
        }

        // Only real elements can be block-level; this stops nodes such as
        // comments from splitting otherwise-inline text across lines.
        $isBlock = $node->nodeType === XML_ELEMENT_NODE
            && !in_array($node->nodeName, $this->inlineTags, true);

        $text = $isBlock ? "\n" : '';

        foreach ($node->childNodes as $childNode) {
            $text .= $this->nodeToText($childNode);
        }

        // Close the block with a newline too, so text directly following the
        // element is not joined onto its content. Any duplicate newlines
        // this creates are collapsed within convert().
        if ($isBlock) {
            $text .= "\n";
        }

        return $text;
    }
}

tests/Util/HtmlToPlainTextTest.php

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
<?php
2+
3+
namespace Tests\Util;
4+
5+
use BookStack\Util\HtmlToPlainText;
6+
use Tests\TestCase;
7+
8+
class HtmlToPlainTextTest extends TestCase
{
    /**
     * Mixed block content (paragraphs, a list, a header) ends up with one
     * line per block, with HTML entities decoded to their characters.
     */
    public function test_it_converts_html_to_plain_text(): void
    {
        $html = <<<HTML
        <p>This is a test</p>
        <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        </ul>
        <h2>A Header</h2>
        <p>more &lt;&copy;&gt; text <strong>with bold</strong></p>
        HTML;
        $expected = <<<TEXT
        This is a test
        Item 1
        Item 2
        A Header
        more <©> text with bold
        TEXT;

        $this->runTest($html, $expected);
    }

    /**
     * List items without any whitespace between them in the input
     * are still separated onto their own lines.
     */
    public function test_adjacent_list_items_are_separated_by_newline(): void
    {
        $html = <<<HTML
        <ul><li>Item A</li><li>Item B</li></ul>
        HTML;
        $expected = <<<TEXT
        Item A
        Item B
        TEXT;

        $this->runTest($html, $expected);
    }

    /**
     * Inline formatting elements do not split their content across lines.
     */
    public function test_inline_formats_dont_cause_newlines(): void
    {
        $html = <<<HTML
        <p><strong>H</strong><a>e</a><sup>l</sup><span>l</span><em>o</em></p>
        HTML;
        $expected = <<<TEXT
        Hello
        TEXT;

        $this->runTest($html, $expected);
    }

    /**
     * Convert the given HTML and assert the result matches the expected text.
     * Both the input and the expectation are trimmed before use.
     */
    protected function runTest(string $html, string $expected): void
    {
        $converter = new HtmlToPlainText();
        $result = $converter->convert(trim($html));

        // assertSame for a strict, type-checked string comparison.
        $this->assertSame(trim($expected), $result);
    }
}

0 commit comments

Comments
 (0)