Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions app/Activity/Models/Comment.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
use BookStack\Users\Models\OwnableInterface;
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlContentFilterConfig;
use BookStack\Util\HtmlToPlainText;
use Illuminate\Database\Eloquent\Builder;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
Expand Down Expand Up @@ -87,6 +88,12 @@ public function safeHtml(): string
return $filter->filterString($this->html ?? '');
}

/**
 * Get the content of this comment as plain text, converted from its HTML.
 * A null HTML value is treated as an empty string before conversion.
 */
public function getPlainText(): string
{
    $html = $this->html ?? '';

    return (new HtmlToPlainText())->convert($html);
}

public function jointPermissions(): HasMany
{
return $this->hasMany(JointPermission::class, 'entity_id', 'commentable_id')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public function toMail(User $notifiable): MailMessage
$locale->trans('notifications.detail_page_name') => new EntityLinkMessageLine($page),
$locale->trans('notifications.detail_page_path') => $this->buildPagePathLine($page, $notifiable),
$locale->trans('notifications.detail_commenter') => $this->user->name,
$locale->trans('notifications.detail_comment') => strip_tags($comment->html),
$locale->trans('notifications.detail_comment') => $comment->getPlainText(),
]);

return $this->newMailMessage($locale)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public function toMail(User $notifiable): MailMessage
$locale->trans('notifications.detail_page_name') => new EntityLinkMessageLine($page),
$locale->trans('notifications.detail_page_path') => $this->buildPagePathLine($page, $notifiable),
$locale->trans('notifications.detail_commenter') => $this->user->name,
$locale->trans('notifications.detail_comment') => strip_tags($comment->html),
$locale->trans('notifications.detail_comment') => $comment->getPlainText(),
]);

return $this->newMailMessage($locale)
Expand Down
4 changes: 3 additions & 1 deletion app/Entities/Repos/BaseRepo.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
use BookStack\Sorting\BookSorter;
use BookStack\Uploads\ImageRepo;
use BookStack\Util\HtmlDescriptionFilter;
use BookStack\Util\HtmlToPlainText;
use Illuminate\Http\UploadedFile;

class BaseRepo
Expand Down Expand Up @@ -151,9 +152,10 @@ protected function updateDescription(Entity $entity, array $input): void
}

if (isset($input['description_html'])) {
$plainTextConverter = new HtmlToPlainText();
$entity->descriptionInfo()->set(
HtmlDescriptionFilter::filterFromString($input['description_html']),
html_entity_decode(strip_tags($input['description_html']))
$plainTextConverter->convert($input['description_html']),
);
} else if (isset($input['description'])) {
$entity->descriptionInfo()->set('', $input['description']);
Expand Down
5 changes: 3 additions & 2 deletions app/Entities/Tools/PageContent.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
use BookStack\Util\HtmlContentFilter;
use BookStack\Util\HtmlContentFilterConfig;
use BookStack\Util\HtmlDocument;
use BookStack\Util\HtmlToPlainText;
use BookStack\Util\WebSafeMimeSniffer;
use Closure;
use DOMElement;
Expand Down Expand Up @@ -303,8 +304,8 @@ protected function setUniqueId(DOMNode $element, array &$idMap): array
public function toPlainText(): string
{
    // Render the page content to HTML first, then run it through the shared
    // HTML-to-plain-text converter rather than a bare strip_tags() call, so
    // that block-level elements end up on their own lines and entities are
    // handled by the DOM parser. Previously an early return of the
    // strip_tags() result left the converter path unreachable.
    $html = $this->render(true);

    return (new HtmlToPlainText())->convert($html);
}

/**
Expand Down
20 changes: 5 additions & 15 deletions app/Exports/ExportFormatter.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
use BookStack\Uploads\ImageService;
use BookStack\Util\CspService;
use BookStack\Util\HtmlDocument;
use BookStack\Util\HtmlToPlainText;
use DOMElement;
use Exception;
use Throwable;
Expand Down Expand Up @@ -242,32 +243,21 @@ protected function containHtml(string $htmlContent): string

/**
 * Converts the page contents into simple plain text.
 * We re-generate the plain text from HTML at this point, post-page-content rendering.
 *
 * @param Page $page         The page to convert; its name is used as the title line.
 * @param bool $pageRendered When true, the existing `$page->html` is used as-is
 *                           instead of rendering the page content again.
 * @param bool $fromParent   When true, the title is followed by a single newline
 *                           instead of a blank line.
 */
public function pageToPlainText(Page $page, bool $pageRendered = false, bool $fromParent = false): string
{
    $html = $pageRendered ? $page->html : (new PageContent($page))->render();

    // Use the shared converter so export output matches other plain-text
    // conversions, instead of the older strip_tags()/regex clean-up which
    // returned early and left this path unreachable.
    $contentText = (new HtmlToPlainText())->convert($html);

    return $page->name . ($fromParent ? "\n" : "\n\n") . $contentText;
}

/**
* Convert a chapter into a plain text string.
*/
public function chapterToPlainText(Chapter $chapter): string
{
$text = $chapter->name . "\n" . $chapter->description;
$text = $chapter->name . "\n" . $chapter->descriptionInfo()->getPlain();
$text = trim($text) . "\n\n";

$parts = [];
Expand Down
47 changes: 47 additions & 0 deletions app/Util/HtmlToPlainText.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?php

namespace BookStack\Util;

/**
 * Converts HTML into relatively clean plain text by walking the parsed DOM.
 * Text nodes are emitted as-is, and every element that is not listed as an
 * inline tag starts a new line in the output.
 */
class HtmlToPlainText
{
    /**
     * Inline tags types where the content should not be put on a new line.
     */
    protected array $inlineTags = [
        'a', 'b', 'i', 'u', 'strong', 'em', 'small', 'sup', 'sub', 'span', 'div',
    ];

    /**
     * Convert the provided HTML to relatively clean plain text.
     *
     * @param string $html The HTML content to convert.
     *
     * @return string The text content, with repeated/blank lines collapsed
     *                and leading/trailing whitespace removed.
     */
    public function convert(string $html): string
    {
        $doc = new HtmlDocument($html);
        $text = $this->nodeToText($doc->getBody());

        // Remove repeated newlines, including whitespace-only lines between
        // them (as produced by indented source HTML). If the regex engine
        // reports an error (null result) we fall back to the unmodified text.
        $text = preg_replace('/\n\s*\n/', "\n", $text) ?? $text;

        // Remove leading/trailing whitespace
        return trim($text);
    }

    /**
     * Recursively build the plain text for the given node and its children.
     *
     * @param \DOMNode $node The node to convert.
     *
     * @return string Raw text for the node; newline clean-up is left to convert().
     */
    protected function nodeToText(\DOMNode $node): string
    {
        if ($node->nodeType === XML_TEXT_NODE) {
            return $node->textContent;
        }

        // Comments and processing instructions carry no visible text and must
        // not be treated as block elements, otherwise a comment placed within
        // inline content would split that content across lines.
        if ($node->nodeType === XML_COMMENT_NODE || $node->nodeType === XML_PI_NODE) {
            return '';
        }

        // Anything that is not a known inline tag starts on a new line.
        $text = in_array($node->nodeName, $this->inlineTags, true) ? '' : "\n";

        foreach ($node->childNodes as $childNode) {
            $text .= $this->nodeToText($childNode);
        }

        return $text;
    }
}
4 changes: 2 additions & 2 deletions tests/Exports/TextExportTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public function test_book_text_export_format()
$resp = $this->asEditor()->get($entities['book']->getUrl('/export/plaintext'));

$expected = "Export Book\nThis is a book with stuff to export\n\nExport chapter\nA test chapter to be exported\nIt has loads of info within\n\n";
$expected .= "My wonderful page!\nMy great page Full of great stuff";
$expected .= "My wonderful page!\nMy great page\nFull of great stuff";
$resp->assertSee($expected);
}

Expand Down Expand Up @@ -82,7 +82,7 @@ public function test_chapter_text_export_format()
$resp = $this->asEditor()->get($entities['book']->getUrl('/export/plaintext'));

$expected = "Export chapter\nA test chapter to be exported\nIt has loads of info within\n\n";
$expected .= "My wonderful page!\nMy great page Full of great stuff";
$expected .= "My wonderful page!\nMy great page\nFull of great stuff";
$resp->assertSee($expected);
}
}
63 changes: 63 additions & 0 deletions tests/Util/HtmlToPlainTextTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?php

namespace Tests\Util;

use BookStack\Util\HtmlToPlainText;
use Tests\TestCase;

/**
 * Checks the plain text produced by HtmlToPlainText for a range of HTML inputs.
 */
class HtmlToPlainTextTest extends TestCase
{
    /**
     * Block-level elements land on their own lines and entities are decoded.
     */
    public function test_it_converts_html_to_plain_text()
    {
        $markup = <<<HTML
        <p>This is a test</p>
        <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        </ul>
        <h2>A Header</h2>
        <p>more &lt;&copy;&gt; text <strong>with bold</strong></p>
        HTML;
        $plain = <<<TEXT
        This is a test
        Item 1
        Item 2
        A Header
        more <©> text with bold
        TEXT;

        $this->runTest($markup, $plain);
    }

    /**
     * List items without any whitespace between them are still split by lines.
     */
    public function test_adjacent_list_items_are_separated_by_newline()
    {
        $markup = <<<HTML
        <ul><li>Item A</li><li>Item B</li></ul>
        HTML;
        $plain = <<<TEXT
        Item A
        Item B
        TEXT;

        $this->runTest($markup, $plain);
    }

    /**
     * Inline formatting tags keep their content on the same line.
     */
    public function test_inline_formats_dont_cause_newlines()
    {
        $markup = <<<HTML
        <p><strong>H</strong><a>e</a><sup>l</sup><span>l</span><em>o</em></p>
        HTML;
        $plain = <<<TEXT
        Hello
        TEXT;

        $this->runTest($markup, $plain);
    }

    /**
     * Convert the (trimmed) HTML and compare against the (trimmed) expected text.
     */
    protected function runTest(string $html, string $expected): void
    {
        $actual = (new HtmlToPlainText())->convert(trim($html));

        $this->assertEquals(trim($expected), $actual);
    }
}
Loading