|
11 | 11 | use BookStack\Uploads\ImageService; |
12 | 12 | use BookStack\Util\CspService; |
13 | 13 | use BookStack\Util\HtmlDocument; |
| 14 | +use BookStack\Util\HtmlToPlainText; |
14 | 15 | use DOMElement; |
15 | 16 | use Exception; |
16 | 17 | use Throwable; |
@@ -242,32 +243,21 @@ protected function containHtml(string $htmlContent): string |
242 | 243 |
|
243 | 244 | /** |
244 | 245 | * Converts the page contents into simple plain text. |
245 | | - * This method filters any bad looking content to provide a nice final output. |
| 246 | + * We re-generate the plain text from HTML at this point, post-page-content rendering. |
246 | 247 | */ |
247 | 248 | public function pageToPlainText(Page $page, bool $pageRendered = false, bool $fromParent = false): string |
248 | 249 | { |
249 | 250 | $html = $pageRendered ? $page->html : (new PageContent($page))->render(); |
250 | | - // Add proceeding spaces before tags so spaces remain between |
251 | | - // text within elements after stripping tags. |
252 | | - $html = str_replace('<', " <", $html); |
253 | | - $text = trim(strip_tags($html)); |
254 | | - // Replace multiple spaces with single spaces |
255 | | - $text = preg_replace('/ {2,}/', ' ', $text); |
256 | | - // Reduce multiple horrid whitespace characters. |
257 | | - $text = preg_replace('/(\x0A|\xA0|\x0A|\r|\n){2,}/su', "\n\n", $text); |
258 | | - $text = html_entity_decode($text); |
259 | | - // Add title |
260 | | - $text = $page->name . ($fromParent ? "\n" : "\n\n") . $text; |
261 | | - |
262 | | - return $text; |
| 251 | + $contentText = (new HtmlToPlainText())->convert($html); |
| 252 | + return $page->name . ($fromParent ? "\n" : "\n\n") . $contentText; |
263 | 253 | } |
264 | 254 |
|
265 | 255 | /** |
266 | 256 | * Convert a chapter into a plain text string. |
267 | 257 | */ |
268 | 258 | public function chapterToPlainText(Chapter $chapter): string |
269 | 259 | { |
270 | | - $text = $chapter->name . "\n" . $chapter->description; |
| 260 | + $text = $chapter->name . "\n" . $chapter->descriptionInfo()->getPlain(); |
271 | 261 | $text = trim($text) . "\n\n"; |
272 | 262 |
|
273 | 263 | $parts = []; |
|
0 commit comments