Skip to content

Commit f5df556

Browse files
Merge pull request #64 from lepidus/stable-3_3_0
Format markup to submit to Thoth (OMP 3.3.0)
2 parents 6293dd0 + d38a72e commit f5df556

6 files changed

Lines changed: 369 additions & 28 deletions

File tree

classes/factories/ThothAbstractFactory.inc.php

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
use ThothApi\GraphQL\Models\AbstractText as ThothAbstract;
1818

1919
import('plugins.generic.thoth.classes.i18n.ThothLocaleCode');
20+
import('plugins.generic.thoth.classes.formatters.ThothMarkupFormatter');
2021

2122
class ThothAbstractFactory
2223
{
@@ -34,6 +35,7 @@ private function create($entity, string $workId, ?string $preferredLocale = null
3435
{
3536
$canonicalLocale = $this->getCanonicalLocale($entity, $preferredLocale);
3637
$abstracts = $this->getLocalizedValues($entity, 'abstract', $canonicalLocale);
38+
$markupFormatter = new ThothMarkupFormatter();
3739
$thothAbstracts = [];
3840

3941
foreach ($abstracts as $locale => $abstract) {
@@ -46,7 +48,7 @@ private function create($entity, string $workId, ?string $preferredLocale = null
4648
$thothAbstracts[$this->getLocaleKey($localeCode)] = new ThothAbstract([
4749
'workId' => $workId,
4850
'localeCode' => $localeCode,
49-
'content' => $this->wrapInParagraph($abstract),
51+
'content' => $markupFormatter->format($abstract),
5052
'canonical' => $locale === $canonicalLocale,
5153
'abstractType' => 'LONG',
5254
]);
@@ -108,14 +110,4 @@ private function logUnsupportedLocale(string $entityType, ?string $locale): void
108110
$normalizedLocaleCode
109111
));
110112
}
111-
112-
private function wrapInParagraph($content)
113-
{
114-
$content = trim($content);
115-
if (preg_match('/^<p\b[^>]*>.*<\/p>$/is', $content) === 1) {
116-
return $content;
117-
}
118-
119-
return sprintf('<p>%s</p>', $content);
120-
}
121113
}

classes/factories/ThothBiographyFactory.inc.php

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@
1717
use ThothApi\GraphQL\Models\Biography as ThothBiography;
1818

1919
import('plugins.generic.thoth.classes.i18n.ThothLocaleCode');
20+
import('plugins.generic.thoth.classes.formatters.ThothMarkupFormatter');
2021

2122
class ThothBiographyFactory
2223
{
2324
public function createFromAuthor($author, string $contributionId, ?string $preferredLocale = null): array
2425
{
2526
$canonicalLocale = $this->getCanonicalLocale($author, $preferredLocale);
2627
$biographies = $this->getLocalizedValues($author, 'biography', $canonicalLocale);
28+
$markupFormatter = new ThothMarkupFormatter();
2729
$thothBiographies = [];
2830

2931
foreach ($biographies as $locale => $biography) {
@@ -36,7 +38,7 @@ public function createFromAuthor($author, string $contributionId, ?string $prefe
3638
$thothBiographies[$this->getLocaleKey($localeCode)] = new ThothBiography([
3739
'contributionId' => $contributionId,
3840
'localeCode' => $localeCode,
39-
'content' => $this->wrapInParagraph($biography),
41+
'content' => $markupFormatter->format($biography),
4042
'canonical' => $locale === $canonicalLocale,
4143
]);
4244
}
@@ -97,14 +99,4 @@ private function logUnsupportedLocale(string $entityType, ?string $locale): void
9799
$normalizedLocaleCode
98100
));
99101
}
100-
101-
private function wrapInParagraph($content)
102-
{
103-
$content = trim($content);
104-
if (preg_match('/^<p\b[^>]*>.*<\/p>$/is', $content) === 1) {
105-
return $content;
106-
}
107-
108-
return sprintf('<p>%s</p>', $content);
109-
}
110102
}
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
<?php
2+
3+
/**
4+
* @file plugins/generic/thoth/classes/formatters/ThothMarkupFormatter.inc.php
5+
*
6+
* Copyright (c) 2024-2026 Lepidus Tecnologia
7+
* Copyright (c) 2024-2026 Thoth
8+
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
9+
*
10+
* @class ThothMarkupFormatter
11+
*
12+
* @ingroup plugins_generic_thoth
13+
*
14+
* @brief Formats HTML markup for Thoth text fields
15+
*/
16+
17+
class ThothMarkupFormatter
{
    /**
     * Normalise an HTML fragment into a flat sequence of block elements.
     *
     * Content that contains no OMP `<div class="value">` wrapper, no `<br>`
     * and no `<ul>`/`<ol>` is returned trimmed but otherwise untouched.
     * Otherwise the fragment is parsed and rebuilt so that:
     *  - only the children of the `<div class="value">` wrapper are kept
     *    when such a wrapper exists;
     *  - `<br>` elements that are direct children of the wrapper or of a
     *    `<p>` split the surrounding text into separate `<p>` blocks;
     *  - `<ul>`/`<ol>` lists are emitted as blocks of their own instead of
     *    being nested inside a paragraph;
     *  - any `<br>` left elsewhere (e.g. inside `<strong>`) becomes a space.
     *
     * @param string $content HTML fragment to format
     *
     * @return string The formatted markup, or the trimmed input when no
     *                formatting is needed or the fragment cannot be parsed
     */
    public function format(string $content): string
    {
        $content = trim($content);
        if (!$this->needsStructuralFormatting($content)) {
            return $content;
        }

        $document = new \DOMDocument('1.0', 'UTF-8');
        // Collect libxml parse warnings internally instead of emitting PHP
        // warnings; the caller's previous setting is restored right after.
        $previousUseInternalErrors = libxml_use_internal_errors(true);
        // The XML declaration prefix hints the UTF-8 encoding to the HTML
        // parser; the synthetic <div> gives the fragment a single container.
        $loaded = $document->loadHTML(
            '<?xml encoding="UTF-8"><div>' . $content . '</div>',
            LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
        );
        libxml_clear_errors();
        libxml_use_internal_errors($previousUseInternalErrors);

        if (!$loaded) {
            return $content;
        }

        // Prefer the OMP value wrapper; otherwise fall back to the first
        // <div> in document order, i.e. the synthetic container added above.
        $wrapper = $this->getValueWrapper($document) ?? $document->getElementsByTagName('div')->item(0);
        if ($wrapper === null) {
            return $content;
        }

        $blocks = [];
        $inlineContent = '';
        // Iterate over a snapshot of the (live) child node list.
        foreach (iterator_to_array($wrapper->childNodes) as $node) {
            $this->appendMarkupNode($document, $node, $blocks, $inlineContent);
        }
        $this->flushParagraph($blocks, $inlineContent);

        return $this->removeBreaks(implode('', $blocks));
    }

    /**
     * Tell whether the markup contains anything format() has to restructure.
     *
     * The wrapper check requires `value` to be a complete, whitespace
     * delimited class token of a `<div>`, so that classes such as
     * `value-added` or `myvalue` do not trigger formatting. This mirrors the
     * token match performed by getValueWrapper().
     *
     * @param string $content Trimmed HTML fragment
     *
     * @return bool True when a value wrapper, a `<br>` or a list is present
     */
    private function needsStructuralFormatting(string $content): bool
    {
        return preg_match('/<div\b[^>]*\bclass=(["\'])(?:[^"\']*\s)?value(?:\s[^"\']*)?\1/i', $content) === 1
            || preg_match('/<br\b/i', $content) === 1
            || preg_match('/<\/?(ul|ol)\b/i', $content) === 1;
    }

    /**
     * Find the first `<div>` whose class list contains the token `value`.
     *
     * The lookup is limited to `<div>` elements: matching any element would
     * let an inline element such as `<span class="value">` be mistaken for
     * the wrapper, and everything outside of it would be discarded.
     *
     * @param \DOMDocument $document Parsed fragment
     *
     * @return \DOMElement|null The wrapper element, or null when absent
     */
    private function getValueWrapper(\DOMDocument $document): ?\DOMElement
    {
        $xpath = new \DOMXPath($document);
        $nodes = $xpath->query('//div[contains(concat(" ", normalize-space(@class), " "), " value ")]');
        $node = $nodes !== false ? $nodes->item(0) : null;

        return $node instanceof \DOMElement ? $node : null;
    }

    /**
     * Route a single node either to the block list or to the pending
     * inline buffer.
     *
     * `<br>` closes the pending paragraph, `<p>` is flattened through
     * appendParagraphNode(), `<ul>`/`<ol>` are serialised as their own
     * block, and every other node is serialised into the inline buffer.
     *
     * @param \DOMDocument $document      Document used to serialise nodes
     * @param \DOMNode     $node          Node to process
     * @param array        $blocks        Finished block markup (appended to)
     * @param string       $inlineContent Pending paragraph content (updated)
     */
    private function appendMarkupNode(
        \DOMDocument $document,
        \DOMNode $node,
        array &$blocks,
        string &$inlineContent
    ): void {
        if ($node instanceof \DOMElement) {
            $tagName = strtolower($node->tagName);

            if ($tagName === 'br') {
                $this->flushParagraph($blocks, $inlineContent);
                return;
            }

            if ($tagName === 'p') {
                $this->flushParagraph($blocks, $inlineContent);
                $this->appendParagraphNode($document, $node, $blocks);
                return;
            }

            if (in_array($tagName, ['ul', 'ol'], true)) {
                $this->flushParagraph($blocks, $inlineContent);
                $blocks[] = trim($document->saveHTML($node));
                return;
            }
        }

        $inlineContent .= $document->saveHTML($node);
    }

    /**
     * Flatten a `<p>` element: its children are processed like top-level
     * nodes, so breaks and lists inside it start new blocks. The `<p>` tag
     * itself is not copied; paragraphs are re-created by flushParagraph().
     *
     * @param \DOMDocument $document  Document used to serialise nodes
     * @param \DOMElement  $paragraph Paragraph element to flatten
     * @param array        $blocks    Finished block markup (appended to)
     */
    private function appendParagraphNode(\DOMDocument $document, \DOMElement $paragraph, array &$blocks): void
    {
        $inlineContent = '';
        foreach (iterator_to_array($paragraph->childNodes) as $node) {
            $this->appendMarkupNode($document, $node, $blocks, $inlineContent);
        }
        $this->flushParagraph($blocks, $inlineContent);
    }

    /**
     * Emit the pending inline content as a `<p>` block and reset the buffer.
     * Whitespace-only content produces no block.
     *
     * @param array  $blocks        Finished block markup (appended to)
     * @param string $inlineContent Pending paragraph content (reset to '')
     */
    private function flushParagraph(array &$blocks, string &$inlineContent): void
    {
        $content = trim($inlineContent);
        if ($content !== '') {
            $blocks[] = sprintf('<p>%s</p>', $content);
        }

        $inlineContent = '';
    }

    /**
     * Replace every remaining `<br>` tag with a single space.
     *
     * @param string $content Serialised block markup
     *
     * @return string Markup without `<br>` tags; the input is returned
     *                unchanged if the replacement fails
     */
    private function removeBreaks(string $content): string
    {
        return preg_replace('/<br\b[^>]*>/i', ' ', $content) ?? $content;
    }
}

tests/classes/factories/ThothAbstractFactoryTest.php

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
<?php
22

3+
require_once(__DIR__ . '/../../../vendor/autoload.php');
4+
35
import('lib.pkp.tests.PKPTestCase');
46
import('plugins.generic.thoth.classes.factories.ThothAbstractFactory');
57

68
class ThothAbstractFactoryTest extends PKPTestCase
79
{
8-
public function testCreateFromPublicationWrapsAbstractWithoutParagraph()
10+
public function testCreateFromPublicationSendsAbstractWithoutParagraphUnchanged()
911
{
1012
$publication = new class () {
1113
public function getData($key)
@@ -22,7 +24,7 @@ public function getData($key)
2224
$factory = new ThothAbstractFactory();
2325
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
2426

25-
$this->assertSame('<p>English abstract</p>', $thothAbstracts['EN_US']->getContent());
27+
$this->assertSame('English abstract', $thothAbstracts['EN_US']->getContent());
2628
}
2729

2830
public function testCreateFromPublicationPreservesAbstractAlreadyWrappedInParagraph()
@@ -44,4 +46,119 @@ public function getData($key)
4446

4547
$this->assertSame('<p>English abstract</p>', $thothAbstracts['EN_US']->getContent());
4648
}
49+
50+
public function testCreateFromPublicationMovesListsOutsideParagraphs()
51+
{
52+
$publication = new class () {
53+
public function getData($key)
54+
{
55+
$values = [
56+
'locale' => 'en_US',
57+
'abstract' => ['en_US' => 'Intro<ul><li>First item</li></ul>Outro'],
58+
];
59+
60+
return $values[$key] ?? null;
61+
}
62+
};
63+
64+
$factory = new ThothAbstractFactory();
65+
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
66+
67+
$this->assertSame(
68+
'<p>Intro</p><ul><li>First item</li></ul><p>Outro</p>',
69+
$thothAbstracts['EN_US']->getContent()
70+
);
71+
}
72+
73+
public function testCreateFromPublicationMovesNestedListsOutsideParagraphs()
74+
{
75+
$publication = new class () {
76+
public function getData($key)
77+
{
78+
$values = [
79+
'locale' => 'en_US',
80+
'abstract' => ['en_US' => '<p>Intro<ul><li>First item</li></ul>Outro</p>'],
81+
];
82+
83+
return $values[$key] ?? null;
84+
}
85+
};
86+
87+
$factory = new ThothAbstractFactory();
88+
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
89+
90+
$this->assertSame(
91+
'<p>Intro</p><ul><li>First item</li></ul><p>Outro</p>',
92+
$thothAbstracts['EN_US']->getContent()
93+
);
94+
}
95+
96+
public function testCreateFromPublicationConvertsBreaksToParagraphs()
97+
{
98+
$publication = new class () {
99+
public function getData($key)
100+
{
101+
$values = [
102+
'locale' => 'en_US',
103+
'abstract' => ['en_US' => '<p>First line<br />Second line</p>'],
104+
];
105+
106+
return $values[$key] ?? null;
107+
}
108+
};
109+
110+
$factory = new ThothAbstractFactory();
111+
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
112+
113+
$this->assertSame('<p>First line</p><p>Second line</p>', $thothAbstracts['EN_US']->getContent());
114+
}
115+
116+
public function testCreateFromPublicationRemovesBreaksInsideInlineMarkup()
117+
{
118+
$publication = new class () {
119+
public function getData($key)
120+
{
121+
$values = [
122+
'locale' => 'en_US',
123+
'abstract' => ['en_US' => '<p><strong>First<br />Second</strong></p>'],
124+
];
125+
126+
return $values[$key] ?? null;
127+
}
128+
};
129+
130+
$factory = new ThothAbstractFactory();
131+
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
132+
133+
$this->assertSame('<p><strong>First Second</strong></p>', $thothAbstracts['EN_US']->getContent());
134+
}
135+
136+
public function testCreateFromPublicationRemovesOmpPresentationWrapper()
137+
{
138+
$publication = new class () {
139+
public function getData($key)
140+
{
141+
$values = [
142+
'locale' => 'en_US',
143+
'abstract' => [
144+
'en_US' => '<h2 class="label">Synopsis</h2><div class="value">'
145+
. '<p>Publisher<br />Address<br />Country</p>'
146+
. '<p><strong>Open</strong> <a href="https://example.com">platform</a></p>'
147+
. '</div>',
148+
],
149+
];
150+
151+
return $values[$key] ?? null;
152+
}
153+
};
154+
155+
$factory = new ThothAbstractFactory();
156+
$thothAbstracts = $factory->createFromPublication($publication, 'work-id', 'en_US');
157+
158+
$this->assertSame(
159+
'<p>Publisher</p><p>Address</p><p>Country</p>'
160+
. '<p><strong>Open</strong> <a href="https://example.com">platform</a></p>',
161+
$thothAbstracts['EN_US']->getContent()
162+
);
163+
}
47164
}

0 commit comments

Comments
 (0)