Skip to content

Commit f82de99

Browse files
Merge branch 'fixTitleDetection-832' into 'main'
Trata casos onde título possui estilização. See merge request softwares-pkp/plugins_ojs/verificacao-metadados-documento!31
2 parents 10a3a1c + 62c9e78 commit f82de99

8 files changed

Lines changed: 93 additions & 17 deletions

classes/ContentParser.php

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,17 @@ public function createPatternFromString($string)
9090

9191
return $pattern;
9292
}
93+
94+
/**
 * Removes inline styling tags from a text while keeping the text they wrap.
 *
 * Opening and closing forms of the b, i, u, sup and sub tags are removed.
 * Matching is case-insensitive and tolerates attributes inside the tag
 * (e.g. `<i class="x">`), so `<B>Title</B>` and `H<sub>2</sub>O` are cleaned
 * as well. Any other markup, and bare `<` / `>` characters, are left as-is.
 *
 * @param string $text Text that may contain inline styling tags.
 * @return string The text with the styling tags removed.
 */
public function cleanStyledText($text)
{
    // After the tag name only whitespace (followed by attributes) or the
    // closing '>' may come, so longer tag names such as <br> never match.
    $styleTagPattern = '~</?(?:b|i|u|sup|sub)(?:\s[^>]*)?>~i';

    $cleanedText = preg_replace($styleTagPattern, '', $text);

    // preg_replace() yields null on an engine error; fall back to the input
    // rather than handing null to the caller.
    return $cleanedText === null ? $text : $cleanedText;
}
93106
}

classes/DocumentChecker.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,9 @@ public function checkTitleInEnglish($title)
191191
}
192192

193193
$parser = new ContentParser();
194-
$patternTitle = $parser->createPatternFromString($title);
194+
$cleanedTitle = $parser->cleanStyledText($title);
195+
$patternTitle = $parser->createPatternFromString($cleanedTitle);
196+
195197
return $this->checkForPattern(array($patternTitle), count($patternTitle), 75, 0.75);
196198
}
197199

tests/ContentParserTest.php

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
<?php
2+
3+
namespace APP\plugins\generic\contentAnalysis\tests;
4+
5+
use PHPUnit\Framework\TestCase;
6+
use APP\plugins\generic\contentAnalysis\classes\ContentParser;
7+
8+
/**
 * Tests for ContentParser: parsing PDF fixtures into word lists, building a
 * word pattern from a string, and cleaning styling tags out of a title.
 */
class ContentParserTest extends TestCase
{
    private const FIXTURES_PATH = __DIR__ . DIRECTORY_SEPARATOR . 'fixtures' . DIRECTORY_SEPARATOR;
    private $contentParser;

    public function setUp(): void
    {
        $this->contentParser = new ContentParser();
    }

    /**
     * Parses a fixture document and returns only its first words.
     *
     * @param string $fixtureFile File name inside the fixtures directory.
     * @param int $howMany Number of leading words to keep.
     * @return array
     */
    private function leadingWordsOf(string $fixtureFile, int $howMany): array
    {
        $allWords = $this->contentParser->parseDocument(self::FIXTURES_PATH . $fixtureFile);

        return array_slice($allWords, 0, $howMany);
    }

    public function testParsingOfDocument(): void
    {
        $firstWordsOfFixture = [
            'lorem', 'ipsum', 'dolor', 'sit', 'amet,', 'consectetur', 'adipiscing', 'elit.', 'proin', 'arcu', 'diam,',
            'elementum', 'id', 'quam', 'id,', 'finibus', 'porttitor', 'dolor.', 'donec', 'porta', 'ullamcorper',
            'volutpat.'
        ];

        $actualWords = $this->leadingWordsOf('dummy_document.pdf', count($firstWordsOfFixture));

        self::assertEquals($firstWordsOfFixture, $actualWords);
    }

    public function testParsingOfLineNumberedDocument(): void
    {
        $firstWordsOfFixture = [
            'lorem', 'ipsum', 'dolor', 'sit', 'amet,', 'consectetur', 'adipiscing', 'elit.',
            'etiam', 'ex', 'libero,', 'porttitor', 'a', 'elit', 'eget,', 'maximus', 'viverra', 'arcu.'
        ];

        $actualWords = $this->leadingWordsOf('dummy_document_numbered.pdf', count($firstWordsOfFixture));

        self::assertEquals($firstWordsOfFixture, $actualWords);
    }

    public function testCreatePatternFromString(): void
    {
        $wordsOfTitle = ['innovations', 'and', 'new', 'advances', 'for', 'this', 'world:', 'a', 'survey'];

        self::assertEquals(
            $wordsOfTitle,
            $this->contentParser->createPatternFromString('Innovations and new advances for this world: a survey')
        );
    }

    public function testCleansStylingFromTitle(): void
    {
        $titleWithTags = '<b>Innovations</b> and <i>new</i> advances for <u>this world</u>: a survey';

        self::assertEquals(
            'Innovations and new advances for this world: a survey',
            $this->contentParser->cleanStyledText($titleWithTags)
        );
    }
}

tests/DetectionOnDocumentTest.php

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@
44

55
use PHPUnit\Framework\TestCase;
66
use APP\plugins\generic\contentAnalysis\classes\DocumentChecker;
7-
use APP\plugins\generic\contentAnalysis\classes\ContentParser;
87

9-
class DetectionOnDocumentTest extends TestCase
8+
abstract class DetectionOnDocumentTest extends TestCase
109
{
10+
protected const FIXTURES_PATH = __DIR__ . DIRECTORY_SEPARATOR . 'fixtures' . DIRECTORY_SEPARATOR;
1111
protected $documentChecker;
1212
protected $dummyDocumentPath;
1313

1414
public function setUp(): void
1515
{
16-
$this->dummyDocumentPath = dirname(__FILE__) . DIRECTORY_SEPARATOR . "dummy_document.pdf";
16+
$this->dummyDocumentPath = self::FIXTURES_PATH . 'dummy_document.pdf';
1717
$this->documentChecker = new DocumentChecker($this->dummyDocumentPath);
1818
}
1919

@@ -32,15 +32,4 @@ protected function insertStringIntoTextHtml($string, $textHtml)
3232
{
3333
return $textHtml . " " . $string;
3434
}
35-
36-
public function testParserRemovesLineNumbering(): void
37-
{
38-
$this->dummyDocumentPath = dirname(__FILE__) . DIRECTORY_SEPARATOR . "dummy_document_numbered.pdf";
39-
$this->documentChecker = new DocumentChecker($this->dummyDocumentPath);
40-
41-
$expectedWordsFirstLine = ["lorem", "ipsum", "dolor", "sit", "amet,", "consectetur", "adipiscing", "elit."];
42-
$parsedWordsFirstLine = array_slice($this->documentChecker->words, 0, count($expectedWordsFirstLine));
43-
44-
$this->assertEquals($expectedWordsFirstLine, $parsedWordsFirstLine);
45-
}
4635
}

tests/MetadataEnglishTest.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ class MetadataEnglishTest extends DetectionOnDocumentTest
99
private $patternKeywords = array("keywords");
1010
private $patternAbstract = array("abstract");
1111
private $title = "A beautiful title";
12+
private $titleWithStyling = "<b>A</b> <i>beautiful</i> <u>title</u>";
1213

1314
public function setUp(): void
1415
{
@@ -31,6 +32,17 @@ public function testDetectionAbstract(): void
3132
$this->assertEquals("Success", $statusAbstract);
3233
}
3334

35+
/**
 * A title passed with styling tags must still be reported as found when the
 * document word list contains the words of the unstyled title.
 */
public function testDetectionTitleWithStyling(): void
{
    // Seed the document's word list with the pattern built from the plain title.
    $plainTitleWords = (new ContentParser())->createPatternFromString($this->title);
    $checker = $this->documentChecker;
    $checker->words = $this->insertWordsIntoDocWordList($plainTitleWords, $checker->words);

    // The check itself receives the styled variant of the same title.
    $detectionStatus = $checker->checkTitleInEnglish($this->titleWithStyling);

    $this->assertEquals("Success", $detectionStatus);
}
45+
3446
public function testDetectionTitle(): void
3547
{
3648
$parser = new ContentParser();
File renamed without changes.

version.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
<version>
1313
<application>contentAnalysis</application>
1414
<type>plugins.generic</type>
15-
<release>2.2.2.0</release>
16-
<date>2025-09-12</date>
15+
<release>2.2.3.0</release>
16+
<date>2025-09-17</date>
1717
<lazy-load>1</lazy-load>
1818
<class>ContentAnalysisPlugin</class>
1919
</version>

0 commit comments

Comments
 (0)