Skip to content

Commit 8cd955c

Browse files
Merge branch 'fixTitleError-888' into 'main'
Corrige problema de detecção de título em inglês. See merge request softwares-pkp/plugins_ojs/verificacao-metadados-documento!35
2 parents ec7e592 + 01638ee commit 8cd955c

6 files changed

Lines changed: 28 additions & 12 deletions

File tree

.gitlab-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ variables:
33

44
include:
55
- project: 'documentacao-e-tarefas/modelosparaintegracaocontinua'
6-
ref: main
6+
ref: stable-3_4_0
77
file:
88
- 'templates/groups/pkp_plugin.yml'
99
- 'templates/groups/ops/unit_tests.yml'

classes/ContentParser.php

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,16 @@
1313

1414
class ContentParser
1515
{
16+
private const ZERO_WIDTH_SPACE = "\x{200B}";
17+
private const MIN_WORD_LENGTH = 2;
18+
1619
private function cleanWord($word)
1720
{
1821
$patternsToReplace = [
1922
'“' => '"',
20-
'”' => '"'
23+
'”' => '"',
24+
'‘' => "'",
25+
'’' => "'",
2126
];
2227

2328
return $this->replacePatternsInText($word, $patternsToReplace);
@@ -40,8 +45,10 @@ private function parseWordsFromString($string)
4045
}
4146

4247
$word = mb_strtolower(substr($string, $wordStart, $wordEnd - $wordStart));
48+
if (strlen($word) >= self::MIN_WORD_LENGTH) {
49+
$words[] = $this->cleanWord($word);
50+
}
4351

44-
$words[] = $this->cleanWord($word);
4552
$i = $wordEnd;
4653
}
4754
}
@@ -51,7 +58,7 @@ private function parseWordsFromString($string)
5158

5259
private function parseLine($line)
5360
{
54-
$zeroWidthSpacePattern = '/\x{200B}/u';
61+
$zeroWidthSpacePattern = '/' . self::ZERO_WIDTH_SPACE . '/u';
5562
$line = preg_replace($zeroWidthSpacePattern, '', $line);
5663
$lineWords = $this->parseWordsFromString($line);
5764

@@ -95,7 +102,11 @@ public function createPatternFromString($string)
95102
$end++;
96103
}
97104

98-
$pattern[] = mb_strtolower(substr($string, $start, $end - $start));
105+
$word = mb_strtolower(substr($string, $start, $end - $start));
106+
if (strlen($word) >= self::MIN_WORD_LENGTH) {
107+
$pattern[] = $word;
108+
}
109+
99110
$i = $end;
100111
}
101112
}
@@ -113,7 +124,9 @@ public function cleanStyledText($text)
113124
'<u>' => '',
114125
'</u>' => '',
115126
'“' => '"',
116-
'”' => '"'
127+
'”' => '"',
128+
'‘' => "'",
129+
'’' => "'",
117130
];
118131

119132
return $this->replacePatternsInText($text, $patternsToReplace);

classes/DocumentChecker.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ private function checkForPattern($patterns, $limit, $limiarForWord, $limiarForPa
138138

139139
if ($similarity < $limiarForWord && $similarWords == 0) {
140140
break;
141-
} else if ($similarity >= $limiarForWord) {
141+
} elseif ($similarity >= $limiarForWord) {
142142
$similarWords++;
143143
}
144144
}

tests/ContentParserTest.php

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ public function testParsingOfDocument(): void
2323
'dolor',
2424
'sit',
2525
'amet",',
26-
'consectetur',
26+
"'consectetur'",
2727
'adipiscing',
2828
'elit.',
2929
'proin',
@@ -62,7 +62,6 @@ public function testParsingOfLineNumberedDocument(): void
6262
'ex',
6363
'libero,',
6464
'porttitor',
65-
'a',
6665
'elit',
6766
'eget,',
6867
'maximus',
@@ -78,7 +77,7 @@ public function testParsingOfLineNumberedDocument(): void
7877
public function testCreatePatternFromString(): void
7978
{
8079
$string = 'Innovations and new advances for this world: a survey';
81-
$expectedPattern = ['innovations', 'and', 'new', 'advances', 'for', 'this', 'world:', 'a', 'survey'];
80+
$expectedPattern = ['innovations', 'and', 'new', 'advances', 'for', 'this', 'world:', 'survey'];
8281

8382
$patternCreated = $this->contentParser->createPatternFromString($string);
8483
$this->assertEquals($expectedPattern, $patternCreated);
@@ -97,7 +96,11 @@ public function testCleansOtherCharactersFromTitle(): void
9796
{
9897
$title = 'Reflections on “Arrival” and brazilian sign language (LIBRAS)';
9998
$expectedCleanedTitle = 'Reflections on "Arrival" and brazilian sign language (LIBRAS)';
99+
$cleanedTitle = $this->contentParser->cleanStyledText($title);
100+
$this->assertEquals($expectedCleanedTitle, $cleanedTitle);
100101

102+
$title = 'Schindler’s List: ‘absolut cinema’';
103+
$expectedCleanedTitle = "Schindler's List: 'absolut cinema'";
101104
$cleanedTitle = $this->contentParser->cleanStyledText($title);
102105
$this->assertEquals($expectedCleanedTitle, $cleanedTitle);
103106
}

tests/fixtures/dummy_document.pdf

216 Bytes
Binary file not shown.

version.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
<version>
1313
<application>contentAnalysis</application>
1414
<type>plugins.generic</type>
15-
<release>2.2.4.0</release>
16-
<date>2026-01-06</date>
15+
<release>2.2.5.0</release>
16+
<date>2026-02-27</date>
1717
<lazy-load>1</lazy-load>
1818
<class>ContentAnalysisPlugin</class>
1919
</version>

0 commit comments

Comments
 (0)