Skip to content

Commit e752f26

Browse files
Merge branch 'fixNumbLineCheck350-912' into 'main'
Corrige detecção de documentos com linhas numeradas - 3.5.0 See merge request softwares-pkp/plugins_ojs/verificacao-metadados-documento!42
2 parents 9a48fd5 + 8329ffc commit e752f26

9 files changed

Lines changed: 61 additions & 14 deletions

classes/ContentParser.php

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class ContentParser
1515
{
1616
private const ZERO_WIDTH_SPACE = "\x{200B}";
1717
private const MIN_WORD_LENGTH = 2;
18+
private const NUM_DOC_LINES_SAMPLE = 5;
1819

1920
private function cleanWord($word)
2021
{
@@ -30,7 +31,7 @@ private function cleanWord($word)
3031

3132
private function parseWordsFromString($string)
3233
{
33-
$words = array();
34+
$words = [];
3435

3536
for ($i = 0; $i < strlen($string); $i++) {
3637
while ($i < strlen($string) && ctype_space($string[$i])) {
@@ -56,19 +57,32 @@ private function parseWordsFromString($string)
5657
return $words;
5758
}
5859

59-
private function parseLine($line)
60+
private function parseLine(string $line, bool $docIsNumbered)
6061
{
6162
$zeroWidthSpacePattern = '/' . self::ZERO_WIDTH_SPACE . '/u';
6263
$line = preg_replace($zeroWidthSpacePattern, '', $line);
6364
$lineWords = $this->parseWordsFromString($line);
6465

65-
if (!empty($lineWords) && is_numeric($lineWords[0])) {
66+
if ($docIsNumbered && !empty($lineWords) && is_numeric($lineWords[0])) {
6667
array_shift($lineWords);
6768
}
6869

6970
return $lineWords;
7071
}
7172

73+
/**
 * Reports whether the document appears to have numbered lines.
 *
 * Looks at the first NUM_DOC_LINES_SAMPLE lines and, for each one, takes the
 * text that precedes the first space. The document is considered numbered
 * only when every sampled prefix is numeric.
 *
 * @param array $docLines Lines of the document, in reading order.
 * @return bool True when all sampled lines start with a numeric token; false
 *              otherwise, including when the document has fewer lines than
 *              the sample size.
 */
public function checkDocumentIsNumbered(array $docLines): bool
{
    // array_slice() never reads past the end of the array, so documents that
    // are shorter than the sample no longer trigger "Undefined array key"
    // warnings nor hand null to explode().
    $sampleLines = array_slice($docLines, 0, self::NUM_DOC_LINES_SAMPLE);

    // Too few lines to decide: keep the previous outcome (not numbered).
    if (count($sampleLines) < self::NUM_DOC_LINES_SAMPLE) {
        return false;
    }

    foreach ($sampleLines as $line) {
        $firstWord = explode(' ', $line)[0];

        if (!is_numeric($firstWord)) {
            return false;
        }
    }

    return true;
}
85+
7286
public function parseDocument($pathFile, $useRawMode = true)
7387
{
7488
$pathTxt = substr($pathFile, 0, -3) . 'txt';
@@ -77,19 +91,21 @@ public function parseDocument($pathFile, $useRawMode = true)
7791

7892
$docText = file_get_contents($pathTxt);
7993
$docLines = preg_split("/\r\n|\n|\r/", $docText);
80-
$docWords = array();
94+
$docWords = [];
8195
unlink($pathTxt);
8296

97+
$docIsNumbered = $this->checkDocumentIsNumbered($docLines);
98+
8399
foreach ($docLines as $line) {
84-
$docWords = array_merge($docWords, $this->parseLine($line));
100+
$docWords = array_merge($docWords, $this->parseLine($line, $docIsNumbered));
85101
}
86102

87103
return $docWords;
88104
}
89105

90106
public function createPatternFromString($string)
91107
{
92-
$pattern = array();
108+
$pattern = [];
93109

94110
for ($i = 0; $i < strlen($string); $i++) {
95111
while ($i < strlen($string) && ctype_space($string[$i])) {

classes/DocumentChecker.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ public function checkTitleInEnglish($title)
212212
$cleanedTitle = $parser->cleanStyledText($title);
213213
$patternTitle = $parser->createPatternFromString($cleanedTitle);
214214

215-
return $this->checkForPatterns(array($patternTitle), count($patternTitle), 75, 0.75);
215+
return $this->checkForPatterns([$patternTitle], count($patternTitle), 75, 0.75);
216216
}
217217

218218
public function checkEthicsCommittee()

classes/DocumentChecklist.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public function __construct($path)
2424

2525
public function executeChecklist($submission)
2626
{
27-
$dataChecklist = array();
27+
$dataChecklist = [];
2828
$submissionIsArticle = !$submission->getData('nonArticle');
2929

3030
if ($submissionIsArticle) {

tests/AuthorsContributionTest.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
class AuthorsContributionTest extends DetectionOnDocumentTest
77
{
8-
private $patternContribution = array("contribuição", "dos", "autores");
8+
private $patternContribution = ["contribuição", "dos", "autores"];
99

1010
public function setUp(): void
1111
{

tests/ContentParserTest.php

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ public function testParsingOfDocument(): void
3232
'elementum',
3333
'id',
3434
'quam',
35+
'10',
3536
'id,',
3637
'finibus',
3738
'porttitor',
@@ -47,6 +48,36 @@ public function testParsingOfDocument(): void
4748
$this->assertEquals($expectedParsedWords, $parsedWords);
4849
}
4950

51+
/**
 * The numbering check must report true only when every sampled line
 * starts with a number.
 */
public function testDetectsDocumentLinesAreNumbered(): void
{
    // No line carries a leading number.
    $plainLines = [
        'Lorem ipsum dolor',
        'sit amet consectetur',
        'adipiscing elit',
        'Proin arcu diam',
        'elementum id quam',
    ];
    $plainIsNumbered = $this->contentParser->checkDocumentIsNumbered($plainLines);
    $this->assertFalse($plainIsNumbered);

    // Only some lines carry a leading number.
    $mixedLines = [
        '1 Lorem ipsum dolor',
        'sit amet consectetur',
        '3 adipiscing elit',
        'Proin arcu diam',
        'elementum id quam',
    ];
    $mixedIsNumbered = $this->contentParser->checkDocumentIsNumbered($mixedLines);
    $this->assertFalse($mixedIsNumbered);

    // Every line carries a leading number.
    $numberedLines = [
        '1 Lorem ipsum dolor',
        '2 sit amet consectetur',
        '3 adipiscing elit',
        '4 Proin arcu diam',
        '5 elementum id quam',
    ];
    $numberedIsNumbered = $this->contentParser->checkDocumentIsNumbered($numberedLines);
    $this->assertTrue($numberedIsNumbered);
}
80+
5081
public function testParsingOfLineNumberedDocument(): void
5182
{
5283
$expectedParsedWords = [

tests/EthicsCommitteeTest.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
class EthicsCommitteeTest extends DetectionOnDocumentTest
77
{
8-
private $patternCommittee = array("aprovação", "do", "comitê", "de", "ética");
8+
private $patternCommittee = ["aprovação", "do", "comitê", "de", "ética"];
99

1010
public function setUp(): void
1111
{

tests/MetadataEnglishTest.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
class MetadataEnglishTest extends DetectionOnDocumentTest
88
{
9-
private $patternKeywords = array("keywords");
10-
private $patternAbstract = array("abstract");
9+
private $patternKeywords = ["keywords"];
10+
private $patternAbstract = ["abstract"];
1111
private $title = "A beautiful title";
1212
private $titleWithStyling = "<b>A</b> <i>beautiful</i> <u>title</u>";
1313

tests/fixtures/dummy_document.pdf

272 Bytes
Binary file not shown.

version.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
<version>
1313
<application>contentAnalysis</application>
1414
<type>plugins.generic</type>
15-
<release>3.0.1.0</release>
16-
<date>2026-04-24</date>
15+
<release>3.0.2.0</release>
16+
<date>2026-05-27</date>
1717
<lazy-load>1</lazy-load>
1818
<class>ContentAnalysisPlugin</class>
1919
</version>

0 commit comments

Comments
 (0)