|
| 1 | +<?php |
| 2 | + |
| 3 | +namespace APP\plugins\generic\contentAnalysis\tests; |
| 4 | + |
| 5 | +use PHPUnit\Framework\TestCase; |
| 6 | +use APP\plugins\generic\contentAnalysis\classes\ContentParser; |
| 7 | + |
/**
 * Unit tests for ContentParser.
 *
 * Covers word extraction from PDF fixtures, conversion of a plain string into
 * a list of lower-cased words, and removal of inline styling tags from a title.
 */
class ContentParserTest extends TestCase
{
    /** Directory that holds the PDF fixtures, including the trailing separator. */
    private const FIXTURES_PATH = __DIR__ . DIRECTORY_SEPARATOR . 'fixtures' . DIRECTORY_SEPARATOR;

    /** @var ContentParser Parser under test; a fresh instance is created before every test. */
    private $contentParser;

    /**
     * Creates a new parser so that no state can leak from one test into the next.
     */
    public function setUp(): void
    {
        $this->contentParser = new ContentParser();
    }

    /**
     * The first words parsed from the plain fixture must match the expected list.
     */
    public function testParsingOfDocument(): void
    {
        $expectedParsedWords = [
            'lorem', 'ipsum', 'dolor', 'sit', 'amet,', 'consectetur', 'adipiscing', 'elit.', 'proin', 'arcu', 'diam,',
            'elementum', 'id', 'quam', 'id,', 'finibus', 'porttitor', 'dolor.', 'donec', 'porta', 'ullamcorper',
            'volutpat.',
        ];

        $parsedWords = $this->parseLeadingWords('dummy_document.pdf', count($expectedParsedWords));

        $this->assertSame($expectedParsedWords, $parsedWords);
    }

    /**
     * The first words parsed from the "numbered" fixture must match the expected list.
     *
     * NOTE(review): judging by the fixture name this document carries line
     * numbers, and the expected list contains none - confirm against the fixture.
     */
    public function testParsingOfLineNumberedDocument(): void
    {
        $expectedParsedWords = [
            'lorem', 'ipsum', 'dolor', 'sit', 'amet,', 'consectetur', 'adipiscing', 'elit.',
            'etiam', 'ex', 'libero,', 'porttitor', 'a', 'elit', 'eget,', 'maximus', 'viverra', 'arcu.',
        ];

        $parsedWords = $this->parseLeadingWords('dummy_document_numbered.pdf', count($expectedParsedWords));

        $this->assertSame($expectedParsedWords, $parsedWords);
    }

    /**
     * A sentence must become a list of lower-cased words with punctuation kept
     * attached to its word (e.g. "world:").
     */
    public function testCreatePatternFromString(): void
    {
        $string = 'Innovations and new advances for this world: a survey';
        $expectedPattern = ['innovations', 'and', 'new', 'advances', 'for', 'this', 'world:', 'a', 'survey'];

        $patternCreated = $this->contentParser->createPatternFromString($string);

        $this->assertSame($expectedPattern, $patternCreated);
    }

    /**
     * Bold, italic and underline tags must be removed while the enclosed text is kept.
     */
    public function testCleansStylingFromTitle(): void
    {
        $styledTitle = '<b>Innovations</b> and <i>new</i> advances for <u>this world</u>: a survey';
        $expectedCleanedTitle = 'Innovations and new advances for this world: a survey';

        $cleanedTitle = $this->contentParser->cleanStyledText($styledTitle);

        $this->assertSame($expectedCleanedTitle, $cleanedTitle);
    }

    /**
     * Parses a fixture document and returns only its leading words.
     *
     * Only the beginning of the document is compared, so the expected lists in
     * the tests do not have to reproduce the complete fixture text.
     *
     * @param string $fixtureName File name relative to the fixtures directory.
     * @param int $wordCount Number of leading words to keep.
     *
     * @return array The first $wordCount entries returned by the parser.
     */
    private function parseLeadingWords(string $fixtureName, int $wordCount): array
    {
        $fixturePath = self::FIXTURES_PATH . $fixtureName;

        // Report a missing fixture explicitly instead of through a parser error
        // or a confusing word mismatch.
        $this->assertFileExists($fixturePath);

        return array_slice($this->contentParser->parseDocument($fixturePath), 0, $wordCount);
    }
}
0 commit comments