Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/InvoiceSplitterAutoExtractionExample.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ function parseInvoice(string $filePath, Client $mindeeClient)
{
$inputSource = new PathInput($filePath);

if ($inputSource->isPdf() && $inputSource->countDocPages() > 1) {
if ($inputSource->isPdf() && $inputSource->getPageCount() > 1) {
parseMultiPage($inputSource, $mindeeClient);
} else {
parseSinglePage($inputSource, $mindeeClient);
Expand Down
2 changes: 1 addition & 1 deletion examples/MultiReceiptsAutoExtractionExample.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function processReceipts($client, $inputPath) {
$imageExtractor = new ImageExtractor($inputSource);

$multiReceiptsResult = $client->parse(MultiReceiptsDetectorV1::class, $inputSource);
$pageCount = $inputSource->countDocPages();
$pageCount = $inputSource->getPageCount();

$totalExtractedReceipts = [];

Expand Down
19 changes: 14 additions & 5 deletions src/Input/LocalInputSource.php
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public function isPDF(): bool
* @throws MindeePDFException Throws if the source pdf can't be properly processed.
* @throws MindeeSourceException Throws if the source isn't a pdf.
*/
public function countDocPages(): int
public function getPageCount(): int
{
if (!$this->isPDF()) {
throw new MindeeSourceException(
Expand All @@ -135,6 +135,15 @@ public function countDocPages(): int
}
}

/**
* Deprecated alias of getPageCount(): forwards the call unchanged and returns its result.
*
* @return integer The page count reported by getPageCount().
* @deprecated Use getPageCount() instead.
*/
public function countDocPages(): int
{
return $this->getPageCount();
}

/**
* @param string $fileBytes Raw data as bytes.
* @return void
Expand Down Expand Up @@ -326,15 +335,15 @@ public function applyPageOptions(?PageOptions $pageOptions): void
ErrorCode::USER_INPUT_ERROR
);
}
if ($this->countDocPages() < $pageOptions->onMinPage) {
if ($this->getPageCount() < $pageOptions->onMinPage) {
return;
}
$allPages = range(0, $this->countDocPages() - 1);
$allPages = range(0, $this->getPageCount() - 1);
$pagesToKeep = [];
if ($pageOptions->operation == KEEP_ONLY) {
foreach ($pageOptions->pageIndexes as $pageId) {
if ($pageId < 0) {
$pageId = $this->countDocPages() + $pageId;
$pageId = $this->getPageCount() + $pageId;
}
if (!in_array($pageId, $allPages)) {
error_log("Page index '" . $pageId . "' is not present in source document");
Expand All @@ -346,7 +355,7 @@ public function applyPageOptions(?PageOptions $pageOptions): void
$pagesToRemove = [];
foreach ($pageOptions->pageIndexes as $pageId) {
if ($pageId < 0) {
$pageId = $this->countDocPages() + $pageId;
$pageId = $this->getPageCount() + $pageId;
}
if (!in_array($pageId, $allPages)) {
error_log("Page index '" . $pageId . "' is not present in source document");
Expand Down
2 changes: 1 addition & 1 deletion tests/ClientTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public function testCutOptions()
$this->expectException(MindeeHttpClientException::class);
$pageOptions = new PageOptions(range(0, 4));
$this->dummyClient->parse(ReceiptV5::class, $inputDoc, null, $pageOptions);
$this->assertEquals(5, $inputDoc->countDocPages());
$this->assertEquals(5, $inputDoc->getPageCount());
}

public function testAsyncWrongInitialDelay()
Expand Down
21 changes: 11 additions & 10 deletions tests/Extraction/InvoiceSplitterAutoExtractionTestFunctional.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ class PdfExtractorTest extends TestCase

private function prepareInvoiceReturn(string $rstFilePath, Document $invoicePrediction): string
{
$rstContent = file_get_contents($rstFilePath);
$fileContent = file_get_contents($rstFilePath);
$parsingVersion = $invoicePrediction->inference->product->version;
$parsingId = $invoicePrediction->id;
$rstContent = str_replace(TestingUtilities::getVersion($rstContent), $parsingVersion, $rstContent);
$rstContent = str_replace(TestingUtilities::getId($rstContent), $parsingId, $rstContent);
return $rstContent;
$rstContent = str_replace(TestingUtilities::getVersion($fileContent), $parsingVersion, $fileContent);
return str_replace(TestingUtilities::getId($rstContent), $parsingId, $rstContent);
}

/**
Expand All @@ -31,7 +30,8 @@ private function prepareInvoiceReturn(string $rstFilePath, Document $invoicePred
public function testPdfShouldExtractInvoicesStrict()
{
$client = new Client();
$invoiceSplitterInput = new PathInput((getenv('GITHUB_WORKSPACE') ?: ".") . self::PRODUCT_DATA_DIR . '/invoice_splitter/default_sample.pdf');
$rootDir = (getenv('GITHUB_WORKSPACE') ?: ".") . self::PRODUCT_DATA_DIR;
$invoiceSplitterInput = new PathInput(filePath: $rootDir . '/invoice_splitter/default_sample.pdf');
$response = $client->enqueueAndParse(InvoiceSplitterV1::class, $invoiceSplitterInput);
$inference = $response->document->inference;
$pdfExtractor = new PdfExtractor($invoiceSplitterInput);
Expand All @@ -44,17 +44,18 @@ public function testPdfShouldExtractInvoicesStrict()
$this->assertEquals('default_sample_002-002.pdf', $extractedPdfsStrict[1]->getFilename());

$invoice0 = $client->parse(InvoiceV4::class, $extractedPdfsStrict[0]->asInputSource());
$testStringRstInvoice0 = $this->prepareInvoiceReturn((getenv('GITHUB_WORKSPACE') ?: ".") .
self::PRODUCT_DATA_DIR . '/invoices/response_v4/summary_full_invoice_p1.rst',
$invoice0->document

$testStringRstInvoice0 = $this->prepareInvoiceReturn(
rstFilePath: $rootDir . '/invoices/response_v4/summary_full_invoice_p1.rst',
invoicePrediction: $invoice0->document
);

$this->assertGreaterThan(
0.97,
0.90,
TestingUtilities::levenshteinRatio(
$testStringRstInvoice0,
(string)$invoice0->document
)
);
}
}
}
19 changes: 10 additions & 9 deletions tests/Input/LocalInputSourceTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,14 @@ protected function tearDown(): void
public function testPDFCountPages()
{
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
$this->assertEquals(12, $inputObj->countDocPages());
$this->assertEquals(12, $inputObj->getPageCount());
}

public function testPDFReconstructOK()
{
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
$inputObj->applyPageOptions(new PageOptions([0, 1, 2, 3, 4], KEEP_ONLY, 2));
$this->assertEquals(5, $inputObj->countDocPages());
$this->assertEquals(5, $inputObj->getPageCount());
}

public function testPDFReadContents()
Expand All @@ -112,7 +112,7 @@ public function testPDFCutNPages(array $indexes)
$pageCountBasePdf = $basePdf->setSourceFile($inputObj->fileObject->getFilename());
$basePdf->Close();
$cutPdf->Close();
$this->assertEquals(count($indexes), $inputObj->countDocPages());
$this->assertEquals(count($indexes), $inputObj->getPageCount());
$this->assertEquals($pageCountCutPdf, $pageCountBasePdf);

$basePdf = new FPDI();
Expand Down Expand Up @@ -148,36 +148,36 @@ public function testPDFKeep5FirstPages()
{
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
$inputObj->applyPageOptions(new PageOptions([0, 1, 2, 3, 4], KEEP_ONLY, 2));
$this->assertEquals(5, $inputObj->countDocPages());
$this->assertEquals(5, $inputObj->getPageCount());
}

public function testPDFKeepInvalidPages()
{
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
$inputObj->applyPageOptions(new PageOptions([0, 1, 17], KEEP_ONLY, 2));
$this->assertEquals(2, $inputObj->countDocPages());
$this->assertEquals(2, $inputObj->getPageCount());
}

public function testPDFRemove5LastPages()
{

$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
$inputObj->applyPageOptions(new PageOptions([-5, -4, -3, -2, -1], REMOVE, 2));
$this->assertEquals(7, $inputObj->countDocPages());
$this->assertEquals(7, $inputObj->getPageCount());
}

public function testPDFRemove5FirstPages()
{
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
$inputObj->applyPageOptions(new PageOptions([0, 1, 2, 3, 4], REMOVE, 2));
$this->assertEquals(7, $inputObj->countDocPages());
$this->assertEquals(7, $inputObj->getPageCount());
}

public function testPDFRemoveInvalidPages()
{
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
$inputObj->applyPageOptions(new PageOptions([16], REMOVE, 2));
$this->assertEquals(12, $inputObj->countDocPages());
$this->assertEquals(12, $inputObj->getPageCount());
}

public function testPDFKeepNoPages()
Expand All @@ -191,7 +191,8 @@ public function testPDFRemoveAllPages()
{
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
$this->expectException(MindeePDFException::class);
$inputObj->applyPageOptions(new PageOptions(range(0, $inputObj->countDocPages() - 1), REMOVE, 2));
$pageOptions = new PageOptions(range(0, $inputObj->getPageCount() - 1), REMOVE, 2);
$inputObj->applyPageOptions(pageOptions: $pageOptions);
}

public function testPDFInputFromFile()
Expand Down