Skip to content

Commit 3f494f9

Browse files
authored
♻️ harmonize getting page count from a local input source (#151)
1 parent bc1ff92 commit 3f494f9

6 files changed

Lines changed: 38 additions & 27 deletions

examples/InvoiceSplitterAutoExtractionExample.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ function parseInvoice(string $filePath, Client $mindeeClient)
1010
{
1111
$inputSource = new PathInput($filePath);
1212

13-
if ($inputSource->isPdf() && $inputSource->countDocPages() > 1) {
13+
if ($inputSource->isPdf() && $inputSource->getPageCount() > 1) {
1414
parseMultiPage($inputSource, $mindeeClient);
1515
} else {
1616
parseSinglePage($inputSource, $mindeeClient);

examples/MultiReceiptsAutoExtractionExample.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ function processReceipts($client, $inputPath) {
1616
$imageExtractor = new ImageExtractor($inputSource);
1717

1818
$multiReceiptsResult = $client->parse(MultiReceiptsDetectorV1::class, $inputSource);
19-
$pageCount = $inputSource->countDocPages();
19+
$pageCount = $inputSource->getPageCount();
2020

2121
$totalExtractedReceipts = [];
2222

src/Input/LocalInputSource.php

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ public function isPDF(): bool
115115
* @throws MindeePDFException Throws if the source pdf can't be properly processed.
116116
* @throws MindeeSourceException Throws if the source isn't a pdf.
117117
*/
118-
public function countDocPages(): int
118+
public function getPageCount(): int
119119
{
120120
if (!$this->isPDF()) {
121121
throw new MindeeSourceException(
@@ -135,6 +135,15 @@ public function countDocPages(): int
135135
}
136136
}
137137

138+
/**
139+
* @return integer
140+
* @deprecated Use getPageCount() instead.
141+
*/
142+
public function countDocPages(): int
143+
{
144+
return $this->getPageCount();
145+
}
146+
138147
/**
139148
* @param string $fileBytes Raw data as bytes.
140149
* @return void
@@ -326,15 +335,15 @@ public function applyPageOptions(?PageOptions $pageOptions): void
326335
ErrorCode::USER_INPUT_ERROR
327336
);
328337
}
329-
if ($this->countDocPages() < $pageOptions->onMinPage) {
338+
if ($this->getPageCount() < $pageOptions->onMinPage) {
330339
return;
331340
}
332-
$allPages = range(0, $this->countDocPages() - 1);
341+
$allPages = range(0, $this->getPageCount() - 1);
333342
$pagesToKeep = [];
334343
if ($pageOptions->operation == KEEP_ONLY) {
335344
foreach ($pageOptions->pageIndexes as $pageId) {
336345
if ($pageId < 0) {
337-
$pageId = $this->countDocPages() + $pageId;
346+
$pageId = $this->getPageCount() + $pageId;
338347
}
339348
if (!in_array($pageId, $allPages)) {
340349
error_log("Page index '" . $pageId . "' is not present in source document");
@@ -346,7 +355,7 @@ public function applyPageOptions(?PageOptions $pageOptions): void
346355
$pagesToRemove = [];
347356
foreach ($pageOptions->pageIndexes as $pageId) {
348357
if ($pageId < 0) {
349-
$pageId = $this->countDocPages() + $pageId;
358+
$pageId = $this->getPageCount() + $pageId;
350359
}
351360
if (!in_array($pageId, $allPages)) {
352361
error_log("Page index '" . $pageId . "' is not present in source document");

tests/ClientTest.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ public function testCutOptions()
104104
$this->expectException(MindeeHttpClientException::class);
105105
$pageOptions = new PageOptions(range(0, 4));
106106
$this->dummyClient->parse(ReceiptV5::class, $inputDoc, null, $pageOptions);
107-
$this->assertEquals(5, $inputDoc->countDocPages());
107+
$this->assertEquals(5, $inputDoc->getPageCount());
108108
}
109109

110110
public function testAsyncWrongInitialDelay()

tests/Extraction/InvoiceSplitterAutoExtractionTestFunctional.php

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,11 @@ class PdfExtractorTest extends TestCase
1616

1717
private function prepareInvoiceReturn(string $rstFilePath, Document $invoicePrediction): string
1818
{
19-
$rstContent = file_get_contents($rstFilePath);
19+
$fileContent = file_get_contents($rstFilePath);
2020
$parsingVersion = $invoicePrediction->inference->product->version;
2121
$parsingId = $invoicePrediction->id;
22-
$rstContent = str_replace(TestingUtilities::getVersion($rstContent), $parsingVersion, $rstContent);
23-
$rstContent = str_replace(TestingUtilities::getId($rstContent), $parsingId, $rstContent);
24-
return $rstContent;
22+
$rstContent = str_replace(TestingUtilities::getVersion($fileContent), $parsingVersion, $fileContent);
23+
return str_replace(TestingUtilities::getId($rstContent), $parsingId, $rstContent);
2524
}
2625

2726
/**
@@ -31,7 +30,8 @@ private function prepareInvoiceReturn(string $rstFilePath, Document $invoicePred
3130
public function testPdfShouldExtractInvoicesStrict()
3231
{
3332
$client = new Client();
34-
$invoiceSplitterInput = new PathInput((getenv('GITHUB_WORKSPACE') ?: ".") . self::PRODUCT_DATA_DIR . '/invoice_splitter/default_sample.pdf');
33+
$rootDir = (getenv('GITHUB_WORKSPACE') ?: ".") . self::PRODUCT_DATA_DIR;
34+
$invoiceSplitterInput = new PathInput(filePath: $rootDir . '/invoice_splitter/default_sample.pdf');
3535
$response = $client->enqueueAndParse(InvoiceSplitterV1::class, $invoiceSplitterInput);
3636
$inference = $response->document->inference;
3737
$pdfExtractor = new PdfExtractor($invoiceSplitterInput);
@@ -44,17 +44,18 @@ public function testPdfShouldExtractInvoicesStrict()
4444
$this->assertEquals('default_sample_002-002.pdf', $extractedPdfsStrict[1]->getFilename());
4545

4646
$invoice0 = $client->parse(InvoiceV4::class, $extractedPdfsStrict[0]->asInputSource());
47-
$testStringRstInvoice0 = $this->prepareInvoiceReturn((getenv('GITHUB_WORKSPACE') ?: ".") .
48-
self::PRODUCT_DATA_DIR . '/invoices/response_v4/summary_full_invoice_p1.rst',
49-
$invoice0->document
47+
48+
$testStringRstInvoice0 = $this->prepareInvoiceReturn(
49+
rstFilePath: $rootDir . '/invoices/response_v4/summary_full_invoice_p1.rst',
50+
invoicePrediction: $invoice0->document
5051
);
5152

5253
$this->assertGreaterThan(
53-
0.97,
54+
0.90,
5455
TestingUtilities::levenshteinRatio(
5556
$testStringRstInvoice0,
5657
(string)$invoice0->document
5758
)
5859
);
5960
}
60-
}
61+
}

tests/Input/LocalInputSourceTest.php

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,14 @@ protected function tearDown(): void
7979
public function testPDFCountPages()
8080
{
8181
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
82-
$this->assertEquals(12, $inputObj->countDocPages());
82+
$this->assertEquals(12, $inputObj->getPageCount());
8383
}
8484

8585
public function testPDFReconstructOK()
8686
{
8787
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
8888
$inputObj->applyPageOptions(new PageOptions([0, 1, 2, 3, 4], KEEP_ONLY, 2));
89-
$this->assertEquals(5, $inputObj->countDocPages());
89+
$this->assertEquals(5, $inputObj->getPageCount());
9090
}
9191

9292
public function testPDFReadContents()
@@ -112,7 +112,7 @@ public function testPDFCutNPages(array $indexes)
112112
$pageCountBasePdf = $basePdf->setSourceFile($inputObj->fileObject->getFilename());
113113
$basePdf->Close();
114114
$cutPdf->Close();
115-
$this->assertEquals(count($indexes), $inputObj->countDocPages());
115+
$this->assertEquals(count($indexes), $inputObj->getPageCount());
116116
$this->assertEquals($pageCountCutPdf, $pageCountBasePdf);
117117

118118
$basePdf = new FPDI();
@@ -148,36 +148,36 @@ public function testPDFKeep5FirstPages()
148148
{
149149
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
150150
$inputObj->applyPageOptions(new PageOptions([0, 1, 2, 3, 4], KEEP_ONLY, 2));
151-
$this->assertEquals(5, $inputObj->countDocPages());
151+
$this->assertEquals(5, $inputObj->getPageCount());
152152
}
153153

154154
public function testPDFKeepInvalidPages()
155155
{
156156
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
157157
$inputObj->applyPageOptions(new PageOptions([0, 1, 17], KEEP_ONLY, 2));
158-
$this->assertEquals(2, $inputObj->countDocPages());
158+
$this->assertEquals(2, $inputObj->getPageCount());
159159
}
160160

161161
public function testPDFRemove5LastPages()
162162
{
163163

164164
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
165165
$inputObj->applyPageOptions(new PageOptions([-5, -4, -3, -2, -1], REMOVE, 2));
166-
$this->assertEquals(7, $inputObj->countDocPages());
166+
$this->assertEquals(7, $inputObj->getPageCount());
167167
}
168168

169169
public function testPDFRemove5FirstPages()
170170
{
171171
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
172172
$inputObj->applyPageOptions(new PageOptions([0, 1, 2, 3, 4], REMOVE, 2));
173-
$this->assertEquals(7, $inputObj->countDocPages());
173+
$this->assertEquals(7, $inputObj->getPageCount());
174174
}
175175

176176
public function testPDFRemoveInvalidPages()
177177
{
178178
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
179179
$inputObj->applyPageOptions(new PageOptions([16], REMOVE, 2));
180-
$this->assertEquals(12, $inputObj->countDocPages());
180+
$this->assertEquals(12, $inputObj->getPageCount());
181181
}
182182

183183
public function testPDFKeepNoPages()
@@ -191,7 +191,8 @@ public function testPDFRemoveAllPages()
191191
{
192192
$inputObj = new PathInput($this->fileTypesDir . "pdf/multipage.pdf");
193193
$this->expectException(MindeePDFException::class);
194-
$inputObj->applyPageOptions(new PageOptions(range(0, $inputObj->countDocPages() - 1), REMOVE, 2));
194+
$pageOptions = new PageOptions(range(0, $inputObj->getPageCount() - 1), REMOVE, 2);
195+
$inputObj->applyPageOptions(pageOptions: $pageOptions);
195196
}
196197

197198
public function testPDFInputFromFile()

0 commit comments

Comments
 (0)