Skip to content

Commit c0c0ad8

Browse files
♻️ move page count to attribute
1 parent f1f0130 commit c0c0ad8

12 files changed

Lines changed: 65 additions & 49 deletions

src/Image/ImageExtractor.php

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ class ImageExtractor
4444
* @var LocalInputSource Local input object used by the ImageExtractor.
4545
*/
4646
protected LocalInputSource $inputSource;
47+
/**
48+
* @var integer Number of pages in the document.
49+
*/
50+
public int $pageCount;
4751

4852
/**
4953
* @param LocalInputSource $localInput Local input, accepts all compatible formats.
@@ -84,6 +88,7 @@ public function __construct(LocalInputSource $localInput, ?string $saveFormat =
8488
}
8589
$this->pageImages[] = $image;
8690
}
91+
$this->pageCount = count($this->pageImages);
8792
}
8893

8994
/**
@@ -117,15 +122,6 @@ public static function pdfToImages(string $fileBytes): array
117122
}
118123
}
119124

120-
/**
121-
* Gets the number of pages in the file.
122-
* @return integer Page count.
123-
*/
124-
public function getPageCount(): int
125-
{
126-
return count($this->pageImages);
127-
}
128-
129125

130126
/**
131127
* Extracts images from a page.

src/Input/LocalInputSource.php

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010

1111
use CURLFile;
1212
use Exception;
13+
use Mindee\Dependency\DependencyChecker;
1314
use Mindee\Error\ErrorCode;
1415
use Mindee\Error\MindeeMimeTypeException;
1516
use Mindee\Error\MindeePdfException;
1617
use Mindee\Error\MindeeSourceException;
18+
use Mindee\Error\MindeeUnhandledException;
1719
use Mindee\Image\ImageCompressor;
1820
use Mindee\Pdf\PdfCompressor;
1921
use Mindee\Pdf\PdfUtils;
@@ -63,6 +65,11 @@ abstract class LocalInputSource extends InputSource
6365
*/
6466
public ?string $filePath = null;
6567

68+
/**
69+
* @var integer|null Number of pages in the document, or null when the count could not be determined.
70+
*/
71+
public ?int $pageCount = null;
72+
6673
/**
6774
* Checks if the file needs fixing.
6875
*/
@@ -101,6 +108,16 @@ private function checkMimeType(): void
101108
public function __construct()
102109
{
103110
$this->checkMimeType();
111+
try {
112+
DependencyChecker::isGhostscriptAvailable();
113+
if ($this->isPdf()) {
114+
$this->pageCount = $this->getPageCount();
115+
} else {
116+
$this->pageCount = 1;
117+
}
118+
} catch (MindeeUnhandledException) {
119+
error_log("PDF-handling features not available, page count set to null.");
120+
}
104121
}
105122

106123
/**
@@ -121,7 +138,7 @@ public function isPdf(): bool
121138
* @throws MindeePdfException Throws if the source pdf can't be properly processed.
122139
* @throws MindeeSourceException Throws if the source isn't a pdf.
123140
*/
124-
public function getPageCount(): int
141+
protected function getPageCount(): int
125142
{
126143
if (!$this->isPdf()) {
127144
throw new MindeeSourceException(
@@ -141,15 +158,6 @@ public function getPageCount(): int
141158
}
142159
}
143160

144-
/**
145-
* @return integer
146-
* @deprecated
147-
*/
148-
public function countDocPages(): int
149-
{
150-
return $this->getPageCount();
151-
}
152-
153161
/**
154162
* @param string $fileBytes Raw data as bytes.
155163
*/
@@ -377,5 +385,6 @@ public function applyPageOptions(?PageOptions $pageOptions): void
377385
);
378386
}
379387
$this->mergePdfPages($pagesToKeep);
388+
$this->pageCount = $this->getPageCount();
380389
}
381390
}

src/Pdf/ExtractedPdf.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
*/
2020
class ExtractedPdf
2121
{
22+
/**
23+
* @var integer The number of pages in the file.
24+
*/
25+
public int $pageCount;
26+
2227
/**
2328
* Initializes a new instance of the ExtractedPdf class.
2429
*
@@ -31,6 +36,7 @@ public function __construct(protected string $pdfBytes, public string $filename)
3136
{
3237
DependencyChecker::isImageMagickAvailable();
3338
DependencyChecker::isGhostscriptAvailable();
39+
$this->pageCount = $this->getPageCount();
3440
}
3541

3642
/**
@@ -40,7 +46,7 @@ public function __construct(protected string $pdfBytes, public string $filename)
4046
*
4147
* @throws MindeePdfException Throws if FPDI is unable to process the file.
4248
*/
43-
public function getPageCount(): int
49+
private function getPageCount(): int
4450
{
4551
try {
4652
$pdfHandle = new Fpdi();

src/Pdf/PdfExtractor.php

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ class PdfExtractor
3333
* @var string name of the file
3434
*/
3535
private readonly string $fileName;
36+
/**
37+
* @var integer number of pages in the file
38+
*/
39+
public int $pageCount;
3640

3741
/**
3842
* @param LocalInputSource $localInput Local Input, accepts all compatible formats.
@@ -58,6 +62,7 @@ public function __construct(LocalInputSource $localInput)
5862
$image->setImageFormat('pdf');
5963
$this->pdfBytes = $image->getImageBlob();
6064
}
65+
$this->pageCount = $this->getPageCount();
6166
}
6267

6368
/**
@@ -67,7 +72,7 @@ public function __construct(LocalInputSource $localInput)
6772
*
6873
* @throws MindeePdfException Throws if FPDI is unable to process the file.
6974
*/
70-
public function getPageCount(): int
75+
private function getPageCount(): int
7176
{
7277
try {
7378
$pdfHandle = new Fpdi();

src/V1/Product/InvoiceSplitter/InvoiceSplitterV1InvoicePageGroup.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class InvoiceSplitterV1InvoicePageGroup implements Stringable
1818
use FieldPositionMixin;
1919

2020
/**
21-
* @var int[] List of page indexes that belong to the same invoice (group).
21+
* @var integer[] List of page indexes that belong to the same invoice (group).
2222
*/
2323
public array $pageIndexes;
2424

tests/ClientTest.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ public function testCutOptions(): void
103103
$this->expectException(MindeeV1HttpException::class);
104104
$pageOptions = new PageOptions(range(0, 4));
105105
$this->dummyClient->parse(ReceiptV5::class, $inputDoc, null, $pageOptions);
106-
self::assertSame(5, $inputDoc->getPageCount());
106+
self::assertSame(5, $inputDoc->pageCount);
107107
}
108108

109109
public function testAsyncWrongInitialDelay(): void

tests/Input/LocalInputSourceTest.php

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,14 @@ protected function tearDown(): void
7272
public function testPdfCountPages(): void
7373
{
7474
$inputObj = new PathInput(TestingUtilities::getFileTypesDir() . "/pdf/multipage.pdf");
75-
self::assertSame(12, $inputObj->getPageCount());
75+
self::assertSame(12, $inputObj->pageCount);
7676
}
7777

7878
public function testPdfReconstructOK(): void
7979
{
8080
$inputObj = new PathInput(TestingUtilities::getFileTypesDir() . "/pdf/multipage.pdf");
8181
$inputObj->applyPageOptions(new PageOptions([0, 1, 2, 3, 4], KEEP_ONLY, 2));
82-
self::assertSame(5, $inputObj->getPageCount());
82+
self::assertSame(5, $inputObj->pageCount);
8383
}
8484

8585
public function testPdfReadContents(): void
@@ -105,7 +105,7 @@ public function testPdfCutNPages(array $indexes): void
105105
$pageCountBasePdf = $basePdf->setSourceFile($inputObj->fileObject->getFilename());
106106
$basePdf->Close();
107107
$cutPdf->Close();
108-
self::assertSame(count($indexes), $inputObj->getPageCount());
108+
self::assertSame(count($indexes), $inputObj->pageCount);
109109
self::assertSame($pageCountCutPdf, $pageCountBasePdf);
110110

111111
$basePdf = new Fpdi();
@@ -138,36 +138,36 @@ public function testPdfKeep5FirstPages(): void
138138
{
139139
$inputObj = new PathInput(TestingUtilities::getFileTypesDir() . "/pdf/multipage.pdf");
140140
$inputObj->applyPageOptions(new PageOptions([0, 1, 2, 3, 4], KEEP_ONLY, 2));
141-
self::assertSame(5, $inputObj->getPageCount());
141+
self::assertSame(5, $inputObj->pageCount);
142142
}
143143

144144
public function testPdfKeepInvalidPages(): void
145145
{
146146
$inputObj = new PathInput(TestingUtilities::getFileTypesDir() . "/pdf/multipage.pdf");
147147
$inputObj->applyPageOptions(new PageOptions([0, 1, 17], KEEP_ONLY, 2));
148-
self::assertSame(2, $inputObj->getPageCount());
148+
self::assertSame(2, $inputObj->pageCount);
149149
}
150150

151151
public function testPdfRemove5LastPages(): void
152152
{
153153

154154
$inputObj = new PathInput(TestingUtilities::getFileTypesDir() . "/pdf/multipage.pdf");
155155
$inputObj->applyPageOptions(new PageOptions([-5, -4, -3, -2, -1], REMOVE, 2));
156-
self::assertSame(7, $inputObj->getPageCount());
156+
self::assertSame(7, $inputObj->pageCount);
157157
}
158158

159159
public function testPdfRemove5FirstPages(): void
160160
{
161161
$inputObj = new PathInput(TestingUtilities::getFileTypesDir() . "/pdf/multipage.pdf");
162162
$inputObj->applyPageOptions(new PageOptions([0, 1, 2, 3, 4], REMOVE, 2));
163-
self::assertSame(7, $inputObj->getPageCount());
163+
self::assertSame(7, $inputObj->pageCount);
164164
}
165165

166166
public function testPdfRemoveInvalidPages(): void
167167
{
168168
$inputObj = new PathInput(TestingUtilities::getFileTypesDir() . "/pdf/multipage.pdf");
169169
$inputObj->applyPageOptions(new PageOptions([16], REMOVE, 2));
170-
self::assertSame(12, $inputObj->getPageCount());
170+
self::assertSame(12, $inputObj->pageCount);
171171
}
172172

173173
public function testPdfKeepNoPages(): void
@@ -181,7 +181,7 @@ public function testPdfRemoveAllPages(): void
181181
{
182182
$inputObj = new PathInput(TestingUtilities::getFileTypesDir() . "/pdf/multipage.pdf");
183183
$this->expectException(MindeePdfException::class);
184-
$pageOptions = new PageOptions(range(0, $inputObj->getPageCount() - 1), REMOVE, 2);
184+
$pageOptions = new PageOptions(range(0, $inputObj->pageCount - 1), REMOVE, 2);
185185
$inputObj->applyPageOptions(pageOptions: $pageOptions);
186186
}
187187

tests/V1/Image/ImageExtractorTest.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public function testGivenAnImageShouldExtractPositionFields(): void
3333
$inference = $response->document->inference;
3434

3535
$extractor = new ImageExtractor($image);
36-
self::assertSame(1, $extractor->getPageCount());
36+
self::assertSame(1, $extractor->pageCount);
3737

3838
foreach ($inference->pages as $page) {
3939
$subImages = $extractor->extractImagesFromPage($page->prediction->receipts, $page->id);
@@ -64,7 +64,7 @@ public function testGivenAnImageShouldExtractValueFields(): void
6464
$inference = $response->document->inference;
6565

6666
$extractor = new ImageExtractor($image);
67-
self::assertSame(1, $extractor->getPageCount());
67+
self::assertSame(1, $extractor->pageCount);
6868

6969
foreach ($inference->pages as $page) {
7070
$codes1D = $extractor->extractImagesFromPage($page->prediction->codes1D, $page->id, "barcodes_1D.jpg");
@@ -96,7 +96,7 @@ public function testGivenAPdfShouldExtractPositionFields(): void
9696
self::assertNotEmpty($imageInput->readContents()[1]);
9797

9898
$extractor = new ImageExtractor($imageInput);
99-
self::assertSame(2, $extractor->getPageCount());
99+
self::assertSame(2, $extractor->pageCount);
100100

101101
foreach ($inference->pages as $page) {
102102
$subImages = $extractor->extractImagesFromPage($page->prediction->receipts, $page->id);

tests/V1/Image/InvoiceSplitterAutoExtractionTestFunctional.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public function testPdfShouldExtractInvoicesStrict(): void
4141
$response = $client->enqueueAndParse(InvoiceSplitterV1::class, $invoiceSplitterInput);
4242
$inference = $response->document->inference;
4343
$pdfExtractor = new PdfExtractor($invoiceSplitterInput);
44-
self::assertSame(2, $pdfExtractor->getPageCount());
44+
self::assertSame(2, $pdfExtractor->pageCount);
4545

4646
$extractedPdfsStrict = $pdfExtractor->extractInvoices($inference->prediction->invoicePageGroups);
4747

tests/V1/Pdf/PdfExtractorTest.php

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public function testGivenAnImageShouldExtractAPdf(): void
2626
$localInput = new PathInput($jpg);
2727
self::assertFalse($localInput->isPdf());
2828
$extractor = new PdfExtractor($localInput);
29-
self::assertSame(1, $extractor->getPageCount());
29+
self::assertSame(1, $extractor->pageCount);
3030
}
3131

3232
/**
@@ -39,16 +39,16 @@ public function testGivenAPdfShouldExtractInvoicesNoStrict(): void
3939
self::assertNotNull($response);
4040
$inference = $response->document->inference;
4141
$extractor = new PdfExtractor($pdf);
42-
self::assertSame(5, $extractor->getPageCount());
42+
self::assertSame(5, $extractor->pageCount);
4343

4444
$extractedPdfSNoStrict = $extractor->extractInvoices($inference->prediction->invoicePageGroups);
4545
self::assertCount(3, $extractedPdfSNoStrict);
4646
self::assertSame("invoice_5p_001-001.pdf", $extractedPdfSNoStrict[0]->getFileName());
47-
self::assertSame(1, $extractedPdfSNoStrict[0]->getPageCount());
47+
self::assertSame(1, $extractedPdfSNoStrict[0]->pageCount);
4848
self::assertSame("invoice_5p_002-004.pdf", $extractedPdfSNoStrict[1]->getFileName());
49-
self::assertSame(3, $extractedPdfSNoStrict[1]->getPageCount());
49+
self::assertSame(3, $extractedPdfSNoStrict[1]->pageCount);
5050
self::assertSame("invoice_5p_005-005.pdf", $extractedPdfSNoStrict[2]->getFileName());
51-
self::assertSame(1, $extractedPdfSNoStrict[2]->getPageCount());
51+
self::assertSame(1, $extractedPdfSNoStrict[2]->pageCount);
5252
}
5353

5454
/**
@@ -62,14 +62,14 @@ public function testGivenAPdfShouldExtractInvoicesStrict(): void
6262
$inference = $response->document->inference;
6363

6464
$extractor = new PdfExtractor($pdf);
65-
self::assertSame(5, $extractor->getPageCount());
65+
self::assertSame(5, $extractor->pageCount);
6666

6767
$extractedPdfStrict = $extractor->extractInvoices($inference->prediction->invoicePageGroups, true);
6868
self::assertCount(2, $extractedPdfStrict);
6969
self::assertSame("invoice_5p_001-001.pdf", $extractedPdfStrict[0]->getFileName());
70-
self::assertSame(1, $extractedPdfStrict[0]->getPageCount());
70+
self::assertSame(1, $extractedPdfStrict[0]->pageCount);
7171
self::assertSame("invoice_5p_002-005.pdf", $extractedPdfStrict[1]->getFileName());
72-
self::assertSame(4, $extractedPdfStrict[1]->getPageCount());
72+
self::assertSame(4, $extractedPdfStrict[1]->pageCount);
7373
}
7474

7575
private function getPrediction()

0 commit comments

Comments
 (0)