Skip to content

Commit 3081324

Browse files
♻️ move more internals into v1/v2 (#194)
1 parent f4cc0ef commit 3081324

16 files changed

Lines changed: 210 additions & 150 deletions

bin/MindeeCliCommand.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
namespace Mindee\Cli;
66

77
use Exception;
8-
use Mindee\Error\V1\MindeeV1HttpException;
98
use Mindee\Input\InputSource;
109
use Mindee\Input\PageOptions;
1110
use Mindee\Input\PathInput;
1211
use Mindee\Input\UrlInputSource;
1312
use Mindee\V1\Client;
1413
use Mindee\V1\ClientOptions\PredictMethodOptions;
1514
use Mindee\V1\ClientOptions\PredictOptions;
15+
use Mindee\V1\Error\MindeeV1HttpException;
1616
use Mindee\V1\Parsing\Common\AsyncPredictResponse;
1717
use Mindee\V1\Parsing\Common\PredictResponse;
1818
use Symfony\Component\Console\Command\Command;

src/Image/ImageExtractor.php

Lines changed: 0 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,13 @@
99
use ImagickException;
1010
use Mindee\Dependency\DependencyChecker;
1111
use Mindee\Error\ErrorCode;
12-
use Mindee\Error\MindeeGeometryException;
1312
use Mindee\Error\MindeeImageException;
1413
use Mindee\Error\MindeePdfException;
1514
use Mindee\Geometry\BBox;
1615
use Mindee\Geometry\BBoxUtils;
1716
use Mindee\Geometry\Point;
1817
use Mindee\Geometry\Polygon;
1918
use Mindee\Input\LocalInputSource;
20-
use Mindee\V1\Parsing\Standard\BaseField;
2119

2220
use function count;
2321
use function sprintf;
@@ -128,20 +126,6 @@ public function getPageCount(): int
128126
return count($this->pageImages);
129127
}
130128

131-
/**
132-
* Extract multiple images on a given page from a list of fields having position data.
133-
*
134-
* @param array<BaseField<string|float|integer|boolean|Polygon>> $fields List of Fields to extract.
135-
* @param integer $pageIndex The page index to extract, begins at 0.
136-
* @param null|string $outputName The base output filename, must have an image extension.
137-
*
138-
* @return array<ExtractedImage> a list of extracted images
139-
*/
140-
public function extractImagesFromPage(array $fields, int $pageIndex, ?string $outputName = null): array
141-
{
142-
$outputName ??= $this->filename;
143-
return $this->extractFromPage($fields, $pageIndex, $outputName);
144-
}
145129

146130
/**
147131
* Extracts images from a page.
@@ -212,47 +196,6 @@ public function extractPolygonFromPage(
212196
return new ExtractedImage($extractedImageData, $filename, $format, $pageIndex, $index);
213197
}
214198

215-
/**
216-
* Extracts a single image from a Position field.
217-
*
218-
* @param BaseField<string|float|integer|boolean|Polygon> $field The field to extract.
219-
* @param integer $pageIndex The page index to extract, begins at 0.
220-
* @param integer $index The index to use for naming the extracted image.
221-
* @param string $filename The output filename.
222-
* @param string $format The output format.
223-
*
224-
* @return null|ExtractedImage The extracted image, or null if the field does not have valid position data.
225-
*
226-
* @throws MindeeGeometryException Throws if a field does not contain positional data.
227-
*/
228-
public function extractImage(
229-
BaseField $field,
230-
int $pageIndex,
231-
int $index,
232-
string $filename,
233-
string $format
234-
): ?ExtractedImage {
235-
$polygon = null;
236-
237-
if (!empty($field->polygon)) {
238-
$polygon = $field->polygon;
239-
} elseif (!empty($field->boundingBox)) {
240-
$polygon = $field->boundingBox;
241-
} elseif (!empty($field->quadrangle)) {
242-
$polygon = $field->quadrangle;
243-
} elseif (!empty($field->rectangle)) {
244-
$polygon = $field->rectangle;
245-
}
246-
247-
if (null === $polygon) {
248-
throw new MindeeGeometryException(
249-
'Provided field has no valid position data.',
250-
ErrorCode::GEOMETRIC_OPERATION_FAILED
251-
);
252-
}
253-
254-
return $this->extractPolygonFromPage($polygon, $pageIndex, $index, $filename, $format);
255-
}
256199

257200
/**
258201
* Getter for the local input source.
@@ -262,33 +205,6 @@ public function getInputSource(): LocalInputSource
262205
return $this->inputSource;
263206
}
264207

265-
/**
266-
* Extracts images from a page.
267-
*
268-
* @param array<BaseField<string|float|integer|boolean|Polygon>> $fields List of Fields to extract.
269-
* @param integer $pageIndex The page index to extract, begins at 0.
270-
* @param string $outputName Name of the created file.
271-
* @param string $format The output format.
272-
*
273-
* @return array<ExtractedImage> An array of created images
274-
*/
275-
protected function extractFromPage(array $fields, int $pageIndex, string $outputName, string $format = 'jpg'): array
276-
{
277-
$extractedImages = [];
278-
279-
$i = 0;
280-
foreach ($fields as $field) {
281-
$filename = sprintf('%s_page%d-%d.%s', $outputName, $pageIndex, $i, $format);
282-
$extractedImage = $this->extractImage($field, $pageIndex, $i, $filename, $format);
283-
if (null !== $extractedImage) {
284-
$extractedImages[] = $extractedImage;
285-
}
286-
++$i;
287-
}
288-
289-
return $extractedImages;
290-
}
291-
292208
/**
293209
* Extracts an image from a set of coordinates.
294210
*

src/Pdf/PdfExtractor.php

Lines changed: 7 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,13 @@
1010
use Mindee\Dependency\DependencyChecker;
1111
use Mindee\Error\MindeePdfException;
1212
use Mindee\Input\LocalInputSource;
13-
use Mindee\V1\Product\InvoiceSplitter\InvoiceSplitterV1InvoicePageGroups;
1413
use setasign\Fpdi\Fpdi;
1514
use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
1615
use setasign\Fpdi\PdfParser\Filter\FilterException;
1716
use setasign\Fpdi\PdfParser\PdfParserException;
1817
use setasign\Fpdi\PdfReader\PdfReaderException;
1918

2019
use function count;
21-
use function is_array;
2220
use function sprintf;
2321

2422
/**
@@ -39,8 +37,8 @@ class PdfExtractor
3937
/**
4038
* @param LocalInputSource $localInput Local Input, accepts all compatible formats.
4139
*
42-
* @throws MindeePdfException Throws if PDF operations aren't supported, or if the file
43-
* can't be read, respectively.
40+
* @throws MindeePdfException|ImagickException Throws if PDF operations aren't supported, or if the file
41+
* can't be read, respectively.
4442
*/
4543
public function __construct(LocalInputSource $localInput)
4644
{
@@ -86,14 +84,14 @@ public function getPageCount(): int
8684
/**
8785
* Extracts sub-documents from the source document using list of page indexes.
8886
*
89-
* @param array<array<integer>>|InvoiceSplitterV1InvoicePageGroups $pageIndexes List of sub-lists of pages to keep.
87+
* @param array<array<integer>> $pageIndexes List of sub-lists of pages to keep.
9088
*
9189
* @return ExtractedPdf[] list of extracted documents
9290
*
9391
* @throws MindeePdfException Throws if FPDF/FPDI wasn't able to handle the pdf during the extraction.
9492
* @throws InvalidArgumentException Throws if invalid indexes are provided.
9593
*/
96-
public function extractSubDocuments(array|InvoiceSplitterV1InvoicePageGroups $pageIndexes): array
94+
public function extractSubDocuments(array $pageIndexes): array
9795
{
9896
$extractedPdfs = [];
9997

@@ -141,58 +139,17 @@ public function extractSubDocuments(array|InvoiceSplitterV1InvoicePageGroups $pa
141139
/**
142140
* Extracts invoices as complete PDFs from the document.
143141
*
144-
* @param array<array<integer>>|InvoiceSplitterV1InvoicePageGroups $pageIndexes List of sub-lists of pages to keep.
142+
* @param array<array<integer>> $pageIndexes List of sub-lists of pages to keep.
145143
* @param boolean $strict Whether to trust confidence scores or not.
146144
*
147145
* @return ExtractedPdf[] a list of extracted invoices
148146
*/
149-
public function extractInvoices(array|InvoiceSplitterV1InvoicePageGroups $pageIndexes, bool $strict = false): array
147+
public function extractInvoices(array $pageIndexes, bool $strict = false): array
150148
{
151149
if (empty($pageIndexes)) {
152150
return [];
153151
}
154-
if (!$strict) {
155-
$indexes = array_map(static fn($invoicePageIndexes) => $invoicePageIndexes->pageIndexes, (array) $pageIndexes);
156-
157-
return $this->extractSubDocuments($indexes);
158-
}
159-
if (is_array($pageIndexes[0])) {
160-
return $this->extractSubDocuments($pageIndexes);
161-
}
162-
163-
$correctPageIndexes = [];
164-
$currentList = [];
165-
$previousConfidence = null;
166-
167-
$i = 0;
168-
foreach ($pageIndexes as $pageIndex) {
169-
$confidence = $pageIndex->confidence;
170-
$pageList = $pageIndex->pageIndexes;
171-
172-
if ($confidence >= 0.5 && null === $previousConfidence) {
173-
$currentList = $pageList;
174-
} elseif ($confidence >= 0.5 && $i !== count($pageIndexes) - 1) {
175-
if (!empty($currentList)) {
176-
$correctPageIndexes[] = $currentList;
177-
}
178-
$currentList = $pageList;
179-
} elseif ($confidence < 0.5 && $i === count($pageIndexes) - 1) {
180-
$currentList = array_merge($currentList, $pageList);
181-
if (!empty($currentList)) {
182-
$correctPageIndexes[] = $currentList;
183-
}
184-
} else {
185-
if (!empty($currentList)) {
186-
$correctPageIndexes[] = $currentList;
187-
}
188-
$correctPageIndexes[] = $pageList;
189-
}
190-
191-
$previousConfidence = $confidence;
192-
++$i;
193-
}
194-
195-
return $this->extractSubDocuments($correctPageIndexes);
152+
return $this->extractSubDocuments($pageIndexes);
196153
}
197154

198155
/**

src/V1/Client.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@
1616
use Mindee\Error\ErrorCode;
1717
use Mindee\Error\MindeeApiException;
1818
use Mindee\Error\MindeeException;
19-
use Mindee\Error\V1\MindeeV1ClientException;
20-
use Mindee\Error\V1\MindeeV1HttpException;
2119
use Mindee\Input\InputSource;
2220
use Mindee\Input\LocalInputSource;
2321
use Mindee\Input\LocalResponse;
2422
use Mindee\Input\PageOptions;
2523
use Mindee\V1\ClientOptions\PredictMethodOptions;
2624
use Mindee\V1\ClientOptions\WorkflowOptions;
25+
use Mindee\V1\Error\MindeeV1ClientException;
26+
use Mindee\V1\Error\MindeeV1HttpException;
2727
use Mindee\V1\Http\Endpoint;
2828
use Mindee\V1\Http\MindeeApi;
2929
use Mindee\V1\Http\MindeeWorkflowApi;
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Mindee Client Exceptions.
88
*/
99

10-
namespace Mindee\Error\V1;
10+
namespace Mindee\V1\Error;
1111

1212
use Mindee\Error\MindeeException;
1313

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Mindee HTTP Exceptions.
88
*/
99

10-
namespace Mindee\Error\V1;
10+
namespace Mindee\V1\Error;
1111

1212
use Mindee\Error\ErrorCode;
1313
use Mindee\Error\MindeeException;

src/V1/Image/ImageExtractor.php

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,102 @@
44

55
namespace Mindee\V1\Image;
66

7+
use Mindee\Error\ErrorCode;
8+
use Mindee\Error\MindeeGeometryException;
9+
use Mindee\Geometry\Polygon;
10+
use Mindee\Image\ExtractedImage;
711
use Mindee\Image\ImageExtractor as BaseImageExtractor;
12+
use Mindee\V1\Parsing\Standard\BaseField;
13+
14+
use function sprintf;
815

916
/**
1017
* Wrapper class for V1 of the BaseImageExtractor.
1118
*/
12-
class ImageExtractor extends BaseImageExtractor {}
19+
class ImageExtractor extends BaseImageExtractor
{
    /**
     * Extracts one image per field from a single page.
     *
     * Public entry point: resolves the base output name, then delegates to extractFromPage()
     * using that method's default output format.
     *
     * @param array<BaseField<string|float|integer|boolean|Polygon>> $fields Fields carrying position data.
     * @param integer $pageIndex The page index to extract, begins at 0.
     * @param null|string $outputName Base name used to build the generated filenames;
     * falls back to $this->filename when null.
     *
     * @return array<ExtractedImage> The extracted images.
     */
    public function extractImagesFromPage(array $fields, int $pageIndex, ?string $outputName = null): array
    {
        return $this->extractFromPage($fields, $pageIndex, $outputName ?? $this->filename);
    }

    /**
     * Extracts the image delimited by a single field's position data.
     *
     * The field's position properties are probed in a fixed priority order
     * (polygon, boundingBox, quadrangle, rectangle); the first non-empty one is
     * handed to extractPolygonFromPage().
     *
     * @param BaseField<string|float|integer|boolean|Polygon> $field The field to extract.
     * @param integer $pageIndex The page index to extract, begins at 0.
     * @param integer $index Index forwarded to extractPolygonFromPage() for the extracted image.
     * @param string $filename The output filename.
     * @param string $format The output format.
     *
     * @return null|ExtractedImage Whatever extractPolygonFromPage() returns for the selected geometry.
     *
     * @throws MindeeGeometryException Throws if none of the probed position properties is non-empty.
     */
    public function extractImage(
        BaseField $field,
        int $pageIndex,
        int $index,
        string $filename,
        string $format
    ): ?ExtractedImage {
        // Arms are evaluated top to bottom, so the order below is the priority order.
        $geometry = match (true) {
            !empty($field->polygon) => $field->polygon,
            !empty($field->boundingBox) => $field->boundingBox,
            !empty($field->quadrangle) => $field->quadrangle,
            !empty($field->rectangle) => $field->rectangle,
            default => throw new MindeeGeometryException(
                'Provided field has no valid position data.',
                ErrorCode::GEOMETRIC_OPERATION_FAILED
            ),
        };

        return $this->extractPolygonFromPage($geometry, $pageIndex, $index, $filename, $format);
    }

    /**
     * Extracts every field of a page and collects the resulting images.
     *
     * Each field gets a filename of the form "<outputName>_page<pageIndex>-<n>.<format>",
     * where <n> is a running counter starting at 0 that advances for every field,
     * including those for which extractImage() yields null.
     *
     * @param array<BaseField<string|float|integer|boolean|Polygon>> $fields List of Fields to extract.
     * @param integer $pageIndex The page index to extract, begins at 0.
     * @param string $outputName Base name used as the filename prefix.
     * @param string $format The output format, also used as the filename extension.
     *
     * @return array<ExtractedImage> The non-null results of extractImage(), in field order.
     */
    protected function extractFromPage(array $fields, int $pageIndex, string $outputName, string $format = 'jpg'): array
    {
        $images = [];
        $counter = 0;

        foreach ($fields as $field) {
            // Take the current position and advance right away so skipped fields still consume a number.
            $position = $counter++;

            $image = $this->extractImage(
                $field,
                $pageIndex,
                $position,
                sprintf('%s_page%d-%d.%s', $outputName, $pageIndex, $position, $format),
                $format
            );

            if (null === $image) {
                continue;
            }

            $images[] = $image;
        }

        return $images;
    }
}

0 commit comments

Comments
 (0)