-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathImageExtractor.php
More file actions
244 lines (217 loc) · 7.48 KB
/
Copy pathImageExtractor.php
File metadata and controls
244 lines (217 loc) · 7.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
<?php
declare(strict_types=1);
namespace Mindee\Image;
use Exception;
use Imagick;
use ImagickException;
use Mindee\Dependency\DependencyChecker;
use Mindee\Error\ErrorCode;
use Mindee\Error\MindeeImageException;
use Mindee\Error\MindeePdfException;
use Mindee\Geometry\BBox;
use Mindee\Geometry\BBoxUtils;
use Mindee\Geometry\Point;
use Mindee\Geometry\Polygon;
use Mindee\Input\LocalInputSource;
use function count;
use function sprintf;
/**
 * Extract sub-images from an image.
 *
 * The input (an image, or a PDF rendered page by page) is loaded once in the constructor and kept
 * as one Imagick object per page. Polygons expressed in relative coordinates can then be cropped
 * out of any page.
 */
class ImageExtractor
{
    /**
     * @var Imagick[] Array of extracted page images, one single-frame Imagick object per page.
     */
    protected array $pageImages = [];

    /**
     * @var string Name of the file.
     */
    protected string $filename;

    /**
     * @var string Format to save the image as.
     */
    protected string $saveFormat;

    /**
     * @var LocalInputSource Local input object used by the ImageExtractor.
     */
    protected LocalInputSource $inputSource;

    /**
     * @param LocalInputSource $localInput Local input, accepts all compatible formats.
     * @param null|string      $saveFormat Save format. When omitted, the extension of the input file name is
     *                                     used, unless that extension is missing or is "pdf", in which case
     *                                     "jpg" is used.
     *
     * @throws MindeePdfException   Throws if a non-PDF input can't be read as an image.
     * @throws MindeeImageException Throws if a PDF input can't be converted to images.
     */
    public function __construct(LocalInputSource $localInput, ?string $saveFormat = null)
    {
        // NOTE(review): these checks presumably throw when the dependency is missing - confirm in DependencyChecker.
        DependencyChecker::isImageMagickAvailable();
        DependencyChecker::isGhostscriptAvailable();

        $this->filename = $localInput->fileName;
        $this->inputSource = $localInput;

        $extension = pathinfo($localInput->fileName, PATHINFO_EXTENSION);
        $hasImageExtension = '' !== $extension && 'pdf' !== strtolower($extension);
        $this->saveFormat = $saveFormat ?? ($hasImageExtension ? $extension : 'jpg');

        if ($this->inputSource->isPdf()) {
            // Element 1 of readContents() is what gets handed to Imagick as the raw file bytes.
            $this->pageImages = static::pdfToImages($this->inputSource->readContents()[1]);

            return;
        }

        try {
            $image = new Imagick();
            $image->readImageBlob($this->inputSource->readContents()[1]);
        } catch (ImagickException $e) {
            // The exception type is kept as MindeePdfException so existing callers catching it keep working.
            throw new MindeePdfException(
                "Image couldn't be processed.",
                ErrorCode::IMAGE_CANT_PROCESS,
                $e
            );
        }
        $this->pageImages[] = $image;
    }

    /**
     * Renders the input PDF's pages as individual images.
     *
     * @param string $fileBytes Input pdf.
     *
     * @return Imagick[] A list of pages, each one an independent single-frame Imagick object in jpg format.
     *
     * @throws MindeeImageException Throws if the image can't be handled.
     */
    public static function pdfToImages(string $fileBytes): array
    {
        try {
            $document = new Imagick();
            $document->readImageBlob($fileBytes);

            $images = [];
            foreach ($document as $page) {
                $page->setImageFormat('jpg');
                // Iterating an Imagick object yields the container itself, repositioned on each frame.
                // Storing $page directly would therefore fill the list with the same object; getImage()
                // detaches the current frame into its own Imagick instance.
                $images[] = $page->getImage();
            }

            return $images;
        } catch (ImagickException $e) {
            throw new MindeeImageException(
                "Couldn't convert PDF to images.",
                ErrorCode::FILE_OPERATION_ABORTED,
                $e
            );
        }
    }

    /**
     * Gets the number of pages in the file.
     *
     * @return int Page count.
     */
    public function getPageCount(): int
    {
        return count($this->pageImages);
    }

    /**
     * Extracts images from a page.
     *
     * Output file names follow the pattern "<prefix>-<key>.<format>", where <key> is the polygon's array key.
     *
     * NOTE(review): each entry is forwarded as-is to extractPolygonFromPage(), which is typed to accept
     * Polygon instances only - confirm whether plain arrays of Point are still meant to be supported.
     *
     * @param array<Polygon|array<Point>> $polygons       List of polygons to extract.
     * @param int                         $pageIndex      The page index to extract, begins at 0.
     * @param null|string                 $filenamePrefix Output filename prefix. Defaults to the input file name.
     * @param null|string                 $format         Save format for extracted images. Defaults to the
     *                                                    extractor's save format.
     *
     * @return array<ExtractedImage> An array of created images
     * @throws MindeeImageException Throws if the image can't be processed.
     */
    public function extractPolygonsFromPage(
        array $polygons,
        int $pageIndex,
        ?string $filenamePrefix = null,
        ?string $format = null
    ): array {
        $saveFormat = $format ?? $this->saveFormat;
        // Loop-invariant: resolve the prefix once instead of on every iteration.
        $filenamePrefix ??= $this->filename;

        $extractedImages = [];
        try {
            foreach ($polygons as $i => $polygon) {
                $extractedImages[] = $this->extractPolygonFromPage(
                    $polygon,
                    $pageIndex,
                    $i,
                    sprintf('%s-%d.%s', $filenamePrefix, $i, $saveFormat),
                    $saveFormat
                );
            }
        } catch (MindeeImageException $e) {
            // Already the documented exception type: rethrow untouched rather than wrapping it in itself.
            throw $e;
        } catch (Exception $e) {
            throw new MindeeImageException($e->getMessage(), $e->getCode(), $e);
        }

        return $extractedImages;
    }

    /**
     * Extracts a cropped portion from an image.
     *
     * @param Polygon     $polygon   Polygon to extract.
     * @param int         $pageIndex Page index to extract from.
     * @param int         $index     Index to use for naming the extracted image.
     * @param null|string $filename  Output filename. Defaults to "<input name>_page<page>-<index>.<format>".
     * @param null|string $format    Output format. Defaults to the extractor's save format.
     *
     * @return ExtractedImage Extracted image data.
     * @throws MindeeImageException Throws if the image can't be processed or the page doesn't exist.
     */
    public function extractPolygonFromPage(
        Polygon $polygon,
        int $pageIndex,
        int $index,
        ?string $filename = null,
        ?string $format = null
    ): ExtractedImage {
        $bbox = BBoxUtils::generateBBoxFromPolygon($polygon);
        try {
            $extractedImageData = $this->extractImageFromBbox($bbox, $pageIndex);
        } catch (ImagickException $e) {
            throw new MindeeImageException($e->getMessage(), $e->getCode(), $e);
        }

        $format ??= $this->saveFormat;
        $filename ??= sprintf('%s_page%d-%d.%s', $this->filename, $pageIndex, $index, $format);

        return new ExtractedImage($extractedImageData, $filename, $format, $pageIndex, $index);
    }

    /**
     * Getter for the local input source.
     *
     * @return LocalInputSource The input source given to the constructor.
     */
    public function getInputSource(): LocalInputSource
    {
        return $this->inputSource;
    }

    /**
     * Extracts an image from a set of coordinates.
     *
     * The bounding box coordinates are multiplied by the page width and height, i.e. they are treated
     * as fractions of the page size.
     *
     * @param BBox      $bbox      BBox coordinates.
     * @param int|float $pageIndex The page index to extract, begins at 0. Floats are truncated to an integer.
     *
     * @return Imagick A cropped copy of the page; the stored page image is left untouched.
     *
     * @throws ImagickException     Throws if the image can't be processed.
     * @throws MindeeImageException Throws if the requested page doesn't exist.
     */
    protected function extractImageFromBbox(BBox $bbox, int|float $pageIndex): Imagick
    {
        $pageNumber = (int) $pageIndex;
        if (!isset($this->pageImages[$pageNumber])) {
            // Without this guard an invalid index would surface as a raw "call to a member function on null".
            throw new MindeeImageException(
                sprintf(
                    'Page index %d is out of range, the input has %d page(s).',
                    $pageNumber,
                    count($this->pageImages)
                ),
                ErrorCode::IMAGE_CANT_PROCESS,
                null
            );
        }

        // Imagick::clone() is deprecated; the clone keyword is the supported way to copy an Imagick object.
        $image = clone $this->pageImages[$pageNumber];

        $width = $image->getImageWidth();
        $height = $image->getImageHeight();
        $minX = (int) round($bbox->getMinX() * $width);
        $maxX = (int) round($bbox->getMaxX() * $width);
        $minY = (int) round($bbox->getMinY() * $height);
        $maxY = (int) round($bbox->getMaxY() * $height);

        $image->cropImage($maxX - $minX, $maxY - $minY, $minX, $minY);

        return $image;
    }

    /**
     * Splits the filename into name and extension.
     *
     * @param string $filename Name of the file.
     *
     * @return array{0: string, 1: string} An array containing the name and extension of the file.
     */
    protected static function splitNameStrict(string $filename): array
    {
        return [
            pathinfo($filename, PATHINFO_FILENAME),
            pathinfo($filename, PATHINFO_EXTENSION),
        ];
    }
}