-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathOCR.php
More file actions
53 lines (48 loc) · 1.33 KB
/
Copy pathOCR.php
File metadata and controls
53 lines (48 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
<?php
namespace Mindee\V1\Parsing\Common\OCR;
/**
 * Wrapper around the OCR output extracted from a whole document.
 */
class OCR
{
    /**
     * @var MVisionV1 Results produced by Mindee Vision v1.
     */
    public MVisionV1 $mvisionV1;

    /**
     * Builds the wrapper from the 'mvision-v1' entry of the raw prediction.
     *
     * @param array $rawPrediction Raw prediction array.
     */
    public function __construct(array $rawPrediction)
    {
        $visionData = $rawPrediction['mvision-v1'];
        $this->mvisionV1 = new MVisionV1($visionData);
    }

    /**
     * String form of the underlying Mindee Vision results.
     *
     * @return string
     */
    public function __toString(): string
    {
        return (string) $this->mvisionV1;
    }

    /**
     * Collects every OCR line whose string form matches the given regex, grouped by page.
     *
     * Pages are addressed by their zero-based position in the results; a page
     * only gets an entry in the returned array when at least one of its lines matches.
     *
     * @param string $regex The regular expression to match against.
     * @return array Matching lines, keyed by the index of the page they were found on.
     */
    public function findLineByRegex(string $regex): array
    {
        $found = [];
        $pageIndex = 0;
        while ($pageIndex < count($this->mvisionV1->pages)) {
            foreach ($this->mvisionV1->pages[$pageIndex]->getAllLines() as $candidate) {
                // preg_match() yields 0 (no match) or false (error); both are skipped.
                if (!preg_match($regex, strval($candidate))) {
                    continue;
                }
                // Create the per-page bucket lazily, on the first match for that page.
                $found[$pageIndex] ??= [];
                $found[$pageIndex][] = $candidate;
            }
            $pageIndex++;
        }
        return $found;
    }
}