Skip to content

Commit afab053

Browse files
✨ add support for data schema parameter (#165)
1 parent 5c24473 commit afab053

11 files changed

Lines changed: 391 additions & 17 deletions

src/Http/MindeeApiV2.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,9 @@ private function documentEnqueuePost(
350350
if (isset($params->textContext)) {
351351
$postFields['text_context'] = $params->textContext;
352352
}
353+
if (isset($params->dataSchema)) {
354+
$postFields['data_schema'] = strval($params->dataSchema);
355+
}
353356

354357
$url = $this->baseUrl . '/inferences/enqueue';
355358
curl_setopt($ch, CURLOPT_URL, $url);

src/Input/DataSchema.php

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<?php
2+
3+
namespace Mindee\Input;
4+
5+
use InvalidArgumentException;
6+
7+
/**
8+
* Modify the Data Schema.
9+
*/
10+
class DataSchema
{
    /**
     * @var DataSchemaReplace|null If set, completely replaces the data schema of the model.
     */
    public ?DataSchemaReplace $replace;

    /**
     * Builds a data schema from a JSON string, an already-decoded array, or another instance.
     *
     * @param array|string|DataSchema $dataSchema JSON string, decoded array, or existing instance to copy from.
     * @throws InvalidArgumentException Throws if the data schema is invalid.
     */
    public function __construct(DataSchema|array|string $dataSchema)
    {
        // instanceof (rather than an exact class-name comparison) also accepts subclasses,
        // which the parameter type already lets through.
        if ($dataSchema instanceof DataSchema) {
            $this->replace = $dataSchema->replace;
            return;
        }

        // json_decode() yields null on malformed input, so strings and arrays share one validation path.
        $jsonData = is_string($dataSchema) ? json_decode($dataSchema, true) : $dataSchema;
        if (!is_array($jsonData) || !isset($jsonData['replace']) || !is_array($jsonData['replace'])) {
            throw new InvalidArgumentException('Unrecognized data schema format.');
        }
        $this->replace = new DataSchemaReplace($jsonData['replace']);
    }

    /**
     * @return array JSON representation.
     */
    public function toJson(): array
    {
        return ['replace' => $this->replace->toJson()];
    }

    /**
     * Halves the number of leading spaces of a line.
     *
     * Used to turn the four-space indentation produced by JSON_PRETTY_PRINT into two-space indentation.
     * Lines without leading spaces are returned unchanged.
     *
     * @param string $line Line to fix.
     * @return string Fixed line.
     */
    private static function fixLineSpaces(string $line): string
    {
        $leadingSpaces = strspn($line, ' ');

        // Integer division keeps the offset an int even for an odd number of leading spaces.
        return substr($line, intdiv($leadingSpaces, 2));
    }

    /**
     * Pretty-prints the schema as JSON, re-indented through fixLineSpaces() and newline-terminated.
     *
     * @return string Properly spaced JSON string.
     */
    private function toJsonStringProperSpacing(): string
    {
        $jsonStr = json_encode($this->toJson(), JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
        $lines = explode("\n", $jsonStr);

        // A closure replaces the 'self::method' string callable, which is deprecated since PHP 8.2.
        $fixed = array_map(
            static fn (string $line): string => self::fixLineSpaces($line),
            $lines
        );

        return implode("\n", $fixed) . "\n";
    }

    /**
     * @return string String representation.
     */
    public function __toString(): string
    {
        return $this->toJsonStringProperSpacing();
    }
}

src/Input/DataSchemaField.php

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
<?php
2+
3+
namespace Mindee\Input;
4+
5+
/**
6+
* Data Schema Field.
7+
*/
8+
class DataSchemaField
{
    /**
     * @var string Name of the field in the data schema.
     */
    public string $name;
    /**
     * @var string Display name for the field. Also impacts inference results.
     */
    public string $title;
    /**
     * @var boolean Whether this field can contain multiple values.
     */
    public bool $isArray;
    /**
     * @var string Data type of the field.
     */
    public string $type;
    /**
     * @var string|null Detailed description of what this field represents.
     */
    public ?string $description;
    /**
     * @var string|null Optional extraction guidelines.
     */
    public ?string $guidelines;
    /**
     * @var boolean|null Whether to remove duplicate values in the array.
     */
    public ?bool $uniqueValues;
    /**
     * @var array|null Subfields when type is `nested_object`. Leave empty for other types.
     */
    public ?array $nestedFields;
    /**
     * @var array|null Allowed values when type is `classification`. Leave empty for other types.
     */
    public ?array $classificationValues;

    /**
     * Builds a field from its array definition.
     *
     * The `name`, `title`, `is_array` and `type` keys are read unconditionally. Every other key is
     * optional and its property is set to null when the key is absent.
     *
     * @param array $serverResponse Field definition array, using snake_case keys.
     */
    public function __construct(array $serverResponse)
    {
        $this->name = $serverResponse['name'];
        $this->title = $serverResponse['title'];
        $this->isArray = $serverResponse['is_array'];
        $this->type = $serverResponse['type'];

        // Optional keys: defaulting to null avoids undefined-key warnings and keeps every
        // typed property initialized, so reading it never raises an "uninitialized" Error.
        $this->description = $serverResponse['description'] ?? null;
        $this->guidelines = $serverResponse['guidelines'] ?? null;
        $this->uniqueValues = $serverResponse['unique_values'] ?? null;
        $this->nestedFields = $serverResponse['nested_fields'] ?? null;
        $this->classificationValues = $serverResponse['classification_values'] ?? null;
    }

    /**
     * Serializes the field back to its array form; optional properties that are null are omitted.
     *
     * @return array JSON representation.
     */
    public function toJson(): array
    {
        $out = [
            'name' => $this->name,
            'title' => $this->title,
            'is_array' => $this->isArray,
            'type' => $this->type,
        ];
        if (isset($this->description)) {
            $out['description'] = $this->description;
        }
        if (isset($this->guidelines)) {
            $out['guidelines'] = $this->guidelines;
        }
        if (isset($this->uniqueValues)) {
            $out['unique_values'] = $this->uniqueValues;
        }
        if (isset($this->nestedFields)) {
            $out['nested_fields'] = $this->nestedFields;
        }
        if (isset($this->classificationValues)) {
            $out['classification_values'] = $this->classificationValues;
        }
        return $out;
    }

    /**
     * @return string String representation.
     */
    public function __toString(): string
    {
        return json_encode(
            $this->toJson(),
            JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES
        );
    }
}

src/Input/DataSchemaReplace.php

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
<?php
2+
3+
namespace Mindee\Input;
4+
5+
use InvalidArgumentException;
6+
7+
/**
8+
* The structure to completely replace the data schema of the model.
9+
*/
10+
class DataSchemaReplace
{
    /**
     * @var DataSchemaField[] Fields to replace in the data schema.
     */
    public array $fields;

    /**
     * Wraps every entry of the `fields` key in a DataSchemaField, keeping the original array keys.
     *
     * @param array $serverResponse Array holding a non-empty `fields` array.
     * @throws InvalidArgumentException Throws if the fields array is empty or the Data schema is incorrect.
     */
    public function __construct(array $serverResponse)
    {
        $rawFields = $serverResponse['fields'] ?? null;
        if (!is_array($rawFields) || $rawFields === []) {
            throw new InvalidArgumentException('Data Schema replacement fields cannot be empty.');
        }

        $built = [];
        foreach ($rawFields as $key => $rawField) {
            $built[$key] = new DataSchemaField($rawField);
        }
        $this->fields = $built;
    }

    /**
     * Serializes every field through its own toJson() under the `fields` key.
     *
     * @return array JSON representation.
     */
    public function toJson(): array
    {
        $serialized = [];
        foreach ($this->fields as $key => $field) {
            $serialized[$key] = $field->toJson();
        }

        return ['fields' => $serialized];
    }

    /**
     * @return string String representation.
     */
    public function __toString(): string
    {
        $flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES;
        $payload = $this->toJson();

        return json_encode($payload, $flags);
    }
}

src/Input/InferenceParameters.php

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,21 +49,31 @@ class InferenceParameters
4949
*/
5050
public ?string $textContext;
5151

52+
/**
53+
* @var DataSchema|null Data schema for inference.
54+
*/
55+
public ?DataSchema $dataSchema;
56+
5257
/**
5358
* @var PollingOptions Polling options.
5459
*/
5560
public PollingOptions $pollingOptions;
5661

5762
/**
58-
* @param string $modelId ID of the model.
59-
* @param boolean|null $rag Whether to enable Retrieval-Augmented Generation.
60-
* @param boolean|null $rawText Whether to extract the full text content from the document as strings.
61-
* @param boolean|null $polygon Whether to calculate bounding box polygons for all fields.
62-
* @param boolean|null $confidence Whether to calculate confidence scores for all fields.
63-
* @param string|null $alias Optional file alias.
64-
* @param array<string>|null $webhooksIds List of webhook IDs.
65-
* @param string|null $textContext Additional text context used by the model during inference.
66-
* @param PollingOptions|null $pollingOptions Polling options.
63+
* @param string $modelId ID of the model.
64+
* @param boolean|null $rag Whether to enable Retrieval-Augmented Generation.
65+
* @param boolean|null $rawText Whether to extract the full text content from the
66+
* document as strings.
67+
* @param boolean|null $polygon Whether to calculate bounding box polygons for all
68+
* fields.
69+
* @param boolean|null $confidence Whether to calculate confidence scores for all fields.
70+
* @param string|null $alias Optional file alias.
71+
* @param array<string>|null $webhooksIds List of webhook IDs.
72+
* @param string|null $textContext Additional text context used by the model during
73+
* inference.
74+
* @param DataSchema|string|array|null $dataSchema Data schema to apply for the inference, given as a
75+
* DataSchema instance, a JSON string or an array.
76+
* @param PollingOptions|null $pollingOptions Polling options.
6777
*/
6878
public function __construct(
6979
string $modelId,
@@ -74,6 +84,7 @@ public function __construct(
7484
?string $alias = null,
7585
?array $webhooksIds = null,
7686
?string $textContext = null,
87+
DataSchema|string|array|null $dataSchema = null,
7788
?PollingOptions $pollingOptions = null,
7889
) {
7990
$this->modelId = $modelId;
@@ -97,5 +108,8 @@ public function __construct(
97108
} else {
98109
$this->webhooksIds = [];
99110
}
111+
if (isset($dataSchema)) {
112+
$this->dataSchema = new DataSchema($dataSchema);
113+
}
100114
}
101115
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<?php
2+
3+
namespace Mindee\Parsing\V2;
4+
5+
/**
6+
* Data schema options activated during the inference.
7+
*/
8+
class DataSchemaActiveOption
{
    /**
     * @var boolean Whether the Data Schema has been replaced.
     */
    public bool $replace;

    /**
     * Reads the `replace` flag from the given array.
     *
     * @param array $serverResponse Raw server response array.
     */
    public function __construct(array $serverResponse)
    {
        $this->replace = $serverResponse['replace'];
    }

    /**
     * Renders a titled section with the replace flag shown as `True` or `False`.
     *
     * @return string String representation.
     */
    public function __toString(): string
    {
        $flag = 'False';
        if ($this->replace) {
            $flag = 'True';
        }

        return "Data Schema\n-----------\n:Replace: " . $flag;
    }
}

src/Parsing/V2/InferenceActiveOptions.php

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ class InferenceActiveOptions
4141
*/
4242
public bool $textContext;
4343

44+
/**
45+
* @var DataSchemaActiveOption Data schema options provided for the inference.
46+
*/
47+
public DataSchemaActiveOption $dataSchema;
48+
4449
/**
4550
* @param array $serverResponse Raw server response array.
4651
*/
@@ -51,6 +56,7 @@ public function __construct(array $serverResponse)
5156
$this->polygon = $serverResponse['polygon'];
5257
$this->confidence = $serverResponse['confidence'];
5358
$this->textContext = $serverResponse['text_context'];
59+
$this->dataSchema = new DataSchemaActiveOption($serverResponse['data_schema']);
5460
}
5561

5662
/**
@@ -62,6 +68,8 @@ public function __toString(): string
6268
. ':Raw Text: ' . SummaryHelper::formatForDisplay($this->rawText) . "\n"
6369
. ':Polygon: ' . SummaryHelper::formatForDisplay($this->polygon) . "\n"
6470
. ':Confidence: ' . SummaryHelper::formatForDisplay($this->confidence) . "\n"
65-
. ':RAG: ' . SummaryHelper::formatForDisplay($this->rag) . "\n";
71+
. ':RAG: ' . SummaryHelper::formatForDisplay($this->rag) . "\n"
72+
. ':Text Context: ' . SummaryHelper::formatForDisplay($this->textContext) . "\n\n"
73+
. $this->dataSchema . "\n";
6674
}
6775
}

0 commit comments

Comments
 (0)