Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit 383e105

Browse files
holtskinner and galz10 authored
feat: Added Export Images functionality (#96)
Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
1 parent c607136 commit 383e105

9 files changed

Lines changed: 224 additions & 4 deletions

File tree

google/cloud/documentai_toolbox/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,5 @@
3737
"image/tiff",
3838
"image/webp",
3939
}
40+
41+
# Entity types that `Entity.crop_image` will crop out of the page image.
IMAGE_ENTITIES = {"Portrait"}

google/cloud/documentai_toolbox/wrappers/document.py

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,17 @@ def _entities_from_shards(
6060
"""
6161
result = []
6262
for shard in shards:
63-
for entity in shard.entities:
64-
result.append(Entity(documentai_entity=entity))
65-
for prop in entity.properties:
66-
result.append(Entity(documentai_entity=prop))
63+
for documentai_entity in shard.entities:
64+
entity = Entity(documentai_entity=documentai_entity)
65+
entity.crop_image(shard)
66+
result.append(entity)
67+
for documentai_prop in documentai_entity.properties:
68+
prop = Entity(documentai_entity=documentai_prop)
69+
prop.crop_image(shard)
70+
result.append(prop)
71+
72+
if len(result) > 1 and result[0].documentai_entity.id:
73+
result.sort(key=lambda x: int(x.documentai_entity.id))
6774
return result
6875

6976

@@ -504,3 +511,37 @@ def convert_document_to_annotate_file_response(self) -> AnnotateFileResponse:
504511
Proto with TextAnnotations.
505512
"""
506513
return _convert_to_vision_annotate_file_response(self.text, self.pages)
514+
515+
def export_images(
    self, output_path: str, output_file_prefix: str, output_file_extension: str
) -> List[str]:
    r"""Exports images from `Document` to files.

    Every entity in `self.entities` whose `image` is set is saved into
    `output_path`. Entities without an image are skipped and do not consume
    an index, so indices in the file names are contiguous starting at 0.

    Args:
        output_path (str):
            Required. The path to the output directory.
            This method does not create the directory.
        output_file_prefix (str):
            Required. The output file name prefix.
        output_file_extension (str):
            Required. The output file extension.

            Format: `png`, `jpg`, etc.
    Returns:
        List[str]:
            A list of output image file names. The names do NOT include
            `output_path`; join them with it to get the written location.
            Format: `{output_file_prefix}_{index}_{Entity.type_}.{output_file_extension}`
    """
    # Filter first so that `enumerate` only counts entities that are written.
    entities_with_images = (entity for entity in self.entities if entity.image)

    output_filenames: List[str] = []
    for index, entity in enumerate(entities_with_images):
        output_filename = (
            f"{output_file_prefix}_{index}_{entity.type_}.{output_file_extension}"
        )
        entity.image.save(os.path.join(output_path, output_filename))
        output_filenames.append(output_filename)

    return output_filenames

google/cloud/documentai_toolbox/wrappers/entity.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@
1717

1818
import dataclasses
1919

20+
from io import BytesIO
21+
2022
from google.cloud import documentai
23+
from google.cloud.documentai_toolbox import constants
24+
from PIL import Image
2125

2226

2327
@dataclasses.dataclass
@@ -38,10 +42,14 @@ class Entity:
3842
type_: str = dataclasses.field(init=False)
3943
mention_text: str = dataclasses.field(init=False, default="")
4044
normalized_text: str = dataclasses.field(init=False, default="")
45+
4146
# Only Populated for Splitter/Classifier Output
4247
start_page: int = dataclasses.field(init=False)
4348
end_page: int = dataclasses.field(init=False)
4449

50+
# Only Populated for Identity Documents
51+
image: Image.Image = dataclasses.field(init=False, default=None)
52+
4553
def __post_init__(self):
4654
self.type_ = self.documentai_entity.type_
4755
self.mention_text = self.documentai_entity.mention_text
@@ -54,3 +62,29 @@ def __post_init__(self):
5462
if self.documentai_entity.page_anchor.page_refs:
5563
self.start_page = int(self.documentai_entity.page_anchor.page_refs[0].page)
5664
self.end_page = int(self.documentai_entity.page_anchor.page_refs[-1].page)
65+
66+
def crop_image(self, documentai_document: documentai.Document) -> None:
    r"""Crops the entity's region out of its page image into `self.image`.

    Nothing is cropped, and `self.image` is left unchanged, when:

    - the entity type is not in `constants.IMAGE_ENTITIES`,
    - the entity has `mention_text`,
    - the entity has no page reference,
    - the page reference has fewer than three normalized vertices, or
    - the referenced page carries no image content.

    Args:
        documentai_document (documentai.Document):
            Required. The `Document` containing the `Entity`.
    Returns:
        None. The cropped `PIL.Image.Image` is assigned to `self.image`.
    """
    if self.type_ not in constants.IMAGE_ENTITIES or self.mention_text:
        return

    # This method runs for every entity while a document is being wrapped,
    # so incomplete geometry must skip the crop instead of raising.
    page_refs = self.documentai_entity.page_anchor.page_refs
    if not page_refs:
        return
    page_ref = page_refs[0]

    normalized_vertices = page_ref.bounding_poly.normalized_vertices
    # Vertices 0 and 2 are used as the two opposite corners of the box.
    if len(normalized_vertices) < 3:
        return

    image_content = documentai_document.pages[page_ref.page].image.content
    if not image_content:
        return

    doc_image = Image.open(BytesIO(image_content))
    width, height = doc_image.size

    def _to_pixels(vertex):
        # Scale normalized coordinates to pixels, rounding to nearest.
        return int(vertex.x * width + 0.5), int(vertex.y * height + 0.5)

    # Each vertex is (x, y), i.e. (left, top) and (right, bottom).
    left, top = _to_pixels(normalized_vertices[0])
    right, bottom = _to_pixels(normalized_vertices[2])

    # PIL expects the crop box as (left, upper, right, lower).
    self.image = doc_image.crop((left, top, right, bottom))
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
17+
# [START documentai_toolbox_export_images]
18+
19+
from google.cloud.documentai_toolbox import document
20+
21+
# TODO(developer): Uncomment these variables before running the sample.
22+
# Given a local document.proto or sharded document.proto from an identity processor in path
23+
# document_path = "path/to/local/document.json"
24+
# output_path = "resources/output/"
25+
# output_file_prefix = "exported_photo"
26+
# output_file_extension = "png"
27+
28+
29+
def export_images_sample(
    document_path: str,
    output_path: str,
    output_file_prefix: str,
    output_file_extension: str,
) -> None:
    """Export the entity images of a processed document and print their names.

    Args:
        document_path: Path to the local document JSON to load.
        output_path: Directory the image files are written into.
        output_file_prefix: Prefix used for every exported file name.
        output_file_extension: Extension of the exported files, e.g. `png`.
    """
    wrapped_document = document.Document.from_document_path(document_path=document_path)

    exported_names = wrapped_document.export_images(
        output_path=output_path,
        output_file_prefix=output_file_prefix,
        output_file_extension=output_file_extension,
    )

    print("Images Successfully Exported")
    for exported_name in exported_names:
        print(exported_name)
45+
46+
47+
# [END documentai_toolbox_export_images]
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
import os
17+
import shutil
18+
19+
import pytest
20+
from samples.snippets import export_images_sample
21+
22+
document_path = "../../tests/unit/resources/images/dl3-0.json"
23+
output_path = "resources/output/"
24+
output_file_prefix = "exported_photo"
25+
output_file_extension = "png"
26+
27+
28+
def test_export_images_sample(capsys: pytest.CaptureFixture) -> None:
    """Run the export sample and check that it reports the exported portrait."""
    # exist_ok: a directory left over from an interrupted run must not make
    # this test fail before the sample is even called.
    os.makedirs(output_path, exist_ok=True)
    try:
        current_directory = os.path.dirname(__file__)
        rel_document_path = os.path.relpath(document_path, current_directory)

        export_images_sample.export_images_sample(
            document_path=rel_document_path,
            output_path=output_path,
            output_file_prefix=output_file_prefix,
            output_file_extension=output_file_extension,
        )

        out, _ = capsys.readouterr()

        assert "Images Successfully Exported" in out
        assert "exported_photo_0_Portrait.png" in out

        assert os.path.exists(output_path)
    finally:
        # Always clean up, even when the sample raises or an assertion fails.
        shutil.rmtree(output_path)

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
"pikepdf >= 6.2.9, < 8.0.0",
6060
"pikepdf >= 6.2.9, < 7.0.0; python_version<'3.8'",
6161
"immutabledict >= 2.0.0, < 3.0.0dev",
62+
"Pillow >= 9.5.0, < 10.0.0",
6263
),
6364
python_requires=">=3.7",
6465
classifiers=[

tests/unit/resources/images/dl3-0.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

tests/unit/test_document.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# limitations under the License.
1616

1717
import os
18+
import shutil
1819

1920
# try/except added for compatibility with python < 3.8
2021
try:
@@ -75,6 +76,13 @@ def get_bytes_splitter_mock():
7576
yield byte_factory
7677

7778

79+
@pytest.fixture
def get_bytes_images_mock():
    """Patch `document._get_bytes` to return the bytes of the images test resources."""
    patcher = mock.patch.object(document, "_get_bytes")
    with patcher as fake_get_bytes:
        fake_get_bytes.return_value = get_bytes("tests/unit/resources/images")
        yield fake_get_bytes
84+
85+
7886
def test_get_shards_with_gcs_uri_contains_file_type():
7987
with pytest.raises(ValueError, match="gcs_prefix cannot contain file types"):
8088
document._get_shards(
@@ -379,3 +387,26 @@ def test_convert_document_to_annotate_file_response():
379387
actual = doc.convert_document_to_annotate_file_response()
380388

381389
assert actual != AnnotateFileResponse()
390+
391+
392+
def test_export_images(get_bytes_images_mock):
    """`Document.export_images` returns the file name of the exported portrait."""
    doc = document.Document.from_gcs(
        gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/0"
    )
    output_path = "resources/output/"

    # exist_ok: a directory left over from an interrupted run must not make
    # this test fail with FileExistsError.
    os.makedirs(output_path, exist_ok=True)
    try:
        actual = doc.export_images(
            output_path=output_path,
            output_file_prefix="exported_photo",
            output_file_extension="png",
        )
        get_bytes_images_mock.assert_called_once()

        assert os.path.exists(output_path)
    finally:
        # Always clean up, even when the export raises or an assertion fails.
        shutil.rmtree(output_path)

    assert actual == [
        "exported_photo_0_Portrait.png",
    ]

tests/unit/test_entity.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,20 @@
1414
# limitations under the License.
1515
#
1616

17+
import pytest
18+
1719
from google.cloud import documentai
20+
21+
from google.cloud.documentai_toolbox import document
1822
from google.cloud.documentai_toolbox import entity
1923

2024

25+
@pytest.fixture
def docproto():
    """Parse the `dl3-0.json` test resource into a `documentai.Document`."""
    resource_path = "tests/unit/resources/images/dl3-0.json"
    with open(resource_path, "r", encoding="utf-8") as json_file:
        document_json = json_file.read()
        return documentai.Document.from_json(document_json)
29+
30+
2131
def test_Entity():
2232
documentai_entity = documentai.Document.Entity(
2333
type_="some_entity_type", mention_text="some_mention_text"
@@ -58,3 +68,10 @@ def test_Entity_splitter():
5868
assert wrapper_entity.type_ == "invoice_statement"
5969
assert wrapper_entity.start_page == 0
6070
assert wrapper_entity.end_page == 2
71+
72+
73+
def test_crop_image(docproto):
    """Cropping the first wrapped entity leaves an image on that entity."""
    wrapped_document = document.Document.from_documentai_document(docproto)
    wrapped_document.entities[0].crop_image(documentai_document=docproto)

    first_entity = wrapped_document.entities[0]
    assert first_entity.image

0 commit comments

Comments
 (0)