1717
1818import dataclasses
1919
20+ from io import BytesIO
21+
2022from google .cloud import documentai
23+ from google .cloud .documentai_toolbox import constants
24+ from PIL import Image
2125
2226
2327@dataclasses .dataclass
@@ -38,10 +42,14 @@ class Entity:
3842 type_ : str = dataclasses .field (init = False )
3943 mention_text : str = dataclasses .field (init = False , default = "" )
4044 normalized_text : str = dataclasses .field (init = False , default = "" )
45+
4146 # Only Populated for Splitter/Classifier Output
4247 start_page : int = dataclasses .field (init = False )
4348 end_page : int = dataclasses .field (init = False )
4449
50+ # Only Populated for Identity Documents
51+ image : Image .Image = dataclasses .field (init = False , default = None )
52+
4553 def __post_init__ (self ):
4654 self .type_ = self .documentai_entity .type_
4755 self .mention_text = self .documentai_entity .mention_text
@@ -54,3 +62,29 @@ def __post_init__(self):
5462 if self .documentai_entity .page_anchor .page_refs :
5563 self .start_page = int (self .documentai_entity .page_anchor .page_refs [0 ].page )
5664 self .end_page = int (self .documentai_entity .page_anchor .page_refs [- 1 ].page )
65+
def crop_image(self, documentai_document: documentai.Document):
    r"""Crop the detected entity out of its page image.

    The cropped image is stored on ``self.image`` and also returned, so
    callers may use either the attribute or the return value.

    Args:
        documentai_document (documentai.Document):
            Required. The `Document` containing the `Entity`.
    Returns:
        PIL.Image.Image:
            Image from `Document.Entity`. Returns `None` if there is no image
            (wrong entity type, entity has text, no page reference, no page
            image content, or an incomplete bounding polygon).
    """
    # Only image-type entities that carry no text are cropped.
    if self.type_ not in constants.IMAGE_ENTITIES or self.mention_text:
        return None

    page_refs = self.documentai_entity.page_anchor.page_refs
    if not page_refs:
        return None
    page_ref = page_refs[0]

    # Coerce the page index the same way __post_init__ does for start/end page.
    page_index = int(page_ref.page)
    pages = documentai_document.pages
    if not 0 <= page_index < len(pages):
        return None

    image_content = pages[page_index].image.content
    if not image_content:
        # The document was produced without embedded page images.
        return None

    doc_image = Image.open(BytesIO(image_content))
    width, height = doc_image.size

    # Scale normalized coordinates to pixels, rounding to the nearest integer.
    vertices = [
        (int(v.x * width + 0.5), int(v.y * height + 0.5))
        for v in page_ref.bounding_poly.normalized_vertices
    ]
    if len(vertices) < 3:
        return None

    # Each vertex is an (x, y) pair. Vertices 0 and 2 are assumed to be the
    # top-left and bottom-right corners of the box -- TODO confirm ordering
    # for rotated pages.
    (left, top), (right, bottom) = vertices[0], vertices[2]
    self.image = doc_image.crop((left, top, right, bottom))
    return self.image
0 commit comments