Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit 27196bb

Browse files
galz10gcf-owl-bot[bot]holtskinner
authored
feat: added text_annotation to vision conversion (#114)
* feat: added text_annotation to vision conversion * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * added resource for vision convert test --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Holt Skinner <13262395+holtskinner@users.noreply.github.com>
1 parent 1e22c9a commit 27196bb

4 files changed

Lines changed: 10127 additions & 5 deletions

File tree

google/cloud/documentai_toolbox/converters/vision_helpers.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,16 @@
2121
import immutabledict
2222

2323
from google.cloud.documentai import Document
24-
from google.cloud.vision import TextAnnotation, Symbol, Word, Paragraph, Block, Page
24+
from google.cloud.vision_v1.types import geometry
25+
from google.cloud.vision import (
26+
EntityAnnotation,
27+
TextAnnotation,
28+
Symbol,
29+
Word,
30+
Paragraph,
31+
Block,
32+
Page,
33+
)
2534
from google.cloud import vision
2635

2736

@@ -227,6 +236,54 @@ def _convert_document_token(
227236
return vision_words
228237

229238

239+
def _generate_entity_annotations(
    page_info: PageInfo,
) -> List[EntityAnnotation]:
    """Generate a list of EntityAnnotations from a Document AI page.

    One EntityAnnotation is emitted per token of ``page_info.page``, in token
    order. Each annotation's description is the token text sliced out of
    ``page_info.text`` and its bounding polygon is expressed in integer
    coordinates.

    Args:
        page_info: Current page information, including the document page to be
            converted, its text, and the position of the reading cursor.

    Returns:
        A list of EntityAnnotations with description and bounding box
        populated. Each EntityAnnotation carries word-level information. A
        token without any text segment yields an empty description.
    """
    page = page_info.page
    unspecified_break = Document.Page.Token.DetectedBreak.Type.TYPE_UNSPECIFIED

    entity_annotations: List[EntityAnnotation] = []
    for token in page.tokens:
        token_poly = token.layout.bounding_poly
        if token_poly.vertices:
            vertices = [
                {"x": int(vertex.x), "y": int(vertex.y)}
                for vertex in token_poly.vertices
            ]
        else:
            # No absolute vertices on this token: scale the normalized ones by
            # the page dimensions instead.
            vertices = [
                {
                    "x": int(normalized_vertex.x * page.dimension.width),
                    "y": int(normalized_vertex.y * page.dimension.height),
                }
                for normalized_vertex in token_poly.normalized_vertices
            ]
        bounding_box = geometry.BoundingPoly(vertices=vertices)

        text_segments = token.layout.text_anchor.text_segments
        if text_segments:
            text_start_index = text_segments[0].start_index
            text_end_index = text_segments[0].end_index
            # The word in the docai response contains the break text. Remove the
            # break text, but only when a break was actually detected: the
            # enum lives in `detected_break.type_`, and comparing the
            # DetectedBreak message itself to the enum is never equal, which
            # would strip the last character of every token.
            if token.detected_break.type_ != unspecified_break:
                # Never move the end before the start (an already-empty
                # segment would otherwise produce a negative slice bound).
                text_end_index = max(text_start_index, text_end_index - 1)
            description = page_info.text[text_start_index:text_end_index]
        else:
            # A token without a text anchor has no text to slice.
            description = ""

        entity_annotations.append(
            EntityAnnotation(
                description=description,
                bounding_poly=bounding_box,
            )
        )
    return entity_annotations
285+
286+
230287
def _convert_document_paragraph(
231288
page_info: PageInfo,
232289
) -> List[Paragraph]:

google/cloud/documentai_toolbox/wrappers/document.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
from google.cloud.documentai_toolbox.converters.vision_helpers import (
4242
_convert_document_page,
43+
_generate_entity_annotations,
4344
_get_text_anchor_substring,
4445
PageInfo,
4546
)
@@ -169,13 +170,17 @@ def _convert_to_vision_annotate_file_response(text: str, pages: List[page.Page])
169170
page_idx = 0
170171
while page_idx < len(pages):
171172
page_info = PageInfo(pages[page_idx].documentai_page, text)
172-
page_vision_annotation = _convert_document_page(page_info)
173-
page_vision_annotation.text = _get_text_anchor_substring(
173+
174+
full_text_annotation = _convert_document_page(page_info)
175+
full_text_annotation.text = _get_text_anchor_substring(
174176
text, pages[page_idx].documentai_page.layout.text_anchor
175177
)
178+
text_annotations = _generate_entity_annotations(page_info)
179+
176180
responses.append(
177181
AnnotateImageResponse(
178-
full_text_annotation=page_vision_annotation,
182+
full_text_annotation=full_text_annotation,
183+
text_annotations=text_annotations,
179184
context=ImageAnnotationContext(page_number=page_idx + 1),
180185
)
181186
)

0 commit comments

Comments
 (0)