Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit f6dd89a

Browse files
authored
feat: added docproto to AnnotateFile convertor (#63)
* feat: added docproto to AnnotateFile convertor * fixed documentai_toolbox page import * added document_to_vision sample * fixed sample print statement * fixed sample * fixed failing test
1 parent fc155a9 commit f6dd89a

9 files changed

Lines changed: 871 additions & 0 deletions

File tree

google/cloud/documentai_toolbox/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,13 @@
2424
entity,
2525
)
2626

27+
from .converters import (
28+
converters,
29+
)
30+
2731
# Names exported by ``from google.cloud.documentai_toolbox import *``.
# Entries must be strings: the import system looks each one up by name and
# raises TypeError if an item in ``__all__`` is not a ``str``.
__all__ = (
    "document",
    "page",
    "entity",
    "converters",
)
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# -*- coding: utf-8 -*-
2+
# Copyright 2023 Google LLC
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
"""Document.proto converters."""
17+
18+
from typing import List
19+
from google.cloud.vision import AnnotateFileResponse, ImageAnnotationContext
20+
from google.cloud.vision import AnnotateImageResponse
21+
22+
from google.cloud.documentai_toolbox.wrappers import page
23+
24+
from google.cloud.documentai_toolbox.converters.vision_helpers import (
25+
_convert_document_page,
26+
_get_text_anchor_substring,
27+
PageInfo,
28+
)
29+
30+
31+
def _convert_to_vision_annotate_file_response(
    text: str, pages: List[page.Page]
) -> AnnotateFileResponse:
    """Convert OCR data from Document proto to AnnotateFileResponse proto (Vision API).

    Args:
        text (str):
            Contents of document.
        pages (List[page.Page]):
            A list of Pages. Each item must expose the underlying Document AI
            page through its ``documentai_page`` attribute.

    Returns:
        AnnotateFileResponse:
            Proto holding one AnnotateImageResponse per input page, in input
            order. Each response carries the page's TextAnnotation as
            ``full_text_annotation`` and a ``context.page_number`` that
            starts at 1.
    """
    responses = []
    # start=1: page numbers in the output count from 1, not from the list index.
    for page_number, wrapped_page in enumerate(pages, start=1):
        documentai_page = wrapped_page.documentai_page

        page_vision_annotation = _convert_document_page(
            PageInfo(documentai_page, text)
        )
        # The annotation text is the slice of the full document text that the
        # page layout's text anchor refers to.
        page_vision_annotation.text = _get_text_anchor_substring(
            text, documentai_page.layout.text_anchor
        )

        responses.append(
            AnnotateImageResponse(
                full_text_annotation=page_vision_annotation,
                context=ImageAnnotationContext(page_number=page_number),
            )
        )

    return AnnotateFileResponse(responses=responses)

0 commit comments

Comments
 (0)