Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit 1ac6f5e

Browse files
authored
fix: Add handling for documents missing all layout elements. (#161)
* fix: Add handling for documents missing all layout elements.
  - Bounding boxes will show up as 0,0,0,0
  - Fixes #160
* docs: Add information about return value of `get_bounding_box()`
* fix: Addressed comment and moved if statement
1 parent a702231 commit 1ac6f5e

3 files changed

Lines changed: 16 additions & 0 deletions

File tree

google/cloud/documentai_toolbox/utilities/docai_utilities.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,19 @@ def get_bounding_box(
3535
Returns:
3636
Tuple[int, int, int, int]:
3737
Bounding box coordinates in order (top, left, bottom, right).
38+
Returns `0, 0, 0, 0` if `bounding_poly.normalized_vertices` is empty.
3839
"""
40+
if not bounding_poly.normalized_vertices:
41+
return 0, 0, 0, 0
42+
3943
vertices = [
4044
(
4145
int(vertex.x * page_dimension.width + 0.5),
4246
int(vertex.y * page_dimension.height + 0.5),
4347
)
4448
for vertex in bounding_poly.normalized_vertices
4549
]
50+
4651
top, left = vertices[0]
4752
bottom, right = vertices[2]
4853
return top, left, bottom, right

tests/unit/resources/blank_document.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

tests/unit/test_document.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,16 @@ def test_export_hocr_str():
661661
assert actual_hocr == expected
662662

663663

664+
def test_export_hocr_str_with_blank_document():
665+
wrapped_document = document.Document.from_document_path(
666+
document_path="tests/unit/resources/blank_document.json"
667+
)
668+
669+
actual_hocr = wrapped_document.export_hocr_str(title="hocr_blank")
670+
671+
assert actual_hocr
672+
673+
664674
def test_document_to_merged_documentai_document(get_bytes_multiple_files_mock):
665675
wrapped_document = document.Document.from_gcs(
666676
gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/1/"

0 commit comments

Comments
 (0)