Skip to content

Commit 64cf050

Browse files
src/ tests/: fix various minor test failures.
1 parent 532e350 commit 64cf050

5 files changed

Lines changed: 24 additions & 15 deletions

File tree

src/TableGridExtractorV4.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,18 @@
1919

2020
from __future__ import annotations
2121

22-
import sys
2322
from pathlib import Path
2423
from typing import Optional
25-
import pymupdf
24+
from dataclasses import dataclass
25+
2626
import numpy as np
2727
import onnxruntime as ort
2828

29+
import pymupdf
30+
2931
# ---------------------------------------------------------------------------
3032
# Inline GridPrediction / CellInfo (standalone mode)
3133
# ---------------------------------------------------------------------------
32-
from dataclasses import dataclass, field
3334

3435

3536
@dataclass
@@ -402,7 +403,7 @@ def predict(
402403

403404
h_tuples = []
404405
h_cls_list = (
405-
grid.h_cls.tolist() if grid.h_cls is not None else [1] * len(grid.h_lines)
406+
grid.h_cls.tolist() if grid.h_cls is not None else [1] * len(grid.h_lines) # pylint: disable=no-member
406407
)
407408
for y, c in zip(grid.h_lines, h_cls_list):
408409
y_norm = y / orig_h

src/table.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@
8181
from dataclasses import dataclass
8282
from operator import itemgetter
8383
import weakref
84+
import pathlib
85+
8486
import pymupdf
8587
from pymupdf import mupdf
86-
import pathlib
8788

8889
# -------------------------------------------------------------------
8990
# Start of PyMuPDF interface code
@@ -94,12 +95,12 @@
9495
# Optionally use the TGIF table grid finder.
9596
# This replaces fz_find_table_within_bounds.
9697
USE_TGIF = os.getenv("USE_TGIF", "0")
98+
EXTRACTOR_V4 = None # Keep pylint happy.
9799
if USE_TGIF == "1":
98100
print("Using TGIFVx for table grid extraction.")
99-
import pymupdf.tgif
101+
import pymupdf.tgif # pylint: disable=import-error
100102
elif USE_TGIF == "4":
101103
print("Using TGEV4 for table grid extraction.")
102-
import numpy as np
103104
from pymupdf.TableGridExtractorV4 import TableGridExtractorV4
104105

105106
EXTRACTOR_V4 = TableGridExtractorV4(
@@ -112,7 +113,8 @@
112113
# filter_empty_lines=not args.no_filter_empty,
113114
)
114115
else:
115-
print("Using legacy table grid extraction.")
116+
if os.environ.get('PYMUPDF_LEGACY_TABLE_DIAGNOSTIC') != '0':
117+
print("Using legacy table grid extraction.")
116118

117119
EDGES = [] # vector graphics from PyMuPDF
118120
CHARS = [] # text characters from PyMuPDF
@@ -191,14 +193,14 @@ def get_table_cells_from_rect_tgif1(page, word_rects, rect):
191193
bound = mupdf.FzRect(*rect)
192194

193195
try:
194-
r, xpos, ypos = pymupdf.tgif.fz_visual_table_grid_finder(page, bound)
196+
r, xpos, ypos = pymupdf.tgif.fz_visual_table_grid_finder(page, bound) # pylint: disable=no-member
195197
x_count = int(xpos.m_internal.len)
196198
x_values = [xpos.list(i).pos for i in range(x_count)]
197199
y_count = int(ypos.m_internal.len)
198200
y_values = [ypos.list(i).pos for i in range(y_count)]
199201
if xpos.m_internal.max_uncertainty > 0 or ypos.m_internal.max_uncertainty > 0:
200202
print(f"{page.number=}: grid with uncertainty for {bound=}")
201-
except:
203+
except Exception:
202204
return cells
203205
for i in range(y_count - 1):
204206
for j in range(x_count - 1):
@@ -2793,11 +2795,11 @@ def find_tables(
27932795
if my_boxes:
27942796
word_rects = [pymupdf.Rect(w[:4]) for w in TEXTPAGE.extractWORDS()]
27952797
tp2 = page.get_textpage(flags=TABLE_DETECTOR_FLAGS)
2796-
for rect in my_boxes:
2797-
cells = make_table_from_bbox(
2798-
page, tp2, word_rects, rect
2799-
) # pylint: disable=E0606
2800-
tbf.tables.append(Table(page, cells))
2798+
for rect in my_boxes:
2799+
cells = make_table_from_bbox(
2800+
page, tp2, word_rects, rect
2801+
) # pylint: disable=E0606
2802+
tbf.tables.append(Table(page, cells))
28012803
except Exception as e:
28022804
pymupdf.message("find_tables: exception occurred: %s" % str(e))
28032805
return None

tests/test_4767.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ def test_4767():
1515
print('test_4767(): not running on Pyodide - cannot run child processes.')
1616
return
1717

18+
os.environ['PYMUPDF_LEGACY_TABLE_DIAGNOSTIC'] = '0'
19+
1820
if (1
1921
and platform.system() == 'Windows'
2022
and os.environ.get('GITHUB_ACTIONS') == 'true'

tests/test_general.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,6 +1079,8 @@ def test_cli_out():
10791079
print('test_cli_out(): not running on Pyodide - cannot run child processes.')
10801080
return
10811081

1082+
os.environ['PYMUPDF_LEGACY_TABLE_DIAGNOSTIC'] = '0'
1083+
10821084
import platform
10831085
import re
10841086
import subprocess
@@ -1174,6 +1176,7 @@ def test_use_python_logging():
11741176
print('test_cli(): not running on Pyodide - cannot run child processes.')
11751177
return
11761178

1179+
os.environ['PYMUPDF_LEGACY_TABLE_DIAGNOSTIC'] = '0'
11771180
log_prefix = None
11781181
if os.environ.get('PYMUPDF_USE_EXTRA') == '0':
11791182
log_prefix = f'.+Using non-default setting from PYMUPDF_USE_EXTRA: \'0\''

tests/test_pylint.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def test_pylint():
115115
directory = f'{root}/src'
116116
directory = directory.replace('/', os.sep)
117117
leafs = [
118+
'TableGridExtractorV4.py',
118119
'__init__.py',
119120
'__main__.py',
120121
'_apply_pages.py',

0 commit comments

Comments
 (0)