-
Notifications
You must be signed in to change notification settings - Fork 539
Expand file tree
/
Copy pathpyproject.toml
More file actions
221 lines (197 loc) · 6.7 KB
/
pyproject.toml
File metadata and controls
221 lines (197 loc) · 6.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# Core distribution metadata (PEP 621 `[project]` table).
[project]
name = "camelot-py"
version = "2.0.0rc1"
description = "PDF Table Extraction for Humans."
authors = [
    { name = "Vinayak Mehta", email = "vmehta94@gmail.com" },
]
# NOTE(review): the table form of `license` is deprecated by PEP 639 in favour
# of a plain SPDX string (`license = "MIT"`). That form needs a recent
# setuptools, so it is left unchanged here — revisit together with the
# build-system requirement.
license = { text = "MIT" }
readme = "README.md"
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
]
# Oldest supported interpreter; the lowest version classifier above matches it.
requires-python = ">=3.10"
# Runtime requirements installed with every flavor of the package.
# typing-extensions is only pulled in on interpreters older than 3.11.
dependencies = [
    "click>=8.0.1",
    "numpy>=1.26.1",
    "openpyxl>=3.1.0",
    "pandas>=2.2.2",
    "tabulate>=0.9.0",
    "typing-extensions>=4.12.2; python_version < '3.11'",
    "opencv-python-headless>=4.7.0.68",
    "pypdfium2>=4",
    "pillow>=10.4.0",
    "playa-pdf>=0.8.1",
]
[build-system]
# This file declares its metadata in `[project]` and its package discovery in
# `[tool.setuptools.packages.find]`; setuptools only understands both from
# release 61.0.0 onwards. State that floor explicitly so a build with an older,
# pre-installed setuptools (e.g. with build isolation disabled) fails fast
# instead of producing a package with missing metadata.
requires = ["setuptools>=61"]
build-backend = "setuptools.build_meta"
# Automatic package discovery: search the repository root and keep only
# packages whose names match `camelot*`.
[tool.setuptools.packages.find]
where = ["."]
include = ["camelot*"]
# NOTE(review): `include` already restricts discovery to `camelot*`, and no
# `activate` package is visible from this file — confirm this exclude is
# still needed.
exclude = ["activate"]
# Project links published with the package metadata.
[project.urls]
Homepage = "https://github.com/camelot-dev/camelot"
Repository = "https://github.com/camelot-dev/camelot"
Documentation = "https://camelot-py.readthedocs.io/"
Changelog = "https://github.com/camelot-dev/camelot/releases"
# Extras, installed with `pip install 'camelot-py[<name>]'`.
[project.optional-dependencies]
# Plotting support; the matplotlib floor depends on the interpreter version.
plot = [
    "matplotlib>=3.7.5; python_version < '3.12'",
    "matplotlib>=3.8.0; python_version >= '3.12'",
]
# Neural backend behind flavor='ml' (Table Transformer / TATR through
# HuggingFace transformers). It is heavy (pulls torch), so it is strictly
# opt-in: `pip install 'camelot-py[ml]'`. The import is lazy, so the core
# install and the other flavors never load torch.
ml = [
    "torch>=2.0",
    # Capped below 5: transformers v5 strict-validates configs and rejects
    # the TATR checkpoints (published for v4, e.g. `dilation: None`). The
    # whole TATR ecosystem pins to v4; revisit once the checkpoints are
    # v5-clean.
    "transformers>=4.40,<5",
    "timm>=0.9",
    "pillow>=10.4.0",
]
# OCR text source for flavor='ml' on scanned / image-only PDFs: table
# structure still comes from TATR, text comes from OCR instead of the PDF
# text layer. Pip-only, no system tesseract binary.
# `pip install 'camelot-py[ocr]'`.
ocr = [
    "rapidocr-onnxruntime>=1.3",
]
# Python `ghostscript` package.
# NOTE(review): presumably an optional image-conversion backend — confirm
# which code path requires it.
ghostscript = [
    "ghostscript>=0.7",
]
# Contributor tooling: linting, type checking, tests, coverage, docs and
# release helpers.
dev = [
    "Pygments>=2.10.0",
    "coverage[toml]>=6.2",
    "ruff>=0.12.10",
    "sphinx-book-theme>=1.0.1",
    "mypy>=0.930",
    "pre-commit>=2.16.0",
    "pre-commit-hooks>=4.1.0",
    "pytest>=6.2.5",
    "pytest-mpl>=0.17.0",
    "safety>=2.2.3",
    "sphinx>=4.3.2",
    "sphinx-autobuild>=2021.3.14",
    "sphinx-click>=3.0.2",
    "sphinx-copybutton>=0.5.0",
    "sphinx-prompt>=1.5.0",
    "typeguard>=2.13.3",
    "xdoctest[colors]>=0.15.10",
    "myst-parser>=2.0.0",
    "nox>=2024.10.9",
    "twine",
    "build",
]
# Console commands created on install; both point at the same `main` callable
# in the `camelot.__main__` module.
[project.scripts]
camelot = "camelot.__main__:main"
# Alias matching the PyPI package name so `uvx camelot-py …` works without
# the `--from camelot-py` prefix. See #639.
camelot-py = "camelot.__main__:main"
[tool.coverage.paths]
# Path aliases applied when coverage data files are combined. In each list the
# first entry is the canonical location in the checkout; a recorded path whose
# prefix matches one of the later patterns has that prefix replaced by it.
# The site-packages pattern therefore has to end at the installed `camelot`
# package directory: a bare "*/site-packages" would rewrite
# ".../site-packages/camelot/x.py" to "camelot/camelot/x.py", which does not
# exist in this repository.
source = ["camelot", "*\\camelot", "*/site-packages/camelot"]
tests = ["tests", "*/tests"]
[tool.coverage.run]
# Record branch coverage in addition to line coverage.
branch = true
# Measure both the package and the test suite itself.
source = ["camelot", "tests"]
[tool.coverage.report]
# List the line numbers that were not executed in the text report.
show_missing = true
# Make the report step fail when total coverage is below 90%.
fail_under = 90
[tool.ruff]
# Ruff replaces Black (formatter) and isort (import sorting). Match
# `requires-python = ">=3.10"` so the formatter targets the floor version
# (also keeps formatting stable across the 3.10–3.15 CI matrix).
# NOTE(review): the trove classifiers in [project] stop at 3.14 — confirm the
# upper end of the matrix quoted above.
target-version = "py310"
# Black's default, which the codebase was formatted to.
line-length = 88
# Notebooks were never linted/formatted by the old black/flake8 (types: [python]);
# keep that behaviour. Vendored Flask theme is third-party.
extend-exclude = ["*.ipynb", "docs/_themes/*"]
# pre-commit passes file paths explicitly; force-exclude makes Ruff honour
# the excludes above even then (otherwise excluded files still get linted).
force-exclude = true
[tool.ruff.lint]
# Ruff replaces flake8 + its plugins + isort + pyupgrade. Mapping of the
# old `.flake8` select (B,B9,C,D,DAR,E,F,N,RST,S,W) to the tools Ruff
# re-implements:
#   B   -> flake8-bugbear
#   C   -> mccabe (C90)
#   D   -> pydocstyle
#   W/E -> pycodestyle
#   F   -> pyflakes
#   N   -> pep8-naming
#   S   -> flake8-bandit
#   I   -> isort (was a separate tool)
#   UP  -> pyupgrade (was a separate hook)
# Not ported: RST (flake8-rst-docstrings) and DAR (darglint) have no Ruff
# equivalent. DAR was inert (darglint wasn't installed); RST docstring
# linting is dropped — the one capability lost in the move.
select = ["B", "C90", "D", "E", "F", "I", "N", "S", "UP", "W"]
# Rules switched off for the whole project.
ignore = [
"D401", # imperative-mood first line (was ignored under flake8)
"E203", # whitespace before ':' — Ruff-format/Black compatibility
"E501", # line too long — the formatter owns wrapping
]
[tool.ruff.lint.pydocstyle]
# Check docstrings (the `D` rules) against the NumPy docstring convention.
convention = "numpy"
[tool.ruff.lint.mccabe]
# Threshold for the mccabe complexity rule (C901): code whose complexity
# exceeds this value is reported.
max-complexity = 10
[tool.ruff.lint.isort]
# Mirror the previous `isort` config (profile = "black", one import per line):
# every imported name gets its own import statement.
force-single-line = true
# Rule exemptions keyed by file pattern.
[tool.ruff.lint.per-file-ignores]
# Test code: docstrings are optional, plain `assert` is the norm, and
# star-import / unused-variable findings are tolerated.
"tests/*" = [
    "D100",  # missing docstring in public module
    "D101",  # missing docstring in public class
    "D102",  # missing docstring in public method
    "D103",  # missing docstring in public function
    "D104",  # missing docstring in public package
    "D200",  # one-line docstring should fit on one line
    "D401",  # first line not in imperative mood
    "S101",  # use of `assert`
    "S106",  # possible hard-coded password passed as an argument
    "F403",  # `from module import *` used
    "F405",  # name may be undefined, or defined from star imports
    "F841",  # local variable assigned but never used
]
# Package initialisers: docstrings optional, unused imports allowed (F401).
"__init__.py" = ["D100", "D103", "D104", "F401"]
"__version__.py" = ["D100"]
"__main__.py" = ["D100", "D103"]
# Benchmark scripts are dev-only (not shipped), so docstrings are optional;
# some of them deliberately copy legacy complex code for apples-to-apples
# timing, hence the C901 exemption.
"bench/*" = ["C901", "D100", "D101", "D102", "D103", "S101"]
[tool.mypy]
# Reporting: flag unreachable code and make error output detailed (column
# numbers, error codes, surrounding context).
warn_unreachable = true
pretty = true
show_column_numbers = true
show_error_codes = true
show_error_context = true
# Suppress errors about imports that cannot be resolved.
ignore_missing_imports = true
# strict = true
# Strict is too aggressive for now and does not pass yet. Uncomment the
# settings below one by one and make each of them pass; then strict can be
# enabled. (Values and notes taken from the mypy docs, section
# "Using mypy with an existing codebase" / existing_code.)
# Start off with these
warn_unused_configs = true
warn_redundant_casts = true
# warn_unused_ignores = true
# Getting these passing should be easy
strict_equality = true
# extra_checks = true
# Strongly recommend enabling this one as soon as you can
# check_untyped_defs = true
# These shouldn't be too much additional work, but may be tricky to
# get passing if you use a lot of untyped libraries
disallow_subclassing_any = true
disallow_untyped_decorators = true
disallow_any_generics = true
# These next few are various gradations of forcing use of type annotations
# disallow_untyped_calls = true
# disallow_incomplete_defs = true
# disallow_untyped_defs = true
# This one isn't too hard to get passing, but return on investment is lower
# no_implicit_reexport = true
# This one can be tricky to get passing if you use a lot of untyped libraries
# warn_return_any = true