Skip to content

Commit 98659e6

Browse files
committed
types: mypy fix (ignore pdfminer stubs); normalize Path usage; annotate argparse Namespace
1 parent 41f7ab4 commit 98659e6

2 files changed

Lines changed: 8 additions & 4 deletions

File tree

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,10 @@ python_version = "3.10"
52 52
strict = true
53 53
mypy_path = ["src"]
54 54

55 +
[[tool.mypy.overrides]]
56 +
module = "pdfminer.*"
57 +
ignore_missing_imports = true
58 +
55 59
[tool.pytest.ini_options]
56 60
addopts = "-q"
57 61
testpaths = ["tests"]

src/fek_extractor/core.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def extract_pdf_info(
45 45
debug_pages: int | None = raw_dp if isinstance(raw_dp, int) and raw_dp > 0 else None
46 46

47 47
# 1) Extract full text (headers/footers filtered)
48 -
full_text: str = extract_pdf_text(pdf_path, debug=debug, debug_pages=debug_pages)
48 +
full_text: str = extract_pdf_text(p, debug=debug, debug_pages=debug_pages)
49 49

50 50
# Precompute normalized text once (used by decision + metrics)
51 51
text_norm: str = normalize_text(full_text)
@@ -56,7 +56,7 @@ def extract_pdf_info(
56 56
# 2) Build a light "masthead" blob from first couple of pages
57 57
masthead_lines: list[str] = []
58 58
try:
59 -
for i, layout in enumerate(extract_pages(str(pdf_path))):
59 +
for i, layout in enumerate(extract_pages(str(p))):
60 60
if not isinstance(layout, LTPage):
61 61
continue
62 62
if i >= 2:
@@ -92,8 +92,8 @@ def extract_pdf_info(
92 92
# 5) Compose record
93 93
record: dict[str, Any] = {
94 94
"filename": p.name,
95 -
"path": str(pdf_path),
96 -
"pages": count_pages(pdf_path),
95 +
"path": str(p),
96 +
"pages": count_pages(p),
97 97
**header,
98 98
"articles": articles_ordered,
99 99
}

0 commit comments

Comments
 (0)