Skip to content

Commit 58ee1f7

Browse files
committed
chore!: paddleocr - drop Python 3.9 and use X|Y typing
1 parent 3628695 commit 58ee1f7

2 files changed

Lines changed: 32 additions & 38 deletions

File tree

integrations/paddleocr/pyproject.toml

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ name = "paddleocr-haystack"
77
dynamic = ["version"]
88
description = 'An integration of PaddleOCR with Haystack'
99
readme = "README.md"
10-
requires-python = ">=3.9"
10+
requires-python = ">=3.10"
1111
license = "Apache-2.0"
1212
keywords = []
1313
authors = [
@@ -16,15 +16,14 @@ authors = [
1616
classifiers = [
1717
"Development Status :: 4 - Beta",
1818
"Programming Language :: Python",
19-
"Programming Language :: Python :: 3.9",
2019
"Programming Language :: Python :: 3.10",
2120
"Programming Language :: Python :: 3.11",
2221
"Programming Language :: Python :: 3.12",
2322
"Programming Language :: Python :: Implementation :: CPython",
2423
"Programming Language :: Python :: Implementation :: PyPy",
2524
]
2625
dependencies = [
27-
"haystack-ai>=2.19.0",
26+
"haystack-ai>=2.22.0",
2827
"paddleocr>=3.3.2",
2928
"paddlex[serving]>=3.3.10",
3029
"requests>=2.25.0",
@@ -80,7 +79,6 @@ check_untyped_defs = true
8079
disallow_incomplete_defs = true
8180

8281
[tool.ruff]
83-
target-version = "py39"
8482
line-length = 120
8583

8684
[tool.ruff.lint]
@@ -127,10 +125,6 @@ ignore = [
127125
"B008",
128126
"S101",
129127
]
130-
unfixable = [
131-
# Don't touch unused imports
132-
"F401",
133-
]
134128

135129
[tool.ruff.lint.isort]
136130
known-first-party = ["haystack_integrations"]

integrations/paddleocr/src/haystack_integrations/components/converters/paddleocr/paddleocr_vl_document_converter.py

Lines changed: 30 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44
import base64
55
from pathlib import Path
6-
from typing import Any, Literal, Optional, Union
6+
from typing import Any, Literal
77

88
import requests
99
from haystack import Document, component, default_from_dict, default_to_dict, logging
@@ -24,7 +24,7 @@
2424
logger = logging.getLogger(__name__)
2525

2626

27-
FileTypeInput = Union[Literal["pdf", "image"], None]
27+
FileTypeInput = Literal["pdf", "image"] | None
2828

2929
# Supported image file extensions
3030
_IMAGE_EXTENSIONS = {
@@ -41,9 +41,9 @@
4141

4242

4343
def _infer_file_type_from_source(
44-
source: Union[str, Path, ByteStream],
45-
mime_type: Optional[str] = None,
46-
) -> Optional[FileType]:
44+
source: str | Path | ByteStream,
45+
mime_type: str | None = None,
46+
) -> FileType | None:
4747
"""
4848
Infer file type from file extension or MIME type.
4949
@@ -56,7 +56,7 @@ def _infer_file_type_from_source(
5656
determined.
5757
"""
5858
# Try to get extension from file path
59-
file_path: Optional[str] = None
59+
file_path: str | None = None
6060

6161
# Check if source is a file path
6262
if isinstance(source, (str, Path)):
@@ -86,7 +86,7 @@ def _infer_file_type_from_source(
8686
return None
8787

8888

89-
def _normalize_file_type(file_type: Optional[FileTypeInput]) -> Optional[FileType]:
89+
def _normalize_file_type(file_type: FileTypeInput | None) -> FileType | None:
9090
"""
9191
Normalize file type input to the numeric format expected by the API.
9292
@@ -145,26 +145,26 @@ def __init__(
145145
*,
146146
api_url: str,
147147
access_token: Secret = Secret.from_env_var("AISTUDIO_ACCESS_TOKEN"),
148-
file_type: Optional[FileTypeInput] = None,
149-
use_doc_orientation_classify: Optional[bool] = None,
150-
use_doc_unwarping: Optional[bool] = None,
151-
use_layout_detection: Optional[bool] = None,
152-
use_chart_recognition: Optional[bool] = None,
153-
layout_threshold: Optional[Union[float, dict]] = None,
154-
layout_nms: Optional[bool] = None,
155-
layout_unclip_ratio: Optional[Union[float, tuple[float, float], dict]] = None,
156-
layout_merge_bboxes_mode: Optional[Union[str, dict]] = None,
157-
prompt_label: Optional[str] = None,
158-
format_block_content: Optional[bool] = None,
159-
repetition_penalty: Optional[float] = None,
160-
temperature: Optional[float] = None,
161-
top_p: Optional[float] = None,
162-
min_pixels: Optional[int] = None,
163-
max_pixels: Optional[int] = None,
164-
prettify_markdown: Optional[bool] = None,
165-
show_formula_number: Optional[bool] = None,
166-
visualize: Optional[bool] = None,
167-
additional_params: Optional[dict[str, Any]] = None,
148+
file_type: FileTypeInput | None = None,
149+
use_doc_orientation_classify: bool | None = None,
150+
use_doc_unwarping: bool | None = None,
151+
use_layout_detection: bool | None = None,
152+
use_chart_recognition: bool | None = None,
153+
layout_threshold: float | dict | None = None,
154+
layout_nms: bool | None = None,
155+
layout_unclip_ratio: float | tuple[float, float] | dict | None = None,
156+
layout_merge_bboxes_mode: str | dict | None = None,
157+
prompt_label: str | None = None,
158+
format_block_content: bool | None = None,
159+
repetition_penalty: float | None = None,
160+
temperature: float | None = None,
161+
top_p: float | None = None,
162+
min_pixels: int | None = None,
163+
max_pixels: int | None = None,
164+
prettify_markdown: bool | None = None,
165+
show_formula_number: bool | None = None,
166+
visualize: bool | None = None,
167+
additional_params: dict[str, Any] | None = None,
168168
):
169169
"""
170170
Create a `PaddleOCRVLDocumentConverter` component.
@@ -421,8 +421,8 @@ def _parse(self, data: bytes, file_type: FileType) -> tuple[str, dict[str, Any]]
421421
@component.output_types(documents=list[Document], raw_paddleocr_responses=list[dict[str, Any]])
422422
def run(
423423
self,
424-
sources: list[Union[str, Path, ByteStream]],
425-
meta: Optional[Union[dict[str, Any], list[dict[str, Any]]]] = None,
424+
sources: list[str | Path | ByteStream],
425+
meta: dict[str, Any] | list[dict[str, Any]] | None = None,
426426
) -> dict[str, Any]:
427427
"""
428428
Convert image or PDF files to Documents.
@@ -448,7 +448,7 @@ def run(
448448

449449
meta_list = normalize_metadata(meta, sources_count=len(sources))
450450

451-
for source, metadata in zip(sources, meta_list):
451+
for source, metadata in zip(sources, meta_list, strict=True):
452452
try:
453453
bytestream = get_bytestream_from_source(source)
454454
except Exception as e:

0 commit comments

Comments (0)