diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index 1116e59992f..4bba9078857 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -34,3 +34,8 @@ jobs: - uses: pre-commit/action@v3.0.1 with: extra_args: '--all-files' + + - name: Type check with mypy + run: | + pip install mypy numpy + mypy paddleocr/ diff --git a/paddleocr/__main__.py b/paddleocr/__main__.py index abc3c1b4639..faddf2b32d3 100644 --- a/paddleocr/__main__.py +++ b/paddleocr/__main__.py @@ -18,7 +18,7 @@ from ._cli import main -def console_entry() -> int: +def console_entry() -> None: # See https://docs.python.org/3/library/signal.html#note-on-sigpipe try: # Flush output here to force SIGPIPE to be triggered while inside this diff --git a/paddleocr/_abstract.py b/paddleocr/_abstract.py index 773e4fbb725..0dddaccf028 100644 --- a/paddleocr/_abstract.py +++ b/paddleocr/_abstract.py @@ -13,13 +13,17 @@ # limitations under the License. import abc +import argparse +from typing import Any class CLISubcommandExecutor(metaclass=abc.ABCMeta): @abc.abstractmethod - def add_subparser(self, subparsers): + def add_subparser( + self, subparsers: argparse._SubParsersAction + ) -> argparse.ArgumentParser: raise NotImplementedError @abc.abstractmethod - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: raise NotImplementedError diff --git a/paddleocr/_cli.py b/paddleocr/_cli.py index 99fdeb2dce7..5d52b9559eb 100644 --- a/paddleocr/_cli.py +++ b/paddleocr/_cli.py @@ -19,6 +19,7 @@ import time import warnings from threading import Thread +from typing import Any import requests @@ -54,7 +55,7 @@ from ._utils.logging import logger -def _register_pipelines(subparsers): +def _register_pipelines(subparsers: argparse._SubParsersAction) -> None: for cls in [ DocPreprocessor, DocUnderstanding, @@ -67,12 +68,12 @@ def _register_pipelines(subparsers): SealRecognition, TableRecognitionPipelineV2, ]: - subcommand_executor = cls.get_cli_subcommand_executor() + subcommand_executor = cls.get_cli_subcommand_executor() # type: ignore[attr-defined] subparser = subcommand_executor.add_subparser(subparsers) subparser.set_defaults(executor=subcommand_executor.execute_with_args) -def _register_models(subparsers): +def _register_models(subparsers: argparse._SubParsersAction) -> None: for cls in [ ChartParsing, DocImgOrientationClassification, @@ -88,13 +89,13 @@ def _register_models(subparsers): TextLineOrientationClassification, TextRecognition, ]: - subcommand_executor = cls.get_cli_subcommand_executor() + subcommand_executor = cls.get_cli_subcommand_executor() # type: ignore[attr-defined] subparser = subcommand_executor.add_subparser(subparsers) subparser.set_defaults(executor=subcommand_executor.execute_with_args) -def _register_install_hpi_deps_command(subparsers): - def _install_hpi_deps(args): +def _register_install_hpi_deps_command(subparsers: argparse._SubParsersAction) -> None: + def _install_hpi_deps(args: argparse.Namespace) -> None: hpip = f"hpi-{args.variant}" try: subprocess.check_call(["paddlex", "--install", hpip]) @@ -107,8 +108,10 @@ def _install_hpi_deps(args): subparser.set_defaults(executor=_install_hpi_deps) -def _register_install_genai_server_deps_command(subparsers): - def _install_genai_server_deps(args): +def _register_install_genai_server_deps_command( + subparsers: argparse._SubParsersAction, +) -> None: + def _install_genai_server_deps(args: argparse.Namespace) -> None: try: subprocess.check_call( ["paddlex", "--install", f"genai-{args.variant}-server"] @@ -123,14 +126,14 @@ def _install_genai_server_deps(args): subparser.set_defaults(executor=_install_genai_server_deps) -def _register_genai_server_command(subparsers): +def _register_genai_server_command(subparsers: argparse._SubParsersAction) -> None: # TODO: Register the subparser whether the plugin is installed or not try: from paddlex.inference.genai.server import get_arg_parser, run_genai_server except RuntimeError: return - def _show_prompt_when_server_is_running(host, port, backend): + def _show_prompt_when_server_is_running(host: str, port: int, backend: str) -> None: if host == "0.0.0.0": host = "localhost" while True: @@ -147,7 +150,7 @@ def _show_prompt_when_server_is_running(host, port, backend): 2. Make HTTP requests directly, or using the OpenAI client library.""" logger.info(prompt) - def _run_genai_server(args): + def _run_genai_server(args: argparse.Namespace) -> None: Thread( target=_show_prompt_when_server_is_running, args=(args.host, args.port, args.backend), @@ -165,7 +168,7 @@ def _run_genai_server(args): subparser.set_defaults(executor=_run_genai_server) -def _get_parser(): +def _get_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(prog="paddleocr") parser.add_argument( "-v", "--version", action="version", version=f"%(prog)s {version}" @@ -179,11 +182,11 @@ def _get_parser(): return parser -def _execute(args): +def _execute(args: argparse.Namespace) -> None: args.executor(args) -def main(): +def main() -> None: logger.setLevel(logging.INFO) warnings.filterwarnings("default", category=CLIDeprecationWarning) parser = _get_parser() diff --git a/paddleocr/_common_args.py b/paddleocr/_common_args.py index 6054e5ae6ad..af5b1597c44 100644 --- a/paddleocr/_common_args.py +++ b/paddleocr/_common_args.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Optional + from paddlex.inference import PaddlePredictorOption from paddlex.utils.device import get_default_device, parse_device @@ -28,7 +31,9 @@ from ._utils.cli import str2bool -def parse_common_args(kwargs, *, default_enable_hpi): +def parse_common_args( + kwargs: Dict[str, Any], *, default_enable_hpi: Optional[bool] +) -> Dict[str, Any]: default_vals = { "device": DEFAULT_DEVICE, "enable_hpi": default_enable_hpi, @@ -57,13 +62,15 @@ def parse_common_args(kwargs, *, default_enable_hpi): return kwargs -def prepare_common_init_args(model_name, common_args): +def prepare_common_init_args( + model_name: Optional[str], common_args: Dict[str, Any] +) -> Dict[str, Any]: device = common_args["device"] if device is None: device = get_default_device() device_type, _ = parse_device(device) - init_kwargs = {} + init_kwargs: Dict[str, Any] = {} init_kwargs["device"] = device init_kwargs["use_hpip"] = common_args["enable_hpi"] @@ -94,7 +101,12 @@ def prepare_common_init_args(model_name, common_args): return init_kwargs -def add_common_cli_opts(parser, *, default_enable_hpi, allow_multiple_devices): +def add_common_cli_opts( + parser: argparse.ArgumentParser, + *, + default_enable_hpi: Optional[bool], + allow_multiple_devices: bool, +) -> None: if allow_multiple_devices: help_ = "Device(s) to use for inference, e.g., `cpu`, `gpu`, `npu`, `gpu:0`, `gpu:0,1`. If multiple devices are specified, inference will be performed in parallel. Note that parallel inference is not always supported. By default, GPU 0 will be used if available; otherwise, the CPU will be used." else: diff --git a/paddleocr/_constants.py b/paddleocr/_constants.py index d96ba780ffa..a2292555401 100644 --- a/paddleocr/_constants.py +++ b/paddleocr/_constants.py @@ -1,3 +1,5 @@ +from typing import List, Optional + # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,11 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -DEFAULT_DEVICE = None -DEFAULT_USE_TENSORRT = False -DEFAULT_PRECISION = "fp32" -DEFAULT_ENABLE_MKLDNN = True -DEFAULT_MKLDNN_CACHE_CAPACITY = 10 -DEFAULT_CPU_THREADS = 10 -SUPPORTED_PRECISION_LIST = ["fp32", "fp16"] -DEFAULT_USE_CINN = False +DEFAULT_DEVICE: Optional[str] = None +DEFAULT_USE_TENSORRT: bool = False +DEFAULT_PRECISION: str = "fp32" +DEFAULT_ENABLE_MKLDNN: bool = True +DEFAULT_MKLDNN_CACHE_CAPACITY: int = 10 +DEFAULT_CPU_THREADS: int = 10 +SUPPORTED_PRECISION_LIST: List[str] = ["fp32", "fp16"] +DEFAULT_USE_CINN: bool = False diff --git a/paddleocr/_env.py b/paddleocr/_env.py index 9f90e177168..87a31743ebd 100644 --- a/paddleocr/_env.py +++ b/paddleocr/_env.py @@ -14,6 +14,6 @@ import os -DISABLE_AUTO_LOGGING_CONFIG = ( +DISABLE_AUTO_LOGGING_CONFIG: bool = ( os.getenv("PADDLEOCR_DISABLE_AUTO_LOGGING_CONFIG", "0") == "1" ) diff --git a/paddleocr/_models/_doc_vlm.py b/paddleocr/_models/_doc_vlm.py index 18134fd0f4b..ca28dd1c6bd 100644 --- a/paddleocr/_models/_doc_vlm.py +++ b/paddleocr/_models/_doc_vlm.py @@ -13,6 +13,8 @@ # limitations under the License. import abc +import argparse +from typing import Any, Dict, Type from .._utils.cli import ( get_subcommand_args, @@ -25,13 +27,13 @@ class BaseDocVLM(PaddleXPredictorWrapper): def __init__( self, - *args, - **kwargs, - ): - self._extra_init_args = {} + *args: Any, + **kwargs: Any, + ) -> None: + self._extra_init_args: Dict[str, Any] = {} super().__init__(*args, **kwargs) - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]: return self._extra_init_args @@ -40,10 +42,10 @@ class BaseDocVLMSubcommandExecutor(PredictorCLISubcommandExecutor): @property @abc.abstractmethod - def wrapper_cls(self): + def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]: raise NotImplementedError - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) params["input"] = self.input_validator(params["input"]) perform_simple_inference(self.wrapper_cls, params) diff --git a/paddleocr/_models/_image_classification.py b/paddleocr/_models/_image_classification.py index 2bad088deb0..5edfd2e750c 100644 --- a/paddleocr/_models/_image_classification.py +++ b/paddleocr/_models/_image_classification.py @@ -13,6 +13,8 @@ # limitations under the License. import abc +import argparse +from typing import Any, Dict, Optional, Type from .._utils.cli import ( add_simple_inference_args, @@ -26,20 +28,20 @@ class ImageClassification(PaddleXPredictorWrapper): def __init__( self, *, - topk=None, - **kwargs, - ): + topk: Optional[int] = None, + **kwargs: Any, + ) -> None: self._extra_init_args = { "topk": topk, } super().__init__(**kwargs) - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]: return self._extra_init_args class ImageClassificationSubcommandExecutor(PredictorCLISubcommandExecutor): - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -50,9 +52,9 @@ def _update_subparser(self, subparser): @property @abc.abstractmethod - def wrapper_cls(self): + def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]: raise NotImplementedError - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(self.wrapper_cls, params) diff --git a/paddleocr/_models/_object_detection.py b/paddleocr/_models/_object_detection.py index f7615d9f8f0..aedf8b7e2c7 100644 --- a/paddleocr/_models/_object_detection.py +++ b/paddleocr/_models/_object_detection.py @@ -13,6 +13,8 @@ # limitations under the License. import abc +import argparse +from typing import Any, Dict, Optional, Tuple, Type, Union from .._utils.cli import ( add_simple_inference_args, @@ -27,13 +29,13 @@ class ObjectDetection(PaddleXPredictorWrapper): def __init__( self, *, - img_size=None, - threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - **kwargs, - ): + img_size: Optional[Union[int, Tuple[int, int]]] = None, + threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[Union[str, dict]] = None, + **kwargs: Any, + ) -> None: self._extra_init_args = { "img_size": img_size, "threshold": threshold, @@ -43,12 +45,12 @@ def __init__( } super().__init__(**kwargs) - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]: return self._extra_init_args class ObjectDetectionSubcommandExecutor(PredictorCLISubcommandExecutor): - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -79,9 +81,9 @@ def _update_subparser(self, subparser): @property @abc.abstractmethod - def wrapper_cls(self): + def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]: raise NotImplementedError - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(self.wrapper_cls, params) diff --git a/paddleocr/_models/_text_detection.py b/paddleocr/_models/_text_detection.py index 22122d3a70d..edabf74d941 100644 --- a/paddleocr/_models/_text_detection.py +++ b/paddleocr/_models/_text_detection.py @@ -12,22 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Optional + class TextDetectionMixin: def __init__( self, *, - limit_side_len=None, - limit_type=None, - thresh=None, - box_thresh=None, - unclip_ratio=None, - input_shape=None, - **kwargs, - ): + limit_side_len: Optional[int] = None, + limit_type: Optional[str] = None, + max_side_limit: Optional[int] = None, + thresh: Optional[float] = None, + box_thresh: Optional[float] = None, + unclip_ratio: Optional[float] = None, + input_shape: Optional[tuple] = None, + **kwargs: Any, + ) -> None: self._extra_init_args = { "limit_side_len": limit_side_len, "limit_type": limit_type, + "max_side_limit": max_side_limit, "thresh": thresh, "box_thresh": box_thresh, "unclip_ratio": unclip_ratio, @@ -35,12 +40,12 @@ def __init__( } super().__init__(**kwargs) - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]: return self._extra_init_args class TextDetectionSubcommandExecutorMixin: - def _add_text_detection_args(self, subparser): + def _add_text_detection_args(self, subparser: argparse.ArgumentParser) -> None: subparser.add_argument( "--limit_side_len", type=int, @@ -51,6 +56,11 @@ def _add_text_detection_args(self, subparser): type=str, help="This determines how the side length limit is applied to the input image before feeding it into the model.", ) + subparser.add_argument( + "--max_side_limit", + type=int, + help="Maximum side length limit for the input image.", + ) subparser.add_argument( "--thresh", type=float, diff --git a/paddleocr/_models/base.py b/paddleocr/_models/base.py index 2dd88bc0db9..4298675fa45 100644 --- a/paddleocr/_models/base.py +++ b/paddleocr/_models/base.py @@ -13,6 +13,8 @@ # limitations under the License. import abc +import argparse +from typing import Any, Dict, Iterator, List, Optional from paddlex import create_predictor from paddlex.utils.deps import DependencyError @@ -23,18 +25,19 @@ parse_common_args, prepare_common_init_args, ) +from .._types import PredictResult -_DEFAULT_ENABLE_HPI = False +_DEFAULT_ENABLE_HPI: bool = False class PaddleXPredictorWrapper(metaclass=abc.ABCMeta): def __init__( self, *, - model_name=None, - model_dir=None, - **common_args, - ): + model_name: Optional[str] = None, + model_dir: Optional[str] = None, + **common_args: Any, + ) -> None: super().__init__() self._model_name = ( model_name if model_name is not None else self.default_model_name @@ -47,28 +50,28 @@ def __init__( @property @abc.abstractmethod - def default_model_name(self): + def default_model_name(self) -> str: raise NotImplementedError - def predict_iter(self, *args, **kwargs): + def predict_iter(self, *args: Any, **kwargs: Any) -> Iterator[PredictResult]: return self.paddlex_predictor.predict(*args, **kwargs) - def predict(self, *args, **kwargs): + def predict(self, *args: Any, **kwargs: Any) -> List[PredictResult]: result = list(self.predict_iter(*args, **kwargs)) return result - def close(self): + def close(self) -> None: self.paddlex_predictor.close() @classmethod @abc.abstractmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: raise NotImplementedError - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]: return {} - def _create_paddlex_predictor(self): + def _create_paddlex_predictor(self) -> Any: kwargs = prepare_common_init_args(self._model_name, self._common_args) kwargs = {**self._get_extra_paddlex_predictor_init_args(), **kwargs} # Should we check model names? @@ -85,10 +88,12 @@ def _create_paddlex_predictor(self): class PredictorCLISubcommandExecutor(CLISubcommandExecutor): @property @abc.abstractmethod - def subparser_name(self): + def subparser_name(self) -> str: raise NotImplementedError - def add_subparser(self, subparsers): + def add_subparser( + self, subparsers: argparse._SubParsersAction + ) -> argparse.ArgumentParser: subparser = subparsers.add_parser(name=self.subparser_name) self._update_subparser(subparser) subparser.add_argument("--model_name", type=str, help="Name of the model.") @@ -103,5 +108,5 @@ def add_subparser(self, subparsers): return subparser @abc.abstractmethod - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: raise NotImplementedError diff --git a/paddleocr/_models/chart_parsing.py b/paddleocr/_models/chart_parsing.py index 0078bff813a..9d2bc1bf8fc 100644 --- a/paddleocr/_models/chart_parsing.py +++ b/paddleocr/_models/chart_parsing.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Type + +from .._abstract import CLISubcommandExecutor from .._utils.cli import add_simple_inference_args +from .base import PaddleXPredictorWrapper from ._doc_vlm import ( BaseDocVLM, BaseDocVLMSubcommandExecutor, @@ -21,24 +26,24 @@ class ChartParsing(BaseDocVLM): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-Chart2Table" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return ChartParsingSubcommandExecutor() class ChartParsingSubcommandExecutor(BaseDocVLMSubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "chart_parsing" @property - def wrapper_cls(self): + def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]: return ChartParsing - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args( subparser, input_help='Input dict, e.g. `{"image": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png"}`.', diff --git a/paddleocr/_models/doc_img_orientation_classification.py b/paddleocr/_models/doc_img_orientation_classification.py index 7339dfefa70..f5dc94a04f7 100644 --- a/paddleocr/_models/doc_img_orientation_classification.py +++ b/paddleocr/_models/doc_img_orientation_classification.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Type + +from .._abstract import CLISubcommandExecutor +from .base import PaddleXPredictorWrapper from ._image_classification import ( ImageClassification, ImageClassificationSubcommandExecutor, @@ -20,11 +24,11 @@ class DocImgOrientationClassification(ImageClassification): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-LCNet_x1_0_doc_ori" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return DocImgOrientationClassificationSubcommandExecutor() @@ -32,9 +36,9 @@ class DocImgOrientationClassificationSubcommandExecutor( ImageClassificationSubcommandExecutor ): @property - def subparser_name(self): + def subparser_name(self) -> str: return "doc_img_orientation_classification" @property - def wrapper_cls(self): + def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]: return DocImgOrientationClassification diff --git a/paddleocr/_models/doc_vlm.py b/paddleocr/_models/doc_vlm.py index b1452a3112d..b06ce5c2670 100644 --- a/paddleocr/_models/doc_vlm.py +++ b/paddleocr/_models/doc_vlm.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Type + +from .._abstract import CLISubcommandExecutor from .._utils.cli import add_simple_inference_args +from .base import PaddleXPredictorWrapper from ._doc_vlm import ( BaseDocVLM, BaseDocVLMSubcommandExecutor, @@ -21,24 +26,24 @@ class DocVLM(BaseDocVLM): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-DocBee2-3B" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return DocVLMSubcommandExecutor() class DocVLMSubcommandExecutor(BaseDocVLMSubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "doc_vlm" @property - def wrapper_cls(self): + def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]: return DocVLM - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args( subparser, input_help='Input dict, e.g. `{"image": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/medal_table.png", "query": "Recognize this table"}`.', diff --git a/paddleocr/_models/formula_recognition.py b/paddleocr/_models/formula_recognition.py index 99d2ba702cb..9c6154228bd 100644 --- a/paddleocr/_models/formula_recognition.py +++ b/paddleocr/_models/formula_recognition.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -23,32 +27,32 @@ class FormulaRecognition(PaddleXPredictorWrapper): def __init__( self, - *args, - **kwargs, - ): - self._extra_init_args = {} + *args: Any, + **kwargs: Any, + ) -> None: + self._extra_init_args: Dict[str, Any] = {} super().__init__(*args, **kwargs) @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-FormulaNet_plus-M" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return FormulaRecognitionSubcommandExecutor() - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]: return self._extra_init_args class FormulaRecognitionSubcommandExecutor(PredictorCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "formula_recognition" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(FormulaRecognition, params) diff --git a/paddleocr/_models/layout_detection.py b/paddleocr/_models/layout_detection.py index 14427fba8bc..e5e2e89b398 100644 --- a/paddleocr/_models/layout_detection.py +++ b/paddleocr/_models/layout_detection.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Type + +from .._abstract import CLISubcommandExecutor +from .base import PaddleXPredictorWrapper from ._object_detection import ( ObjectDetection, ObjectDetectionSubcommandExecutor, @@ -20,19 +24,19 @@ class LayoutDetection(ObjectDetection): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-DocLayout_plus-L" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return LayoutDetectionSubcommandExecutor() class LayoutDetectionSubcommandExecutor(ObjectDetectionSubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "layout_detection" @property - def wrapper_cls(self): + def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]: return LayoutDetection diff --git a/paddleocr/_models/seal_text_detection.py b/paddleocr/_models/seal_text_detection.py index adfff705268..67da893f133 100644 --- a/paddleocr/_models/seal_text_detection.py +++ b/paddleocr/_models/seal_text_detection.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -23,11 +27,11 @@ class SealTextDetection(TextDetectionMixin, PaddleXPredictorWrapper): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-OCRv4_mobile_seal_det" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return SealTextDetectionSubcommandExecutor() @@ -35,13 +39,13 @@ class SealTextDetectionSubcommandExecutor( TextDetectionSubcommandExecutorMixin, PredictorCLISubcommandExecutor ): @property - def subparser_name(self): + def subparser_name(self) -> str: return "seal_text_detection" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) self._add_text_detection_args(subparser) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(SealTextDetection, params) diff --git a/paddleocr/_models/table_cells_detection.py b/paddleocr/_models/table_cells_detection.py index afca6ed78b0..9bc7ef5078e 100644 --- a/paddleocr/_models/table_cells_detection.py +++ b/paddleocr/_models/table_cells_detection.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Type + +from .._abstract import CLISubcommandExecutor +from .base import PaddleXPredictorWrapper from ._object_detection import ( ObjectDetection, ObjectDetectionSubcommandExecutor, @@ -20,19 +24,19 @@ class TableCellsDetection(ObjectDetection): @property - def default_model_name(self): + def default_model_name(self) -> str: return "RT-DETR-L_wired_table_cell_det" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TableCellsDetectionSubcommandExecutor() class TableCellsDetectionSubcommandExecutor(ObjectDetectionSubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "table_cells_detection" @property - def wrapper_cls(self): + def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]: return TableCellsDetection diff --git a/paddleocr/_models/table_classification.py b/paddleocr/_models/table_classification.py index 028e8d830cd..ae8b1547950 100644 --- a/paddleocr/_models/table_classification.py +++ b/paddleocr/_models/table_classification.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Type + +from .._abstract import CLISubcommandExecutor +from .base import PaddleXPredictorWrapper from ._image_classification import ( ImageClassification, ImageClassificationSubcommandExecutor, @@ -20,19 +24,19 @@ class TableClassification(ImageClassification): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-LCNet_x1_0_table_cls" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TableClassificationSubcommandExecutor() class TableClassificationSubcommandExecutor(ImageClassificationSubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "table_classification" @property - def wrapper_cls(self): + def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]: return TableClassification diff --git a/paddleocr/_models/table_structure_recognition.py b/paddleocr/_models/table_structure_recognition.py index 270bd7b27b3..b3148bbf96d 100644 --- a/paddleocr/_models/table_structure_recognition.py +++ b/paddleocr/_models/table_structure_recognition.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -23,32 +27,32 @@ class TableStructureRecognition(PaddleXPredictorWrapper): def __init__( self, - *args, - **kwargs, - ): - self._extra_init_args = {} + *args: Any, + **kwargs: Any, + ) -> None: + self._extra_init_args: Dict[str, Any] = {} super().__init__(*args, **kwargs) @property - def default_model_name(self): + def default_model_name(self) -> str: return "SLANet" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TableStructureRecognitionSubcommandExecutor() - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]: return self._extra_init_args class TableStructureRecognitionSubcommandExecutor(PredictorCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "table_structure_recognition" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(TableStructureRecognition, params) diff --git a/paddleocr/_models/text_detection.py b/paddleocr/_models/text_detection.py index 809dbd23240..3d307f2c34e 100644 --- a/paddleocr/_models/text_detection.py +++ b/paddleocr/_models/text_detection.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -23,11 +27,11 @@ class TextDetection(TextDetectionMixin, PaddleXPredictorWrapper): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-OCRv5_server_det" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TextDetectionSubcommandExecutor() @@ -35,13 +39,13 @@ class TextDetectionSubcommandExecutor( TextDetectionSubcommandExecutorMixin, PredictorCLISubcommandExecutor ): @property - def subparser_name(self): + def subparser_name(self) -> str: return "text_detection" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) self._add_text_detection_args(subparser) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(TextDetection, params) diff --git a/paddleocr/_models/text_image_unwarping.py b/paddleocr/_models/text_image_unwarping.py index 6dbbe5698cd..1333ab09194 100644 --- a/paddleocr/_models/text_image_unwarping.py +++ b/paddleocr/_models/text_image_unwarping.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -23,32 +27,32 @@ class TextImageUnwarping(PaddleXPredictorWrapper): def __init__( self, - *args, - **kwargs, - ): - self._extra_init_args = {} + *args: Any, + **kwargs: Any, + ) -> None: + self._extra_init_args: Dict[str, Any] = {} super().__init__(*args, **kwargs) @property - def default_model_name(self): + def default_model_name(self) -> str: return "UVDoc" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TextImageUnwarpingSubcommandExecutor() - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]: return self._extra_init_args class TextImageUnwarpingSubcommandExecutor(PredictorCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "text_image_unwarping" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(TextImageUnwarping, params) diff --git a/paddleocr/_models/text_recognition.py b/paddleocr/_models/text_recognition.py index 4f96f8e84de..7887a34700f 100644 --- a/paddleocr/_models/text_recognition.py +++ b/paddleocr/_models/text_recognition.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Optional + +from .._abstract import CLISubcommandExecutor from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -24,32 +28,32 @@ class TextRecognition(PaddleXPredictorWrapper): def __init__( self, *, - input_shape=None, - **kwargs, - ): + input_shape: Optional[tuple] = None, + **kwargs: Any, + ) -> None: self._extra_init_args = { "input_shape": input_shape, } super().__init__(**kwargs) @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-OCRv5_server_rec" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TextRecognitionSubcommandExecutor() - def _get_extra_paddlex_predictor_init_args(self): + def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]: return self._extra_init_args class TextRecognitionSubcommandExecutor(PredictorCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "text_recognition" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( "--input_shape", @@ -59,6 +63,6 @@ def _update_subparser(self, subparser): help="Input shape of the model.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(TextRecognition, params) diff --git a/paddleocr/_models/textline_orientation_classification.py b/paddleocr/_models/textline_orientation_classification.py index 908c0ddf165..f4de4a4db6b 100644 --- a/paddleocr/_models/textline_orientation_classification.py +++ b/paddleocr/_models/textline_orientation_classification.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Type + +from .._abstract import CLISubcommandExecutor +from .base import PaddleXPredictorWrapper from ._image_classification import ( ImageClassification, ImageClassificationSubcommandExecutor, @@ -20,11 +24,11 @@ class TextLineOrientationClassification(ImageClassification): @property - def default_model_name(self): + def default_model_name(self) -> str: return "PP-LCNet_x0_25_textline_ori" @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TextLineOrientationClassificationSubcommandExecutor() @@ -32,9 +36,9 @@ class TextLineOrientationClassificationSubcommandExecutor( ImageClassificationSubcommandExecutor ): @property - def subparser_name(self): + def subparser_name(self) -> str: return "textline_orientation_classification" @property - def wrapper_cls(self): + def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]: return TextLineOrientationClassification diff --git a/paddleocr/_pipelines/_patch_layout_parsing.py b/paddleocr/_pipelines/_patch_layout_parsing.py index 6b734009651..83a5e695a83 100644 --- a/paddleocr/_pipelines/_patch_layout_parsing.py +++ b/paddleocr/_pipelines/_patch_layout_parsing.py @@ -24,6 +24,7 @@ """ import logging +from typing import List, Sequence, Union import numpy as np @@ -32,7 +33,9 @@ _patched = False -def _fixed_calculate_overlap_ratio(bbox1, bbox2, mode="union"): +def _fixed_calculate_overlap_ratio( + bbox1: Sequence[float], bbox2: Sequence[float], mode: str = "union" +) -> float: """ Calculate the overlap ratio between two bounding boxes. @@ -73,7 +76,9 @@ def _fixed_calculate_overlap_ratio(bbox1, bbox2, mode="union"): return inter_area / ref_area -def _fixed_calculate_minimum_enclosing_bbox(bboxes): +def _fixed_calculate_minimum_enclosing_bbox( + bboxes: Union[List[Sequence[float]], np.ndarray], +) -> np.ndarray: """ Calculate the minimum enclosing bounding box for a list of bounding boxes. @@ -90,15 +95,15 @@ def _fixed_calculate_minimum_enclosing_bbox(bboxes): bboxes_array = np.array(bboxes) - min_x = np.min(bboxes_array[:, 0]) - min_y = np.min(bboxes_array[:, 1]) - max_x = np.max(bboxes_array[:, 2]) - max_y = np.max(bboxes_array[:, 3]) + min_x: np.floating = np.min(bboxes_array[:, 0]) + min_y: np.floating = np.min(bboxes_array[:, 1]) + max_x: np.floating = np.max(bboxes_array[:, 2]) + max_y: np.floating = np.max(bboxes_array[:, 3]) return np.array([min_x, min_y, max_x, max_y]) -def apply_patches(): +def apply_patches() -> None: """ Apply patches to paddlex layout parsing utilities to fix integer overflow and empty bounding box errors. diff --git a/paddleocr/_pipelines/base.py b/paddleocr/_pipelines/base.py index f400cab41e9..009db66fc8f 100644 --- a/paddleocr/_pipelines/base.py +++ b/paddleocr/_pipelines/base.py @@ -13,6 +13,8 @@ # limitations under the License. import abc +import argparse +from typing import Any, Dict, Optional, Union import yaml from paddlex import create_pipeline @@ -27,10 +29,10 @@ prepare_common_init_args, ) -_DEFAULT_ENABLE_HPI = None +_DEFAULT_ENABLE_HPI: Optional[bool] = None -def _merge_dicts(d1, d2): +def _merge_dicts(d1: Dict[str, Any], d2: Dict[str, Any]) -> Dict[str, Any]: res = d1.copy() for k, v in d2.items(): if k in res and isinstance(res[k], dict) and isinstance(v, dict): @@ -40,7 +42,7 @@ def _merge_dicts(d1, d2): return res -def _to_builtin(obj): +def _to_builtin(obj: Any) -> Any: if isinstance(obj, AttrDict): return {k: _to_builtin(v) for k, v in obj.items()} elif isinstance(obj, dict): @@ -55,9 +57,9 @@ class PaddleXPipelineWrapper(metaclass=abc.ABCMeta): def __init__( self, *, - paddlex_config=None, - **common_args, - ): + paddlex_config: Optional[Union[str, Dict[str, Any]]] = None, + **common_args: Any, + ) -> None: super().__init__() self._paddlex_config = paddlex_config self._common_args = parse_common_args( @@ -68,26 +70,26 @@ def __init__( @property @abc.abstractmethod - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: raise NotImplementedError - def export_paddlex_config_to_yaml(self, yaml_path): + def export_paddlex_config_to_yaml(self, yaml_path: str) -> None: with open(yaml_path, "w", encoding="utf-8") as f: config = _to_builtin(self._merged_paddlex_config) yaml.safe_dump(config, f) - def close(self): + def close(self) -> None: self.paddlex_pipeline.close() @classmethod @abc.abstractmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: raise NotImplementedError - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: return {} - def _get_merged_paddlex_config(self): + def _get_merged_paddlex_config(self) -> Dict[str, Any]: if self._paddlex_config is None: config = load_pipeline_config(self._paddlex_pipeline_name) elif isinstance(self._paddlex_config, str): @@ -99,7 +101,7 @@ def _get_merged_paddlex_config(self): return _merge_dicts(config, overrides) - def _create_paddlex_pipeline(self): + def _create_paddlex_pipeline(self) -> Any: kwargs = prepare_common_init_args(None, self._common_args) try: return create_pipeline(config=self._merged_paddlex_config, **kwargs) @@ -112,10 +114,12 @@ def _create_paddlex_pipeline(self): class PipelineCLISubcommandExecutor(CLISubcommandExecutor): @property @abc.abstractmethod - def subparser_name(self): + def subparser_name(self) -> str: raise NotImplementedError - def add_subparser(self, subparsers): + def add_subparser( + self, subparsers: argparse._SubParsersAction + ) -> argparse.ArgumentParser: subparser = subparsers.add_parser(name=self.subparser_name) self._update_subparser(subparser) add_common_cli_opts( @@ -131,5 +135,5 @@ def add_subparser(self, subparsers): return subparser @abc.abstractmethod - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: raise NotImplementedError diff --git a/paddleocr/_pipelines/doc_preprocessor.py b/paddleocr/_pipelines/doc_preprocessor.py index b8c34df3773..13959add172 100644 --- a/paddleocr/_pipelines/doc_preprocessor.py +++ b/paddleocr/_pipelines/doc_preprocessor.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Iterator, List, Optional + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -25,14 +30,14 @@ class DocPreprocessor(PaddleXPipelineWrapper): def __init__( self, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - **kwargs, - ): + doc_orientation_classify_model_name: Optional[str] = None, + doc_orientation_classify_model_dir: Optional[str] = None, + doc_unwarping_model_name: Optional[str] = None, + doc_unwarping_model_dir: Optional[str] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + **kwargs: Any, + ) -> None: self._params = { "doc_orientation_classify_model_name": doc_orientation_classify_model_name, @@ -45,16 +50,16 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "doc_preprocessor" def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -63,11 +68,11 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + ) -> List[PredictResult]: return list( self.predict_iter( input, @@ -77,10 +82,10 @@ def predict( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return DocPreprocessorCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: STRUCTURE = { "SubModules.DocOrientationClassify.model_name": self._params[ "doc_orientation_classify_model_name" @@ -104,10 +109,10 @@ def _get_paddlex_config_overrides(self): class DocPreprocessorCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "doc_preprocessor" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -141,7 +146,7 @@ def _update_subparser(self, subparser): help="Whether to use text image unwarping.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(DocPreprocessor, params) diff --git a/paddleocr/_pipelines/doc_understanding.py b/paddleocr/_pipelines/doc_understanding.py index 0cde72fc97b..fe691a796e4 100644 --- a/paddleocr/_pipelines/doc_understanding.py +++ b/paddleocr/_pipelines/doc_understanding.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Iterator, List, Optional + from paddlex.utils.pipeline_arguments import custom_type +from .._abstract import CLISubcommandExecutor +from .._types import PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -26,11 +31,11 @@ class DocUnderstanding(PaddleXPipelineWrapper): def __init__( self, - doc_understanding_model_name=None, - doc_understanding_model_dir=None, - doc_understanding_batch_size=None, - **kwargs, - ): + doc_understanding_model_name: Optional[str] = None, + doc_understanding_model_dir: Optional[str] = None, + doc_understanding_batch_size: Optional[int] = None, + **kwargs: Any, + ) -> None: self._params = { "doc_understanding_model_name": doc_understanding_model_name, @@ -40,24 +45,24 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "doc_understanding" - def predict_iter(self, input, **kwargs): + def predict_iter(self, input: dict, **kwargs: Any) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict(input, **kwargs) def predict( self, - input, - **kwargs, - ): + input: dict, + **kwargs: Any, + ) -> List[PredictResult]: return list(self.predict_iter(input, **kwargs)) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return DocUnderstandingCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: STRUCTURE = { "SubModules.DocUnderstanding.model_name": self._params[ "doc_understanding_model_name" @@ -76,10 +81,10 @@ class DocUnderstandingCLISubcommandExecutor(PipelineCLISubcommandExecutor): input_validator = staticmethod(custom_type(dict)) @property - def subparser_name(self): + def subparser_name(self) -> str: return "doc_understanding" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args( subparser, input_help='Input dict, e.g. `{"image": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/medal_table.png", "query": "Recognize this table"}`.', @@ -97,11 +102,11 @@ def _update_subparser(self, subparser): ) subparser.add_argument( "--doc_understanding_batch_size", - type=str, + type=int, help="Batch size for the document understanding model.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) params["input"] = self.input_validator(params["input"]) perform_simple_inference(DocUnderstanding, params) diff --git a/paddleocr/_pipelines/formula_recognition.py b/paddleocr/_pipelines/formula_recognition.py index 7588f81ac80..8559ab65dd7 100644 --- a/paddleocr/_pipelines/formula_recognition.py +++ b/paddleocr/_pipelines/formula_recognition.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, LayoutDetResult, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -25,27 +30,27 @@ class FormulaRecognitionPipeline(PaddleXPipelineWrapper): def __init__( self, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_orientation_classify_batch_size=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - doc_unwarping_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - layout_detection_model_name=None, - layout_detection_model_dir=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - layout_detection_batch_size=None, - use_layout_detection=None, - formula_recognition_model_name=None, - formula_recognition_model_dir=None, - formula_recognition_batch_size=None, - **kwargs, - ): + doc_orientation_classify_model_name: Optional[str] = None, + doc_orientation_classify_model_dir: Optional[str] = None, + doc_orientation_classify_batch_size: Optional[int] = None, + doc_unwarping_model_name: Optional[str] = None, + doc_unwarping_model_dir: Optional[str] = None, + doc_unwarping_batch_size: Optional[int] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + layout_detection_model_name: Optional[str] = None, + layout_detection_model_dir: Optional[str] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + layout_detection_batch_size: Optional[int] = None, + use_layout_detection: Optional[bool] = None, + formula_recognition_model_name: Optional[str] = None, + formula_recognition_model_dir: Optional[str] = None, + formula_recognition_batch_size: Optional[int] = None, + **kwargs: Any, + ) -> None: params = locals().copy() params.pop("self") params.pop("kwargs") @@ -54,23 +59,23 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "formula_recognition" def predict_iter( self, - input, + input: InputType, *, - use_layout_detection=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - layout_det_res=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - **kwargs, - ): + use_layout_detection: Optional[bool] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + layout_det_res: LayoutDetResult = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_layout_detection=use_layout_detection, @@ -86,18 +91,18 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_layout_detection=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - layout_det_res=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - **kwargs, - ): + use_layout_detection: Optional[bool] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + layout_det_res: LayoutDetResult = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + **kwargs: Any, + ) -> List[PredictResult]: return list( self.predict_iter( input, @@ -114,10 +119,10 @@ def predict( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return FormulaRecognitionPipelineCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: STRUCTURE = { "use_layout_detection": self._params["use_layout_detection"], "SubModules.LayoutDetection.model_name": self._params[ @@ -178,10 +183,10 @@ def _get_paddlex_config_overrides(self): class FormulaRecognitionPipelineCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "formula_recognition_pipeline" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -280,6 +285,6 @@ def _update_subparser(self, subparser): help="Batch size for formula recognition.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(FormulaRecognitionPipeline, params) diff --git a/paddleocr/_pipelines/ocr.py b/paddleocr/_pipelines/ocr.py index 151b0fd6118..960cb750638 100644 --- a/paddleocr/_pipelines/ocr.py +++ b/paddleocr/_pipelines/ocr.py @@ -16,9 +16,13 @@ # arguments from the pipeline class, to reduce boilerplate and improve # maintainability? +import argparse import sys import warnings +from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -55,34 +59,35 @@ class PaddleOCR(PaddleXPipelineWrapper): def __init__( self, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - text_detection_model_name=None, - text_detection_model_dir=None, - textline_orientation_model_name=None, - textline_orientation_model_dir=None, - textline_orientation_batch_size=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_det_input_shape=None, - text_rec_score_thresh=None, - return_word_box=None, - text_rec_input_shape=None, - lang=None, - ocr_version=None, - **kwargs, - ): + doc_orientation_classify_model_name: Optional[str] = None, + doc_orientation_classify_model_dir: Optional[str] = None, + doc_unwarping_model_name: Optional[str] = None, + doc_unwarping_model_dir: Optional[str] = None, + text_detection_model_name: Optional[str] = None, + text_detection_model_dir: Optional[str] = None, + textline_orientation_model_name: Optional[str] = None, + textline_orientation_model_dir: Optional[str] = None, + textline_orientation_batch_size: Optional[int] = None, + text_recognition_model_name: Optional[str] = None, + text_recognition_model_dir: Optional[str] = None, + text_recognition_batch_size: Optional[int] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_max_side_limit: Optional[int] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_det_input_shape: Optional[tuple] = None, + text_rec_score_thresh: Optional[float] = None, + return_word_box: Optional[bool] = None, + text_rec_input_shape: Optional[tuple] = None, + lang: Optional[str] = None, + ocr_version: Optional[str] = None, + **kwargs: Any, + ) -> None: if ocr_version is not None and ocr_version not in _SUPPORTED_OCR_VERSIONS: raise ValueError( f"Invalid OCR version: {ocr_version}. Supported values are {_SUPPORTED_OCR_VERSIONS}." @@ -134,6 +139,7 @@ def __init__( "use_textline_orientation": use_textline_orientation, "text_det_limit_side_len": text_det_limit_side_len, "text_det_limit_type": text_det_limit_type, + "text_det_max_side_limit": text_det_max_side_limit, "text_det_thresh": text_det_thresh, "text_det_box_thresh": text_det_box_thresh, "text_det_unclip_ratio": text_det_unclip_ratio, @@ -163,24 +169,25 @@ def __init__( super().__init__(**base_params) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "OCR" def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - return_word_box=None, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_max_side_limit: Optional[int] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + return_word_box: Optional[bool] = None, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -188,6 +195,7 @@ def predict_iter( use_textline_orientation=use_textline_orientation, text_det_limit_side_len=text_det_limit_side_len, text_det_limit_type=text_det_limit_type, + text_det_max_side_limit=text_det_max_side_limit, text_det_thresh=text_det_thresh, text_det_box_thresh=text_det_box_thresh, text_det_unclip_ratio=text_det_unclip_ratio, @@ -197,19 +205,20 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - return_word_box=None, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_max_side_limit: Optional[int] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + return_word_box: Optional[bool] = None, + ) -> List[PredictResult]: return list( self.predict_iter( input, @@ -218,6 +227,7 @@ def predict( use_textline_orientation=use_textline_orientation, text_det_limit_side_len=text_det_limit_side_len, text_det_limit_type=text_det_limit_type, + text_det_max_side_limit=text_det_max_side_limit, text_det_thresh=text_det_thresh, text_det_box_thresh=text_det_box_thresh, text_det_unclip_ratio=text_det_unclip_ratio, @@ -227,14 +237,14 @@ def predict( ) @deprecated("Please use `predict` instead.") - def ocr(self, img, **kwargs): + def ocr(self, img: InputType, **kwargs: Any) -> List[PredictResult]: return self.predict(img, **kwargs) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return PaddleOCRCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: STRUCTURE = { "SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_name": self._params[ "doc_orientation_classify_model_name" @@ -285,6 +295,9 @@ def _get_paddlex_config_overrides(self): "text_det_limit_side_len" ], "SubModules.TextDetection.limit_type": self._params["text_det_limit_type"], + "SubModules.TextDetection.max_side_limit": self._params[ + "text_det_max_side_limit" + ], "SubModules.TextDetection.thresh": self._params["text_det_thresh"], "SubModules.TextDetection.box_thresh": self._params["text_det_box_thresh"], "SubModules.TextDetection.unclip_ratio": self._params[ @@ -305,7 +318,9 @@ def _get_paddlex_config_overrides(self): } return create_config_from_structure(STRUCTURE) - def _get_ocr_model_names(self, lang, ppocr_version): + def _get_ocr_model_names( + self, lang: Optional[str], ppocr_version: Optional[str] + ) -> Tuple[Optional[str], Optional[str]]: LATIN_LANGS = [ "af", "az", @@ -513,10 +528,10 @@ def _get_ocr_model_names(self, lang, ppocr_version): class PaddleOCRCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "ocr" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -604,6 +619,11 @@ def _update_subparser(self, subparser): type=str, help="This determines how the side length limit is applied to the input image before feeding it into the text deteciton model.", ) + subparser.add_argument( + "--text_det_max_side_limit", + type=int, + help="Maximum side length limit for text detection input image.", + ) subparser.add_argument( "--text_det_thresh", type=float, @@ -653,7 +673,7 @@ def _update_subparser(self, subparser): help="PP-OCR version to use.", ) - deprecated_arg_types = { + deprecated_arg_types: Dict[str, Callable[[str], Any]] = { "det_model_dir": str, "det_limit_side_len": int, "det_limit_type": str, @@ -676,7 +696,7 @@ def _update_subparser(self, subparser): help=f"[Deprecated] Please use `--{new_name}` instead.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) for name, new_name in _DEPRECATED_PARAM_NAME_MAPPING.items(): assert name in params diff --git a/paddleocr/_pipelines/paddleocr_vl.py b/paddleocr/_pipelines/paddleocr_vl.py index 9e1fc9b0f73..d3def86547e 100644 --- a/paddleocr/_pipelines/paddleocr_vl.py +++ b/paddleocr/_pipelines/paddleocr_vl.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -37,36 +42,36 @@ class PaddleOCRVL(PaddleXPipelineWrapper): def __init__( self, - pipeline_version=_DEFAULT_PIPELINE_VERSION, - layout_detection_model_name=None, - layout_detection_model_dir=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - vl_rec_model_name=None, - vl_rec_model_dir=None, - vl_rec_backend=None, - vl_rec_server_url=None, - vl_rec_max_concurrency=None, - vl_rec_api_model_name=None, - vl_rec_api_key=None, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_chart_recognition=None, - use_seal_recognition=None, - use_ocr_for_image_block=None, - format_block_content=None, - merge_layout_blocks=None, - markdown_ignore_labels=None, - use_queues=None, - **kwargs, - ): + pipeline_version: str = _DEFAULT_PIPELINE_VERSION, + layout_detection_model_name: Optional[str] = None, + layout_detection_model_dir: Optional[str] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + vl_rec_model_name: Optional[str] = None, + vl_rec_model_dir: Optional[str] = None, + vl_rec_backend: Optional[str] = None, + vl_rec_server_url: Optional[str] = None, + vl_rec_max_concurrency: Optional[int] = None, + vl_rec_api_model_name: Optional[str] = None, + vl_rec_api_key: Optional[str] = None, + doc_orientation_classify_model_name: Optional[str] = None, + doc_orientation_classify_model_dir: Optional[str] = None, + doc_unwarping_model_name: Optional[str] = None, + doc_unwarping_model_dir: Optional[str] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_layout_detection: Optional[bool] = None, + use_chart_recognition: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_ocr_for_image_block: Optional[bool] = None, + format_block_content: Optional[bool] = None, + merge_layout_blocks: Optional[bool] = None, + markdown_ignore_labels: Optional[List[str]] = None, + use_queues: Optional[bool] = None, + **kwargs: Any, + ) -> None: if pipeline_version not in _AVAILABLE_PIPELINE_VERSIONS: raise ValueError( f"Invalid pipeline version: {pipeline_version}. Supported versions are {_AVAILABLE_PIPELINE_VERSIONS}." @@ -87,7 +92,7 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: if self.pipeline_version == "v1": return "PaddleOCR-VL" elif self.pipeline_version == "v1.5": @@ -97,33 +102,33 @@ def _paddlex_pipeline_name(self): def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_chart_recognition=None, - use_seal_recognition=None, - use_ocr_for_image_block=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - layout_shape_mode="auto", - use_queues=None, - prompt_label=None, - format_block_content=None, - repetition_penalty=None, - temperature=None, - top_p=None, - min_pixels=None, - max_pixels=None, - max_new_tokens=None, - merge_layout_blocks=None, - markdown_ignore_labels=None, - vlm_extra_args=None, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_layout_detection: Optional[bool] = None, + use_chart_recognition: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_ocr_for_image_block: Optional[bool] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + layout_shape_mode: str = "auto", + use_queues: Optional[bool] = None, + prompt_label: Optional[str] = None, + format_block_content: Optional[bool] = None, + repetition_penalty: Optional[float] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + min_pixels: Optional[int] = None, + max_pixels: Optional[int] = None, + max_new_tokens: Optional[int] = None, + merge_layout_blocks: Optional[bool] = None, + markdown_ignore_labels: Optional[List[str]] = None, + vlm_extra_args: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -154,33 +159,33 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_chart_recognition=None, - use_seal_recognition=None, - use_ocr_for_image_block=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - layout_shape_mode="auto", - use_queues=None, - prompt_label=None, - format_block_content=None, - repetition_penalty=None, - temperature=None, - top_p=None, - min_pixels=None, - max_pixels=None, - max_new_tokens=None, - merge_layout_blocks=None, - markdown_ignore_labels=None, - vlm_extra_args=None, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_layout_detection: Optional[bool] = None, + use_chart_recognition: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_ocr_for_image_block: Optional[bool] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + layout_shape_mode: str = "auto", + use_queues: Optional[bool] = None, + prompt_label: Optional[str] = None, + format_block_content: Optional[bool] = None, + repetition_penalty: Optional[float] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + min_pixels: Optional[int] = None, + max_pixels: Optional[int] = None, + max_new_tokens: Optional[int] = None, + merge_layout_blocks: Optional[bool] = None, + markdown_ignore_labels: Optional[List[str]] = None, + vlm_extra_args: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[PredictResult]: return list( self.predict_iter( input, @@ -211,12 +216,16 @@ def predict( ) ) - def concatenate_markdown_pages(self, markdown_list): + def concatenate_markdown_pages(self, markdown_list: List[str]) -> str: return self.paddlex_pipeline.concatenate_markdown_pages(markdown_list) def restructure_pages( - self, res_list, merge_tables=True, relevel_titles=True, concatenate_pages=False - ): + self, + res_list: List[Any], + merge_tables: bool = True, + relevel_titles: bool = True, + concatenate_pages: bool = False, + ) -> List[Any]: return list( self.paddlex_pipeline.restructure_pages( res_list, @@ -227,10 +236,10 @@ def restructure_pages( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return PaddleOCRVLCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: STRUCTURE = { "SubPipelines.DocPreprocessor.use_doc_orientation_classify": self._params[ "use_doc_orientation_classify" @@ -297,10 +306,10 @@ def _get_paddlex_config_overrides(self): class PaddleOCRVLCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "doc_parser" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -494,7 +503,7 @@ def _update_subparser(self, subparser): help="Maximum number of tokens generated by the VLM.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference( PaddleOCRVL, diff --git a/paddleocr/_pipelines/pp_chatocrv4_doc.py b/paddleocr/_pipelines/pp_chatocrv4_doc.py index c9f0f92afe8..e7b79f7a643 100644 --- a/paddleocr/_pipelines/pp_chatocrv4_doc.py +++ b/paddleocr/_pipelines/pp_chatocrv4_doc.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( get_subcommand_args, str2bool, @@ -23,53 +28,53 @@ class PPChatOCRv4Doc(PaddleXPipelineWrapper): def __init__( self, - layout_detection_model_name=None, - layout_detection_model_dir=None, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - text_detection_model_name=None, - text_detection_model_dir=None, - textline_orientation_model_name=None, - textline_orientation_model_dir=None, - textline_orientation_batch_size=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - table_structure_recognition_model_name=None, - table_structure_recognition_model_dir=None, - seal_text_detection_model_name=None, - seal_text_detection_model_dir=None, - seal_text_recognition_model_name=None, - seal_text_recognition_model_dir=None, - seal_text_recognition_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - retriever_config=None, - mllm_chat_bot_config=None, - chat_bot_config=None, - **kwargs, - ): + layout_detection_model_name: Optional[str] = None, + layout_detection_model_dir: Optional[str] = None, + doc_orientation_classify_model_name: Optional[str] = None, + doc_orientation_classify_model_dir: Optional[str] = None, + doc_unwarping_model_name: Optional[str] = None, + doc_unwarping_model_dir: Optional[str] = None, + text_detection_model_name: Optional[str] = None, + text_detection_model_dir: Optional[str] = None, + textline_orientation_model_name: Optional[str] = None, + textline_orientation_model_dir: Optional[str] = None, + textline_orientation_batch_size: Optional[int] = None, + text_recognition_model_name: Optional[str] = None, + text_recognition_model_dir: Optional[str] = None, + text_recognition_batch_size: Optional[int] = None, + table_structure_recognition_model_name: Optional[str] = None, + table_structure_recognition_model_dir: Optional[str] = None, + seal_text_detection_model_name: Optional[str] = None, + seal_text_detection_model_dir: Optional[str] = None, + seal_text_recognition_model_name: Optional[str] = None, + seal_text_recognition_model_dir: Optional[str] = None, + seal_text_recognition_batch_size: Optional[int] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_table_recognition: Optional[bool] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_rec_score_thresh: Optional[float] = None, + retriever_config: Optional[Dict[str, Any]] = None, + mllm_chat_bot_config: Optional[Dict[str, Any]] = None, + chat_bot_config: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: params = locals().copy() params.pop("self") params.pop("kwargs") @@ -78,56 +83,63 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "PP-ChatOCRv4-doc" - def save_vector(self, vector_info, save_path, retriever_config=None): + def save_vector( + self, + vector_info: Any, + save_path: str, + retriever_config: Optional[Dict[str, Any]] = None, + ) -> Any: return self.paddlex_pipeline.save_vector( vector_info=vector_info, save_path=save_path, retriever_config=retriever_config, ) - def load_vector(self, data_path, retriever_config=None): + def load_vector( + self, data_path: str, retriever_config: Optional[Dict[str, Any]] = None + ) -> Any: return self.paddlex_pipeline.load_vector( data_path=data_path, retriever_config=retriever_config ) - def load_visual_info_list(self, data_path): + def load_visual_info_list(self, data_path: str) -> Any: return self.paddlex_pipeline.load_visual_info_list(data_path=data_path) - def save_visual_info_list(self, visual_info, save_path): + def save_visual_info_list(self, visual_info: Any, save_path: str) -> Any: return self.paddlex_pipeline.save_visual_info_list( visual_info=visual_info, save_path=save_path ) def visual_predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_table_recognition: Optional[bool] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_rec_score_thresh: Optional[float] = None, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.visual_predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -156,31 +168,31 @@ def visual_predict_iter( def visual_predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_table_recognition: Optional[bool] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_rec_score_thresh: Optional[float] = None, + **kwargs: Any, + ) -> List[PredictResult]: return list( self.visual_predict_iter( input, @@ -211,13 +223,13 @@ def visual_predict( def build_vector( self, - visual_info, + visual_info: Any, *, - min_characters=3500, - block_size=300, - flag_save_bytes_vector=False, - retriever_config=None, - ): + min_characters: int = 3500, + block_size: int = 300, + flag_save_bytes_vector: bool = False, + retriever_config: Optional[Dict[str, Any]] = None, + ) -> Any: return self.paddlex_pipeline.build_vector( visual_info, min_characters=min_characters, @@ -226,7 +238,13 @@ def build_vector( retriever_config=retriever_config, ) - def mllm_pred(self, input, key_list, *, mllm_chat_bot_config=None): + def mllm_pred( + self, + input: InputType, + key_list: List[str], + *, + mllm_chat_bot_config: Optional[Dict[str, Any]] = None, + ) -> Any: return self.paddlex_pipeline.mllm_pred( input, key_list, @@ -235,27 +253,27 @@ def mllm_pred(self, input, key_list, *, mllm_chat_bot_config=None): def chat( self, - key_list, - visual_info, + key_list: List[str], + visual_info: Any, *, - use_vector_retrieval=True, - vector_info=None, - min_characters=3500, - text_task_description=None, - text_output_format=None, - text_rules_str=None, - text_few_shot_demo_text_content=None, - text_few_shot_demo_key_value_list=None, - table_task_description=None, - table_output_format=None, - table_rules_str=None, - table_few_shot_demo_text_content=None, - table_few_shot_demo_key_value_list=None, - mllm_predict_info=None, - mllm_integration_strategy="integration", - chat_bot_config=None, - retriever_config=None, - ): + use_vector_retrieval: bool = True, + vector_info: Any = None, + min_characters: int = 3500, + text_task_description: Optional[str] = None, + text_output_format: Optional[str] = None, + text_rules_str: Optional[str] = None, + text_few_shot_demo_text_content: Optional[str] = None, + text_few_shot_demo_key_value_list: Optional[str] = None, + table_task_description: Optional[str] = None, + table_output_format: Optional[str] = None, + table_rules_str: Optional[str] = None, + table_few_shot_demo_text_content: Optional[str] = None, + table_few_shot_demo_key_value_list: Optional[str] = None, + mllm_predict_info: Any = None, + mllm_integration_strategy: str = "integration", + chat_bot_config: Optional[Dict[str, Any]] = None, + retriever_config: Optional[Dict[str, Any]] = None, + ) -> Any: return self.paddlex_pipeline.chat( key_list, visual_info, @@ -279,10 +297,10 @@ def chat( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return PPChatOCRv4DocCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: STRUCTURE = { "SubPipelines.LayoutParser.SubModules.LayoutDetection.model_name": self._params[ "layout_detection_model_name" @@ -423,10 +441,10 @@ def _get_paddlex_config_overrides(self): class PPChatOCRv4DocCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "pp_chatocrv4_doc" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: subparser.add_argument( "-i", "--input", @@ -681,7 +699,7 @@ def _update_subparser(self, subparser): help="Configuration for the multimodal large language model.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) input = params.pop("input") keys = params.pop("keys") diff --git a/paddleocr/_pipelines/pp_doctranslation.py b/paddleocr/_pipelines/pp_doctranslation.py index 22b907c2e8b..bd420d3a0f7 100644 --- a/paddleocr/_pipelines/pp_doctranslation.py +++ b/paddleocr/_pipelines/pp_doctranslation.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( get_subcommand_args, str2bool, @@ -24,72 +29,72 @@ class PPDocTranslation(PaddleXPipelineWrapper): def __init__( self, - layout_detection_model_name=None, - layout_detection_model_dir=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - chart_recognition_model_name=None, - chart_recognition_model_dir=None, - chart_recognition_batch_size=None, - region_detection_model_name=None, - region_detection_model_dir=None, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - text_detection_model_name=None, - text_detection_model_dir=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - textline_orientation_model_name=None, - textline_orientation_model_dir=None, - textline_orientation_batch_size=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - text_rec_score_thresh=None, - table_classification_model_name=None, - table_classification_model_dir=None, - wired_table_structure_recognition_model_name=None, - wired_table_structure_recognition_model_dir=None, - wireless_table_structure_recognition_model_name=None, - wireless_table_structure_recognition_model_dir=None, - wired_table_cells_detection_model_name=None, - wired_table_cells_detection_model_dir=None, - wireless_table_cells_detection_model_name=None, - wireless_table_cells_detection_model_dir=None, - table_orientation_classify_model_name=None, - table_orientation_classify_model_dir=None, - seal_text_detection_model_name=None, - seal_text_detection_model_dir=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_text_recognition_model_name=None, - seal_text_recognition_model_dir=None, - seal_text_recognition_batch_size=None, - seal_rec_score_thresh=None, - formula_recognition_model_name=None, - formula_recognition_model_dir=None, - formula_recognition_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - chat_bot_config=None, - **kwargs, - ): + layout_detection_model_name: Optional[str] = None, + layout_detection_model_dir: Optional[str] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + chart_recognition_model_name: Optional[str] = None, + chart_recognition_model_dir: Optional[str] = None, + chart_recognition_batch_size: Optional[int] = None, + region_detection_model_name: Optional[str] = None, + region_detection_model_dir: Optional[str] = None, + doc_orientation_classify_model_name: Optional[str] = None, + doc_orientation_classify_model_dir: Optional[str] = None, + doc_unwarping_model_name: Optional[str] = None, + doc_unwarping_model_dir: Optional[str] = None, + text_detection_model_name: Optional[str] = None, + text_detection_model_dir: Optional[str] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + textline_orientation_model_name: Optional[str] = None, + textline_orientation_model_dir: Optional[str] = None, + textline_orientation_batch_size: Optional[int] = None, + text_recognition_model_name: Optional[str] = None, + text_recognition_model_dir: Optional[str] = None, + text_recognition_batch_size: Optional[int] = None, + text_rec_score_thresh: Optional[float] = None, + table_classification_model_name: Optional[str] = None, + table_classification_model_dir: Optional[str] = None, + wired_table_structure_recognition_model_name: Optional[str] = None, + wired_table_structure_recognition_model_dir: Optional[str] = None, + wireless_table_structure_recognition_model_name: Optional[str] = None, + wireless_table_structure_recognition_model_dir: Optional[str] = None, + wired_table_cells_detection_model_name: Optional[str] = None, + wired_table_cells_detection_model_dir: Optional[str] = None, + wireless_table_cells_detection_model_name: Optional[str] = None, + wireless_table_cells_detection_model_dir: Optional[str] = None, + table_orientation_classify_model_name: Optional[str] = None, + table_orientation_classify_model_dir: Optional[str] = None, + seal_text_detection_model_name: Optional[str] = None, + seal_text_detection_model_dir: Optional[str] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_text_recognition_model_name: Optional[str] = None, + seal_text_recognition_model_dir: Optional[str] = None, + seal_text_recognition_batch_size: Optional[int] = None, + seal_rec_score_thresh: Optional[float] = None, + formula_recognition_model_name: Optional[str] = None, + formula_recognition_model_dir: Optional[str] = None, + formula_recognition_batch_size: Optional[int] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_table_recognition: Optional[bool] = None, + use_formula_recognition: Optional[bool] = None, + use_chart_recognition: Optional[bool] = None, + use_region_detection: Optional[bool] = None, + chat_bot_config: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: params = locals().copy() params.pop("self") params.pop("kwargs") @@ -98,45 +103,45 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "PP-DocTranslation" def visual_predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=True, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_table_recognition: Optional[bool] = None, + use_formula_recognition: Optional[bool] = None, + use_chart_recognition: Optional[bool] = None, + use_region_detection: Optional[bool] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_rec_score_thresh: Optional[float] = None, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = True, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.visual_predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -174,40 +179,40 @@ def visual_predict_iter( def visual_predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=True, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_table_recognition: Optional[bool] = None, + use_formula_recognition: Optional[bool] = None, + use_chart_recognition: Optional[bool] = None, + use_region_detection: Optional[bool] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_rec_score_thresh: Optional[float] = None, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = True, + **kwargs: Any, + ) -> List[PredictResult]: return list( self.visual_predict_iter( input, @@ -247,20 +252,20 @@ def visual_predict( def translate_iter( self, - ori_md_info_list, + ori_md_info_list: List[Any], *, - target_language="zh", - chunk_size=5000, - task_description=None, - output_format=None, - rules_str=None, - few_shot_demo_text_content=None, - few_shot_demo_key_value_list=None, - glossary=None, - llm_request_interval=0.0, - chat_bot_config=None, - **kwargs, - ): + target_language: str = "zh", + chunk_size: int = 5000, + task_description: Optional[str] = None, + output_format: Optional[str] = None, + rules_str: Optional[str] = None, + few_shot_demo_text_content: Optional[str] = None, + few_shot_demo_key_value_list: Optional[List[Any]] = None, + glossary: Optional[str] = None, + llm_request_interval: float = 0.0, + chat_bot_config: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> Iterator[Any]: return self.paddlex_pipeline.translate( ori_md_info_list, target_language=target_language, @@ -278,20 +283,20 @@ def translate_iter( def translate( self, - ori_md_info_list, + ori_md_info_list: List[Any], *, - target_language="zh", - chunk_size=5000, - task_description=None, - output_format=None, - rules_str=None, - few_shot_demo_text_content=None, - few_shot_demo_key_value_list=None, - glossary=None, - llm_request_interval=0.0, - chat_bot_config=None, - **kwargs, - ): + target_language: str = "zh", + chunk_size: int = 5000, + task_description: Optional[str] = None, + output_format: Optional[str] = None, + rules_str: Optional[str] = None, + few_shot_demo_text_content: Optional[str] = None, + few_shot_demo_key_value_list: Optional[List[Any]] = None, + glossary: Optional[str] = None, + llm_request_interval: float = 0.0, + chat_bot_config: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Any]: return list( self.translate_iter( ori_md_info_list, @@ -309,17 +314,17 @@ def translate( ) ) - def load_from_markdown(self, input): + def load_from_markdown(self, input: InputType) -> Any: return self.paddlex_pipeline.load_from_markdown(input) - def concatenate_markdown_pages(self, markdown_list): + def concatenate_markdown_pages(self, markdown_list: List[str]) -> str: return self.paddlex_pipeline.concatenate_markdown_pages(markdown_list) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return PPDocTranslationCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: # HACK: We should consider reducing duplication. STRUCTURE = { "SubPipelines.LayoutParser.SubPipelines.DocPreprocessor.use_doc_orientation_classify": self._params[ @@ -561,10 +566,10 @@ def _get_paddlex_config_overrides(self): class PPDocTranslationCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "pp_doctranslation" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: subparser.add_argument( "-i", "--input", @@ -907,7 +912,7 @@ def _update_subparser(self, subparser): help="Configuration for the embedding model.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) input = params.pop("input") target_language = params.pop("target_language") @@ -939,7 +944,7 @@ def execute_with_args(self, args): target_language=target_language, ) - for res in result_translate: - res.print() + for trans_res in result_translate: + trans_res.print() if save_path: - res.save_to_markdown(save_path) + trans_res.save_to_markdown(save_path) diff --git a/paddleocr/_pipelines/pp_structurev3.py b/paddleocr/_pipelines/pp_structurev3.py index eefe7d1c851..0ba13c903c5 100644 --- a/paddleocr/_pipelines/pp_structurev3.py +++ b/paddleocr/_pipelines/pp_structurev3.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse import warnings +from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -31,75 +36,75 @@ class PPStructureV3(PaddleXPipelineWrapper): def __init__( self, - layout_detection_model_name=None, - layout_detection_model_dir=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - chart_recognition_model_name=None, - chart_recognition_model_dir=None, - chart_recognition_batch_size=None, - region_detection_model_name=None, - region_detection_model_dir=None, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - text_detection_model_name=None, - text_detection_model_dir=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - textline_orientation_model_name=None, - textline_orientation_model_dir=None, - textline_orientation_batch_size=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - text_rec_score_thresh=None, - table_classification_model_name=None, - table_classification_model_dir=None, - wired_table_structure_recognition_model_name=None, - wired_table_structure_recognition_model_dir=None, - wireless_table_structure_recognition_model_name=None, - wireless_table_structure_recognition_model_dir=None, - wired_table_cells_detection_model_name=None, - wired_table_cells_detection_model_dir=None, - wireless_table_cells_detection_model_name=None, - wireless_table_cells_detection_model_dir=None, - table_orientation_classify_model_name=None, - table_orientation_classify_model_dir=None, - seal_text_detection_model_name=None, - seal_text_detection_model_dir=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_text_recognition_model_name=None, - seal_text_recognition_model_dir=None, - seal_text_recognition_batch_size=None, - seal_rec_score_thresh=None, - formula_recognition_model_name=None, - formula_recognition_model_dir=None, - formula_recognition_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - format_block_content=None, - markdown_ignore_labels=None, - lang=None, - ocr_version=None, - **kwargs, - ): + layout_detection_model_name: Optional[str] = None, + layout_detection_model_dir: Optional[str] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + chart_recognition_model_name: Optional[str] = None, + chart_recognition_model_dir: Optional[str] = None, + chart_recognition_batch_size: Optional[int] = None, + region_detection_model_name: Optional[str] = None, + region_detection_model_dir: Optional[str] = None, + doc_orientation_classify_model_name: Optional[str] = None, + doc_orientation_classify_model_dir: Optional[str] = None, + doc_unwarping_model_name: Optional[str] = None, + doc_unwarping_model_dir: Optional[str] = None, + text_detection_model_name: Optional[str] = None, + text_detection_model_dir: Optional[str] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + textline_orientation_model_name: Optional[str] = None, + textline_orientation_model_dir: Optional[str] = None, + textline_orientation_batch_size: Optional[int] = None, + text_recognition_model_name: Optional[str] = None, + text_recognition_model_dir: Optional[str] = None, + text_recognition_batch_size: Optional[int] = None, + text_rec_score_thresh: Optional[float] = None, + table_classification_model_name: Optional[str] = None, + table_classification_model_dir: Optional[str] = None, + wired_table_structure_recognition_model_name: Optional[str] = None, + wired_table_structure_recognition_model_dir: Optional[str] = None, + wireless_table_structure_recognition_model_name: Optional[str] = None, + wireless_table_structure_recognition_model_dir: Optional[str] = None, + wired_table_cells_detection_model_name: Optional[str] = None, + wired_table_cells_detection_model_dir: Optional[str] = None, + wireless_table_cells_detection_model_name: Optional[str] = None, + wireless_table_cells_detection_model_dir: Optional[str] = None, + table_orientation_classify_model_name: Optional[str] = None, + table_orientation_classify_model_dir: Optional[str] = None, + seal_text_detection_model_name: Optional[str] = None, + seal_text_detection_model_dir: Optional[str] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_text_recognition_model_name: Optional[str] = None, + seal_text_recognition_model_dir: Optional[str] = None, + seal_text_recognition_batch_size: Optional[int] = None, + seal_rec_score_thresh: Optional[float] = None, + formula_recognition_model_name: Optional[str] = None, + formula_recognition_model_dir: Optional[str] = None, + formula_recognition_batch_size: Optional[int] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_table_recognition: Optional[bool] = None, + use_formula_recognition: Optional[bool] = None, + use_chart_recognition: Optional[bool] = None, + use_region_detection: Optional[bool] = None, + format_block_content: Optional[bool] = None, + markdown_ignore_labels: Optional[List[str]] = None, + lang: Optional[str] = None, + ocr_version: Optional[str] = None, + **kwargs: Any, + ) -> None: if ocr_version is not None and ocr_version not in _SUPPORTED_OCR_VERSIONS: raise ValueError( f"Invalid OCR version: {ocr_version}. Supported values are {_SUPPORTED_OCR_VERSIONS}." @@ -142,47 +147,47 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "PP-StructureV3" def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - format_block_content=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=True, - markdown_ignore_labels=None, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_table_recognition: Optional[bool] = None, + use_formula_recognition: Optional[bool] = None, + use_chart_recognition: Optional[bool] = None, + use_region_detection: Optional[bool] = None, + format_block_content: Optional[bool] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_rec_score_thresh: Optional[float] = None, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = True, + markdown_ignore_labels: Optional[List[str]] = None, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -222,42 +227,42 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_textline_orientation=None, - use_seal_recognition=None, - use_table_recognition=None, - use_formula_recognition=None, - use_chart_recognition=None, - use_region_detection=None, - format_block_content=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=True, - markdown_ignore_labels=None, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_textline_orientation: Optional[bool] = None, + use_seal_recognition: Optional[bool] = None, + use_table_recognition: Optional[bool] = None, + use_formula_recognition: Optional[bool] = None, + use_chart_recognition: Optional[bool] = None, + use_region_detection: Optional[bool] = None, + format_block_content: Optional[bool] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_rec_score_thresh: Optional[float] = None, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = True, + markdown_ignore_labels: Optional[List[str]] = None, + **kwargs: Any, + ) -> List[PredictResult]: return list( self.predict_iter( input, @@ -297,14 +302,14 @@ def predict( ) ) - def concatenate_markdown_pages(self, markdown_list): + def concatenate_markdown_pages(self, markdown_list: List[str]) -> str: return self.paddlex_pipeline.concatenate_markdown_pages(markdown_list) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return PPStructureV3CLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: STRUCTURE = { "SubPipelines.DocPreprocessor.use_doc_orientation_classify": self._params[ "use_doc_orientation_classify" @@ -527,7 +532,9 @@ def _get_paddlex_config_overrides(self): } return create_config_from_structure(STRUCTURE) - def _get_ocr_model_names(self, lang, ppocr_version): + def _get_ocr_model_names( + self, lang: Optional[str], ppocr_version: Optional[str] + ) -> Tuple[Optional[str], Optional[str]]: LATIN_LANGS = [ "af", "az", @@ -692,10 +699,10 @@ def _get_ocr_model_names(self, lang, ppocr_version): class PPStructureV3CLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "pp_structurev3" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -1024,7 +1031,7 @@ def _update_subparser(self, subparser): help="List of layout labels to ignore in Markdown output.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference( PPStructureV3, diff --git a/paddleocr/_pipelines/seal_recognition.py b/paddleocr/_pipelines/seal_recognition.py index b0185316eb9..8a91c712ae6 100644 --- a/paddleocr/_pipelines/seal_recognition.py +++ b/paddleocr/_pipelines/seal_recognition.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, LayoutDetResult, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -25,32 +30,32 @@ class SealRecognition(PaddleXPipelineWrapper): def __init__( self, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - layout_detection_model_name=None, - layout_detection_model_dir=None, - seal_text_detection_model_name=None, - seal_text_detection_model_dir=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - **kwargs, - ): + doc_orientation_classify_model_name: Optional[str] = None, + doc_orientation_classify_model_dir: Optional[str] = None, + doc_unwarping_model_name: Optional[str] = None, + doc_unwarping_model_dir: Optional[str] = None, + layout_detection_model_name: Optional[str] = None, + layout_detection_model_dir: Optional[str] = None, + seal_text_detection_model_name: Optional[str] = None, + seal_text_detection_model_dir: Optional[str] = None, + text_recognition_model_name: Optional[str] = None, + text_recognition_model_dir: Optional[str] = None, + text_recognition_batch_size: Optional[int] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_layout_detection: Optional[bool] = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_rec_score_thresh: Optional[float] = None, + **kwargs: Any, + ) -> None: self._params = { "doc_orientation_classify_model_name": doc_orientation_classify_model_name, @@ -81,29 +86,29 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "seal_recognition" def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - layout_det_res=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_layout_detection: Optional[bool] = None, + layout_det_res: LayoutDetResult = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_rec_score_thresh: Optional[float] = None, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -125,24 +130,24 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - layout_det_res=None, - layout_threshold=None, - layout_nms=None, - layout_unclip_ratio=None, - layout_merge_bboxes_mode=None, - seal_det_limit_side_len=None, - seal_det_limit_type=None, - seal_det_thresh=None, - seal_det_box_thresh=None, - seal_det_unclip_ratio=None, - seal_rec_score_thresh=None, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_layout_detection: Optional[bool] = None, + layout_det_res: LayoutDetResult = None, + layout_threshold: Optional[Union[float, dict]] = None, + layout_nms: Optional[bool] = None, + layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None, + layout_merge_bboxes_mode: Optional[str] = None, + seal_det_limit_side_len: Optional[int] = None, + seal_det_limit_type: Optional[str] = None, + seal_det_thresh: Optional[float] = None, + seal_det_box_thresh: Optional[float] = None, + seal_det_unclip_ratio: Optional[float] = None, + seal_rec_score_thresh: Optional[float] = None, + **kwargs: Any, + ) -> List[PredictResult]: return list( self.predict_iter( input, @@ -165,10 +170,10 @@ def predict( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return SealRecognitionCLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: STRUCTURE = { "SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_name": self._params[ "doc_orientation_classify_model_name" @@ -244,10 +249,10 @@ def _get_paddlex_config_overrides(self): class SealRecognitionCLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "seal_recognition" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -371,7 +376,7 @@ def _update_subparser(self, subparser): help="Text recognition threshold. Text results with scores greater than this threshold are retained.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(SealRecognition, params) diff --git a/paddleocr/_pipelines/table_recognition_v2.py b/paddleocr/_pipelines/table_recognition_v2.py index 5cf48682256..803f0112d65 100644 --- a/paddleocr/_pipelines/table_recognition_v2.py +++ b/paddleocr/_pipelines/table_recognition_v2.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse +from typing import Any, Dict, Iterator, List, Optional + +from .._abstract import CLISubcommandExecutor +from .._types import InputType, LayoutDetResult, PredictResult from .._utils.cli import ( add_simple_inference_args, get_subcommand_args, @@ -25,39 +30,39 @@ class TableRecognitionPipelineV2(PaddleXPipelineWrapper): def __init__( self, - layout_detection_model_name=None, - layout_detection_model_dir=None, - table_classification_model_name=None, - table_classification_model_dir=None, - wired_table_structure_recognition_model_name=None, - wired_table_structure_recognition_model_dir=None, - wireless_table_structure_recognition_model_name=None, - wireless_table_structure_recognition_model_dir=None, - wired_table_cells_detection_model_name=None, - wired_table_cells_detection_model_dir=None, - wireless_table_cells_detection_model_name=None, - wireless_table_cells_detection_model_dir=None, - doc_orientation_classify_model_name=None, - doc_orientation_classify_model_dir=None, - doc_unwarping_model_name=None, - doc_unwarping_model_dir=None, - text_detection_model_name=None, - text_detection_model_dir=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_recognition_model_name=None, - text_recognition_model_dir=None, - text_recognition_batch_size=None, - text_rec_score_thresh=None, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_ocr_model=None, - **kwargs, - ): + layout_detection_model_name: Optional[str] = None, + layout_detection_model_dir: Optional[str] = None, + table_classification_model_name: Optional[str] = None, + table_classification_model_dir: Optional[str] = None, + wired_table_structure_recognition_model_name: Optional[str] = None, + wired_table_structure_recognition_model_dir: Optional[str] = None, + wireless_table_structure_recognition_model_name: Optional[str] = None, + wireless_table_structure_recognition_model_dir: Optional[str] = None, + wired_table_cells_detection_model_name: Optional[str] = None, + wired_table_cells_detection_model_dir: Optional[str] = None, + wireless_table_cells_detection_model_name: Optional[str] = None, + wireless_table_cells_detection_model_dir: Optional[str] = None, + doc_orientation_classify_model_name: Optional[str] = None, + doc_orientation_classify_model_dir: Optional[str] = None, + doc_unwarping_model_name: Optional[str] = None, + doc_unwarping_model_dir: Optional[str] = None, + text_detection_model_name: Optional[str] = None, + text_detection_model_dir: Optional[str] = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_recognition_model_name: Optional[str] = None, + text_recognition_model_dir: Optional[str] = None, + text_recognition_batch_size: Optional[int] = None, + text_rec_score_thresh: Optional[float] = None, + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_layout_detection: Optional[bool] = None, + use_ocr_model: Optional[bool] = None, + **kwargs: Any, + ) -> None: params = locals().copy() params.pop("self") params.pop("kwargs") @@ -66,33 +71,33 @@ def __init__( super().__init__(**kwargs) @property - def _paddlex_pipeline_name(self): + def _paddlex_pipeline_name(self) -> str: return "table_recognition_v2" def predict_iter( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_ocr_model=None, - overall_ocr_res=None, - layout_det_res=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=False, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_layout_detection: Optional[bool] = None, + use_ocr_model: Optional[bool] = None, + overall_ocr_res: Any = None, + layout_det_res: LayoutDetResult = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = False, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + **kwargs: Any, + ) -> Iterator[PredictResult]: return self.paddlex_pipeline.predict( input, use_doc_orientation_classify=use_doc_orientation_classify, @@ -118,28 +123,28 @@ def predict_iter( def predict( self, - input, + input: InputType, *, - use_doc_orientation_classify=None, - use_doc_unwarping=None, - use_layout_detection=None, - use_ocr_model=None, - overall_ocr_res=None, - layout_det_res=None, - text_det_limit_side_len=None, - text_det_limit_type=None, - text_det_thresh=None, - text_det_box_thresh=None, - text_det_unclip_ratio=None, - text_rec_score_thresh=None, - use_e2e_wired_table_rec_model=False, - use_e2e_wireless_table_rec_model=False, - use_wired_table_cells_trans_to_html=False, - use_wireless_table_cells_trans_to_html=False, - use_table_orientation_classify=True, - use_ocr_results_with_table_cells=True, - **kwargs, - ): + use_doc_orientation_classify: Optional[bool] = None, + use_doc_unwarping: Optional[bool] = None, + use_layout_detection: Optional[bool] = None, + use_ocr_model: Optional[bool] = None, + overall_ocr_res: Any = None, + layout_det_res: LayoutDetResult = None, + text_det_limit_side_len: Optional[int] = None, + text_det_limit_type: Optional[str] = None, + text_det_thresh: Optional[float] = None, + text_det_box_thresh: Optional[float] = None, + text_det_unclip_ratio: Optional[float] = None, + text_rec_score_thresh: Optional[float] = None, + use_e2e_wired_table_rec_model: bool = False, + use_e2e_wireless_table_rec_model: bool = False, + use_wired_table_cells_trans_to_html: bool = False, + use_wireless_table_cells_trans_to_html: bool = False, + use_table_orientation_classify: bool = True, + use_ocr_results_with_table_cells: bool = True, + **kwargs: Any, + ) -> List[PredictResult]: return list( self.predict_iter( input, @@ -166,10 +171,10 @@ def predict( ) @classmethod - def get_cli_subcommand_executor(cls): + def get_cli_subcommand_executor(cls) -> CLISubcommandExecutor: return TableRecognitionPipelineV2CLISubcommandExecutor() - def _get_paddlex_config_overrides(self): + def _get_paddlex_config_overrides(self) -> Dict[str, Any]: STRUCTURE = { "SubPipelines.DocPreprocessor.use_doc_orientation_classify": self._params[ "use_doc_orientation_classify" @@ -268,10 +273,10 @@ def _get_paddlex_config_overrides(self): class TableRecognitionPipelineV2CLISubcommandExecutor(PipelineCLISubcommandExecutor): @property - def subparser_name(self): + def subparser_name(self) -> str: return "table_recognition_v2" - def _update_subparser(self, subparser): + def _update_subparser(self, subparser: argparse.ArgumentParser) -> None: add_simple_inference_args(subparser) subparser.add_argument( @@ -433,6 +438,6 @@ def _update_subparser(self, subparser): help="Whether to use OCR models.", ) - def execute_with_args(self, args): + def execute_with_args(self, args: argparse.Namespace) -> None: params = get_subcommand_args(args) perform_simple_inference(TableRecognitionPipelineV2, params) diff --git a/paddleocr/_pipelines/utils.py b/paddleocr/_pipelines/utils.py index 9a7b4bae993..305764845e3 100644 --- a/paddleocr/_pipelines/utils.py +++ b/paddleocr/_pipelines/utils.py @@ -12,8 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, Dict, Optional -def create_config_from_structure(structure, *, unset=None, config=None): + +def create_config_from_structure( + structure: Dict[str, Any], + *, + unset: Any = None, + config: Optional[Dict[str, Any]] = None, +) -> Dict[str, Any]: if config is None: config = {} for k, v in structure.items(): diff --git a/paddleocr/_types.py b/paddleocr/_types.py new file mode 100644 index 00000000000..9a9e36a7a5d --- /dev/null +++ b/paddleocr/_types.py @@ -0,0 +1,30 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union + +import numpy as np + +if TYPE_CHECKING: + from paddlex.inference.models.object_detection.result import DetResult + +# Input types accepted by PaddleX: str (file path, URL) or np.ndarray +ImageInput = Union[str, np.ndarray] +InputType = Union[ImageInput, Sequence[ImageInput]] + +# Prediction result (Phase 3 will refine with TypedDict) +PredictResult = Dict[str, Any] + +# Layout detection result accepted by PaddleX pipelines +LayoutDetResult = Optional[Union["DetResult", List["DetResult"]]] diff --git a/paddleocr/_utils/cli.py b/paddleocr/_utils/cli.py index 6d218756c92..6d0fcca2b43 100644 --- a/paddleocr/_utils/cli.py +++ b/paddleocr/_utils/cli.py @@ -12,23 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse import time +from typing import Any, Dict, Optional, Set from .logging import logger -def str2bool(v, /): +def str2bool(v: str, /) -> bool: return v.lower() in ("true", "yes", "t", "y", "1") -def get_subcommand_args(args): - args = vars(args).copy() - args.pop("subcommand") - args.pop("executor") - return args +def get_subcommand_args(args: argparse.Namespace) -> Dict[str, Any]: + args_dict = vars(args).copy() + args_dict.pop("subcommand") + args_dict.pop("executor") + return args_dict -def add_simple_inference_args(subparser, *, input_help=None): +def add_simple_inference_args( + subparser: argparse.ArgumentParser, *, input_help: Optional[str] = None +) -> None: if input_help is None: input_help = "Input path or URL." subparser.add_argument( @@ -45,14 +49,18 @@ def add_simple_inference_args(subparser, *, input_help=None): ) -def perform_simple_inference(wrapper_cls, params, predict_param_names=None): +def perform_simple_inference( + wrapper_cls: type, + params: Dict[str, Any], + predict_param_names: Optional[Set[str]] = None, +) -> None: params = params.copy() input_ = params.pop("input") save_path = params.pop("save_path") if predict_param_names is not None: - predict_params = {} + predict_params: Dict[str, Any] = {} for name in predict_param_names: predict_params[name] = params.pop(name) else: diff --git a/paddleocr/_utils/deprecation.py b/paddleocr/_utils/deprecation.py index b30419db49c..9c59b4ccbd8 100644 --- a/paddleocr/_utils/deprecation.py +++ b/paddleocr/_utils/deprecation.py @@ -15,6 +15,7 @@ import argparse import sys import warnings +from typing import Any, Optional, Sequence, Union from typing_extensions import deprecated as deprecated @@ -24,7 +25,13 @@ class CLIDeprecationWarning(DeprecationWarning): class DeprecatedOptionAction(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): + def __call__( + self, + parser: argparse.ArgumentParser, + namespace: argparse.Namespace, + values: Optional[Union[str, Sequence[Any]]], + option_string: Optional[str] = None, + ) -> None: assert option_string warnings.warn( f"The option `{option_string}` has been deprecated and will be removed in the future. Please refer to the documentation for more details.", @@ -33,7 +40,7 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, values) -def warn_deprecated_param(name, new_name=None): +def warn_deprecated_param(name: str, new_name: Optional[str] = None) -> None: msg = ( f"The parameter `{name}` has been deprecated and will be removed in the future." ) diff --git a/paddleocr/_utils/logging.py b/paddleocr/_utils/logging.py index 0ef89395db2..77b48a2eb47 100644 --- a/paddleocr/_utils/logging.py +++ b/paddleocr/_utils/logging.py @@ -16,12 +16,12 @@ from .._env import DISABLE_AUTO_LOGGING_CONFIG -LOGGER_NAME = "paddleocr" +LOGGER_NAME: str = "paddleocr" -logger = logging.getLogger(LOGGER_NAME) +logger: logging.Logger = logging.getLogger(LOGGER_NAME) -def _set_up_logger(): +def _set_up_logger() -> None: if DISABLE_AUTO_LOGGING_CONFIG: return diff --git a/paddleocr/py.typed b/paddleocr/py.typed new file mode 100644 index 00000000000..e69de29bb2d diff --git a/pyproject.toml b/pyproject.toml index 2aa826d67f9..e7f6ef11307 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,3 +73,14 @@ markers = [ "resource_intensive: mark a test as resource intensive" ] addopts = "-m 'not resource_intensive'" + +[tool.mypy] +python_version = "3.9" +packages = ["paddleocr"] +warn_return_any = false +warn_unused_configs = true +disallow_untyped_defs = true + +[[tool.mypy.overrides]] +module = ["paddlex.*", "paddle.*", "yaml.*", "requests.*", "PIL.*"] +ignore_missing_imports = true