Skip to content

Commit 6ec2384

Browse files
committed
feat: add type hints to paddleocr/ public API (Phase 1)
Add type annotations to all public modules in paddleocr/ using Python 3.8-compatible typing constructs (List, Dict, Tuple, Optional, Union). Configure mypy to run in CI and add a py.typed marker file.
1 parent f0b39d4 commit 6ec2384

44 files changed

Lines changed: 1288 additions & 1031 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/codestyle.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,8 @@ jobs:
3434
- uses: pre-commit/action@v3.0.1
3535
with:
3636
extra_args: '--all-files'
37+
38+
- name: Type check with mypy
39+
run: |
40+
pip install mypy numpy
41+
mypy paddleocr/

paddleocr/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from ._cli import main
1919

2020

21-
def console_entry() -> int:
21+
def console_entry() -> None:
2222
# See https://docs.python.org/3/library/signal.html#note-on-sigpipe
2323
try:
2424
# Flush output here to force SIGPIPE to be triggered while inside this

paddleocr/_abstract.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,17 @@
1313
# limitations under the License.
1414

1515
import abc
16+
import argparse
17+
from typing import Any
1618

1719

1820
class CLISubcommandExecutor(metaclass=abc.ABCMeta):
1921
@abc.abstractmethod
20-
def add_subparser(self, subparsers):
22+
def add_subparser(
23+
self, subparsers: argparse._SubParsersAction
24+
) -> argparse.ArgumentParser:
2125
raise NotImplementedError
2226

2327
@abc.abstractmethod
24-
def execute_with_args(self, args):
28+
def execute_with_args(self, args: argparse.Namespace) -> None:
2529
raise NotImplementedError

paddleocr/_cli.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import time
2020
import warnings
2121
from threading import Thread
22+
from typing import Any
2223

2324
import requests
2425

@@ -54,7 +55,7 @@
5455
from ._utils.logging import logger
5556

5657

57-
def _register_pipelines(subparsers):
58+
def _register_pipelines(subparsers: argparse._SubParsersAction) -> None:
5859
for cls in [
5960
DocPreprocessor,
6061
DocUnderstanding,
@@ -67,12 +68,12 @@ def _register_pipelines(subparsers):
6768
SealRecognition,
6869
TableRecognitionPipelineV2,
6970
]:
70-
subcommand_executor = cls.get_cli_subcommand_executor()
71+
subcommand_executor = cls.get_cli_subcommand_executor() # type: ignore[attr-defined]
7172
subparser = subcommand_executor.add_subparser(subparsers)
7273
subparser.set_defaults(executor=subcommand_executor.execute_with_args)
7374

7475

75-
def _register_models(subparsers):
76+
def _register_models(subparsers: argparse._SubParsersAction) -> None:
7677
for cls in [
7778
ChartParsing,
7879
DocImgOrientationClassification,
@@ -88,13 +89,13 @@ def _register_models(subparsers):
8889
TextLineOrientationClassification,
8990
TextRecognition,
9091
]:
91-
subcommand_executor = cls.get_cli_subcommand_executor()
92+
subcommand_executor = cls.get_cli_subcommand_executor() # type: ignore[attr-defined]
9293
subparser = subcommand_executor.add_subparser(subparsers)
9394
subparser.set_defaults(executor=subcommand_executor.execute_with_args)
9495

9596

96-
def _register_install_hpi_deps_command(subparsers):
97-
def _install_hpi_deps(args):
97+
def _register_install_hpi_deps_command(subparsers: argparse._SubParsersAction) -> None:
98+
def _install_hpi_deps(args: argparse.Namespace) -> None:
9899
hpip = f"hpi-{args.variant}"
99100
try:
100101
subprocess.check_call(["paddlex", "--install", hpip])
@@ -107,8 +108,10 @@ def _install_hpi_deps(args):
107108
subparser.set_defaults(executor=_install_hpi_deps)
108109

109110

110-
def _register_install_genai_server_deps_command(subparsers):
111-
def _install_genai_server_deps(args):
111+
def _register_install_genai_server_deps_command(
112+
subparsers: argparse._SubParsersAction,
113+
) -> None:
114+
def _install_genai_server_deps(args: argparse.Namespace) -> None:
112115
try:
113116
subprocess.check_call(
114117
["paddlex", "--install", f"genai-{args.variant}-server"]
@@ -123,14 +126,14 @@ def _install_genai_server_deps(args):
123126
subparser.set_defaults(executor=_install_genai_server_deps)
124127

125128

126-
def _register_genai_server_command(subparsers):
129+
def _register_genai_server_command(subparsers: argparse._SubParsersAction) -> None:
127130
# TODO: Register the subparser whether the plugin is installed or not
128131
try:
129132
from paddlex.inference.genai.server import get_arg_parser, run_genai_server
130133
except RuntimeError:
131134
return
132135

133-
def _show_prompt_when_server_is_running(host, port, backend):
136+
def _show_prompt_when_server_is_running(host: str, port: int, backend: str) -> None:
134137
if host == "0.0.0.0":
135138
host = "localhost"
136139
while True:
@@ -147,7 +150,7 @@ def _show_prompt_when_server_is_running(host, port, backend):
147150
2. Make HTTP requests directly, or using the OpenAI client library."""
148151
logger.info(prompt)
149152

150-
def _run_genai_server(args):
153+
def _run_genai_server(args: argparse.Namespace) -> None:
151154
Thread(
152155
target=_show_prompt_when_server_is_running,
153156
args=(args.host, args.port, args.backend),
@@ -165,7 +168,7 @@ def _run_genai_server(args):
165168
subparser.set_defaults(executor=_run_genai_server)
166169

167170

168-
def _get_parser():
171+
def _get_parser() -> argparse.ArgumentParser:
169172
parser = argparse.ArgumentParser(prog="paddleocr")
170173
parser.add_argument(
171174
"-v", "--version", action="version", version=f"%(prog)s {version}"
@@ -179,11 +182,11 @@ def _get_parser():
179182
return parser
180183

181184

182-
def _execute(args):
185+
def _execute(args: argparse.Namespace) -> None:
183186
args.executor(args)
184187

185188

186-
def main():
189+
def main() -> None:
187190
logger.setLevel(logging.INFO)
188191
warnings.filterwarnings("default", category=CLIDeprecationWarning)
189192
parser = _get_parser()

paddleocr/_common_args.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import argparse
16+
from typing import Any, Dict, Optional
17+
1518
from paddlex.inference import PaddlePredictorOption
1619
from paddlex.utils.device import get_default_device, parse_device
1720

@@ -28,7 +31,9 @@
2831
from ._utils.cli import str2bool
2932

3033

31-
def parse_common_args(kwargs, *, default_enable_hpi):
34+
def parse_common_args(
35+
kwargs: Dict[str, Any], *, default_enable_hpi: Optional[bool]
36+
) -> Dict[str, Any]:
3237
default_vals = {
3338
"device": DEFAULT_DEVICE,
3439
"enable_hpi": default_enable_hpi,
@@ -57,13 +62,15 @@ def parse_common_args(kwargs, *, default_enable_hpi):
5762
return kwargs
5863

5964

60-
def prepare_common_init_args(model_name, common_args):
65+
def prepare_common_init_args(
66+
model_name: Optional[str], common_args: Dict[str, Any]
67+
) -> Dict[str, Any]:
6168
device = common_args["device"]
6269
if device is None:
6370
device = get_default_device()
6471
device_type, _ = parse_device(device)
6572

66-
init_kwargs = {}
73+
init_kwargs: Dict[str, Any] = {}
6774
init_kwargs["device"] = device
6875
init_kwargs["use_hpip"] = common_args["enable_hpi"]
6976

@@ -94,7 +101,12 @@ def prepare_common_init_args(model_name, common_args):
94101
return init_kwargs
95102

96103

97-
def add_common_cli_opts(parser, *, default_enable_hpi, allow_multiple_devices):
104+
def add_common_cli_opts(
105+
parser: argparse.ArgumentParser,
106+
*,
107+
default_enable_hpi: Optional[bool],
108+
allow_multiple_devices: bool,
109+
) -> None:
98110
if allow_multiple_devices:
99111
help_ = "Device(s) to use for inference, e.g., `cpu`, `gpu`, `npu`, `gpu:0`, `gpu:0,1`. If multiple devices are specified, inference will be performed in parallel. Note that parallel inference is not always supported. By default, GPU 0 will be used if available; otherwise, the CPU will be used."
100112
else:

paddleocr/_constants.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import List, Optional
2+
13
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
24
#
35
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,11 +14,11 @@
1214
# See the License for the specific language governing permissions and
1315
# limitations under the License.
1416

15-
DEFAULT_DEVICE = None
16-
DEFAULT_USE_TENSORRT = False
17-
DEFAULT_PRECISION = "fp32"
18-
DEFAULT_ENABLE_MKLDNN = True
19-
DEFAULT_MKLDNN_CACHE_CAPACITY = 10
20-
DEFAULT_CPU_THREADS = 10
21-
SUPPORTED_PRECISION_LIST = ["fp32", "fp16"]
22-
DEFAULT_USE_CINN = False
17+
DEFAULT_DEVICE: Optional[str] = None
18+
DEFAULT_USE_TENSORRT: bool = False
19+
DEFAULT_PRECISION: str = "fp32"
20+
DEFAULT_ENABLE_MKLDNN: bool = True
21+
DEFAULT_MKLDNN_CACHE_CAPACITY: int = 10
22+
DEFAULT_CPU_THREADS: int = 10
23+
SUPPORTED_PRECISION_LIST: List[str] = ["fp32", "fp16"]
24+
DEFAULT_USE_CINN: bool = False

paddleocr/_env.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@
1414

1515
import os
1616

17-
DISABLE_AUTO_LOGGING_CONFIG = (
17+
DISABLE_AUTO_LOGGING_CONFIG: bool = (
1818
os.getenv("PADDLEOCR_DISABLE_AUTO_LOGGING_CONFIG", "0") == "1"
1919
)

paddleocr/_models/_doc_vlm.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# limitations under the License.
1414

1515
import abc
16+
import argparse
17+
from typing import Any, Dict, Type
1618

1719
from .._utils.cli import (
1820
get_subcommand_args,
@@ -25,13 +27,13 @@
2527
class BaseDocVLM(PaddleXPredictorWrapper):
2628
def __init__(
2729
self,
28-
*args,
29-
**kwargs,
30-
):
31-
self._extra_init_args = {}
30+
*args: Any,
31+
**kwargs: Any,
32+
) -> None:
33+
self._extra_init_args: Dict[str, Any] = {}
3234
super().__init__(*args, **kwargs)
3335

34-
def _get_extra_paddlex_predictor_init_args(self):
36+
def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]:
3537
return self._extra_init_args
3638

3739

@@ -40,10 +42,10 @@ class BaseDocVLMSubcommandExecutor(PredictorCLISubcommandExecutor):
4042

4143
@property
4244
@abc.abstractmethod
43-
def wrapper_cls(self):
45+
def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]:
4446
raise NotImplementedError
4547

46-
def execute_with_args(self, args):
48+
def execute_with_args(self, args: argparse.Namespace) -> None:
4749
params = get_subcommand_args(args)
4850
params["input"] = self.input_validator(params["input"])
4951
perform_simple_inference(self.wrapper_cls, params)

paddleocr/_models/_image_classification.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# limitations under the License.
1414

1515
import abc
16+
import argparse
17+
from typing import Any, Dict, Optional, Type
1618

1719
from .._utils.cli import (
1820
add_simple_inference_args,
@@ -26,20 +28,20 @@ class ImageClassification(PaddleXPredictorWrapper):
2628
def __init__(
2729
self,
2830
*,
29-
topk=None,
30-
**kwargs,
31-
):
31+
topk: Optional[int] = None,
32+
**kwargs: Any,
33+
) -> None:
3234
self._extra_init_args = {
3335
"topk": topk,
3436
}
3537
super().__init__(**kwargs)
3638

37-
def _get_extra_paddlex_predictor_init_args(self):
39+
def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]:
3840
return self._extra_init_args
3941

4042

4143
class ImageClassificationSubcommandExecutor(PredictorCLISubcommandExecutor):
42-
def _update_subparser(self, subparser):
44+
def _update_subparser(self, subparser: argparse.ArgumentParser) -> None:
4345
add_simple_inference_args(subparser)
4446

4547
subparser.add_argument(
@@ -50,9 +52,9 @@ def _update_subparser(self, subparser):
5052

5153
@property
5254
@abc.abstractmethod
53-
def wrapper_cls(self):
55+
def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]:
5456
raise NotImplementedError
5557

56-
def execute_with_args(self, args):
58+
def execute_with_args(self, args: argparse.Namespace) -> None:
5759
params = get_subcommand_args(args)
5860
perform_simple_inference(self.wrapper_cls, params)

paddleocr/_models/_object_detection.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# limitations under the License.
1414

1515
import abc
16+
import argparse
17+
from typing import Any, Dict, Optional, Tuple, Type, Union
1618

1719
from .._utils.cli import (
1820
add_simple_inference_args,
@@ -27,13 +29,13 @@ class ObjectDetection(PaddleXPredictorWrapper):
2729
def __init__(
2830
self,
2931
*,
30-
img_size=None,
31-
threshold=None,
32-
layout_nms=None,
33-
layout_unclip_ratio=None,
34-
layout_merge_bboxes_mode=None,
35-
**kwargs,
36-
):
32+
img_size: Optional[Union[int, Tuple[int, int]]] = None,
33+
threshold: Optional[Union[float, dict]] = None,
34+
layout_nms: Optional[bool] = None,
35+
layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None,
36+
layout_merge_bboxes_mode: Optional[Union[str, dict]] = None,
37+
**kwargs: Any,
38+
) -> None:
3739
self._extra_init_args = {
3840
"img_size": img_size,
3941
"threshold": threshold,
@@ -43,12 +45,12 @@ def __init__(
4345
}
4446
super().__init__(**kwargs)
4547

46-
def _get_extra_paddlex_predictor_init_args(self):
48+
def _get_extra_paddlex_predictor_init_args(self) -> Dict[str, Any]:
4749
return self._extra_init_args
4850

4951

5052
class ObjectDetectionSubcommandExecutor(PredictorCLISubcommandExecutor):
51-
def _update_subparser(self, subparser):
53+
def _update_subparser(self, subparser: argparse.ArgumentParser) -> None:
5254
add_simple_inference_args(subparser)
5355

5456
subparser.add_argument(
@@ -79,9 +81,9 @@ def _update_subparser(self, subparser):
7981

8082
@property
8183
@abc.abstractmethod
82-
def wrapper_cls(self):
84+
def wrapper_cls(self) -> Type[PaddleXPredictorWrapper]:
8385
raise NotImplementedError
8486

85-
def execute_with_args(self, args):
87+
def execute_with_args(self, args: argparse.Namespace) -> None:
8688
params = get_subcommand_args(args)
8789
perform_simple_inference(self.wrapper_cls, params)

0 commit comments

Comments
 (0)