PaddlePaddle · robot010 · Apr 5, 2026
diff --git a/paddleocr/_models/base.py b/paddleocr/_models/base.py
@@ -28,6 +28,27 @@
 
 
 class PaddleXPredictorWrapper(metaclass=abc.ABCMeta):
+    """Base class for single-model PaddleOCR wrappers.
+
+    Subclasses wrap a PaddleX predictor for a specific model (e.g. text
+    detection, text recognition) and expose ``predict`` / ``predict_iter``
+    methods along with optional CLI support.  Each subclass must declare
+    ``default_model_name`` and ``get_cli_subcommand_executor``.
+
+    Args:
+        model_name (str | None): Name of the model to load. Defaults to
+            ``default_model_name`` when ``None``.
+        model_dir (str | None): Local directory containing model files.
+            When ``None``, the model is downloaded from the model hub.
+        **common_args: Common inference arguments forwarded to PaddleX (e.g.
+            ``device``, ``use_hpip``, ``use_tensorrt``).
+
+    Example:
+        >>> from paddleocr import TextDetection
+        >>> detector = TextDetection()
+        >>> results = detector.predict("image.png")
+    """
+
     def __init__(
         self,
         *,
@@ -83,6 +104,12 @@ def _create_paddlex_predictor(self):
 
 
 class PredictorCLISubcommandExecutor(CLISubcommandExecutor):
+    """Base class for single-model CLI subcommand executors.
+
+    Registers a model predictor as a subcommand of the ``paddleocr`` CLI and
+    handles argument parsing and execution.
+    """
+
     @property
     @abc.abstractmethod
     def subparser_name(self):

diff --git a/paddleocr/_pipelines/base.py b/paddleocr/_pipelines/base.py
@@ -31,6 +31,7 @@
 
 
 def _merge_dicts(d1, d2):
+    """Return a copy of d1 recursively merged with d2; d2 values take precedence."""
     res = d1.copy()
     for k, v in d2.items():
         if k in res and isinstance(res[k], dict) and isinstance(v, dict):
@@ -41,6 +42,7 @@ def _merge_dicts(d1, d2):
 
 
 def _to_builtin(obj):
+    """Recursively convert AttrDict and nested structures to plain Python dicts/lists."""
     if isinstance(obj, AttrDict):
         return {k: _to_builtin(v) for k, v in obj.items()}
     elif isinstance(obj, dict):
@@ -52,6 +54,25 @@ def _to_builtin(obj):
 
 
 class PaddleXPipelineWrapper(metaclass=abc.ABCMeta):
+    """Base class for PaddleOCR pipeline wrappers.
+
+    Subclasses wrap a PaddleX pipeline and expose a simplified Python API
+    with optional CLI support. Each subclass must declare
+    ``_paddlex_pipeline_name`` and ``get_cli_subcommand_executor``.
+
+    Args:
+        paddlex_config (str | dict | None): Path to a PaddleX pipeline YAML
+            config file, a pre-loaded config dict, or ``None`` to use the
+            default config for the pipeline.
+        **common_args: Common inference arguments forwarded to PaddleX (e.g.
+            ``device``, ``use_hpip``, ``use_tensorrt``).
+
+    Example:
+        >>> from paddleocr import PaddleOCR
+        >>> ocr = PaddleOCR(lang="en")
+        >>> results = ocr.predict("image.png")
+    """
+
     def __init__(
         self,
         *,
@@ -110,6 +131,12 @@ def _create_paddlex_pipeline(self):
 
 
 class PipelineCLISubcommandExecutor(CLISubcommandExecutor):
+    """Base class for pipeline CLI subcommand executors.
+
+    Registers a pipeline as a subcommand of the ``paddleocr`` CLI and handles
+    argument parsing and execution.
+    """
+
     @property
     @abc.abstractmethod
     def subparser_name(self):

diff --git a/paddleocr/_pipelines/ocr.py b/paddleocr/_pipelines/ocr.py
@@ -53,6 +53,54 @@
 
 # Be compatible with PaddleOCR 2.x interfaces
 class PaddleOCR(PaddleXPipelineWrapper):
+    """OCR pipeline that combines text detection and text recognition.
+
+    Runs the full OCR pipeline: optional document preprocessing (orientation
+    classification and unwarping), text detection, optional text-line
+    orientation classification, and text recognition.
+
+    Args:
+        lang (str | None): Language code for the input image (e.g. ``"ch"``,
+            ``"en"``, ``"fr"``).  Used to select default detection and
+            recognition models when no explicit model name/dir is provided.
+            Defaults to ``"ch"`` when ``None``.
+        ocr_version (str | None): PP-OCR version to use when ``lang`` is set.
+            One of ``"PP-OCRv3"``, ``"PP-OCRv4"``, ``"PP-OCRv5"``.  Defaults
+            to the latest available for the chosen language.
+        text_detection_model_name (str | None): Name of the text detection
+            model.  Overrides ``lang``/``ocr_version`` selection.
+        text_detection_model_dir (str | None): Local directory for the text
+            detection model.
+        text_recognition_model_name (str | None): Name of the text recognition
+            model.  Overrides ``lang``/``ocr_version`` selection.
+        text_recognition_model_dir (str | None): Local directory for the text
+            recognition model.
+        use_doc_orientation_classify (bool | None): Enable document orientation
+            classification preprocessing.
+        use_doc_unwarping (bool | None): Enable document unwarping
+            preprocessing.
+        use_textline_orientation (bool | None): Enable text-line orientation
+            classification.
+        text_det_thresh (float | None): Pixel-level detection threshold.
+        text_det_box_thresh (float | None): Box-level detection threshold.
+        text_det_unclip_ratio (float | None): Expansion ratio for detected
+            text bounding boxes.
+        text_rec_score_thresh (float | None): Minimum recognition confidence
+            to retain a result.
+        return_word_box (bool | None): Return per-word bounding boxes in
+            addition to line-level boxes.
+        **kwargs: Additional arguments forwarded to the base class (e.g.
+            ``device``, ``use_hpip``) or deprecated PaddleOCR 2.x parameter
+            names.
+
+    Example:
+        >>> from paddleocr import PaddleOCR
+        >>> ocr = PaddleOCR(lang="en")
+        >>> results = ocr.predict("image.png")
+        >>> for res in results:
+        ...     res.print()
+    """
+
     def __init__(
         self,
         doc_orientation_classify_model_name=None,
@@ -181,6 +229,33 @@ def predict_iter(
         text_rec_score_thresh=None,
         return_word_box=None,
     ):
+        """Run OCR on ``input`` and yield one result object per image.
+
+        Args:
+            input: Image path (str), URL, numpy array, PIL Image, or an
+                iterable of any of the above.
+            use_doc_orientation_classify (bool | None): Override the
+                constructor setting for this call.
+            use_doc_unwarping (bool | None): Override the constructor setting
+                for this call.
+            use_textline_orientation (bool | None): Override the constructor
+                setting for this call.
+            text_det_limit_side_len (int | None): Maximum side length for
+                text detection input resizing.
+            text_det_limit_type (str | None): How to apply the side-length
+                limit (``"max"`` or ``"min"``).
+            text_det_thresh (float | None): Override detection pixel threshold.
+            text_det_box_thresh (float | None): Override detection box
+                threshold.
+            text_det_unclip_ratio (float | None): Override box expansion ratio.
+            text_rec_score_thresh (float | None): Override recognition
+                confidence threshold.
+            return_word_box (bool | None): Override per-word box setting.
+
+        Yields:
+            PaddleX OCR result objects with ``.print()``, ``.save_to_img()``,
+            and ``.save_to_json()`` methods.
+        """
         return self.paddlex_pipeline.predict(
             input,
             use_doc_orientation_classify=use_doc_orientation_classify,
@@ -210,6 +285,15 @@ def predict(
         text_rec_score_thresh=None,
         return_word_box=None,
     ):
+        """Run OCR on ``input`` and return a list of result objects.
+
+        Convenience wrapper around :meth:`predict_iter` that collects all
+        results into a list.  See :meth:`predict_iter` for the full parameter
+        documentation.
+
+        Returns:
+            list: One result object per input image.
+        """
         return list(
             self.predict_iter(
                 input,

diff --git a/paddleocr/_pipelines/paddleocr_vl.py b/paddleocr/_pipelines/paddleocr_vl.py
@@ -35,6 +35,49 @@
 
 
 class PaddleOCRVL(PaddleXPipelineWrapper):
+    """Vision-Language document understanding pipeline (PaddleOCR-VL).
+
+    Uses a compact Vision-Language Model (VLM) — by default PaddleOCR-VL-1.5,
+    with 0.9B parameters — to parse document images into structured
+    Markdown.  Supports 109+ languages and handles challenging real-world
+    conditions such as skew, warping, scanning artifacts, and uneven
+    illumination.
+
+    Args:
+        pipeline_version (str): VL pipeline version.  ``"v1.5"`` (default)
+            uses PaddleOCR-VL-1.5; ``"v1"`` uses the earlier PaddleOCR-VL.
+        vl_rec_model_name (str | None): Name of the VL recognition model.
+        vl_rec_model_dir (str | None): Local directory for the VL model.
+        vl_rec_backend (str | None): Inference backend for the VL model.
+            One of ``"native"`` (default), ``"vllm-server"``,
+            ``"sglang-server"``, ``"fastdeploy-server"``,
+            ``"mlx-vlm-server"``, ``"llama-cpp-server"``.
+        vl_rec_server_url (str | None): Server URL when using a server
+            backend.
+        vl_rec_api_key (str | None): API key for API-based backends.
+        layout_detection_model_name (str | None): Name of the layout
+            detection model.
+        layout_detection_model_dir (str | None): Local directory for the
+            layout detection model.
+        use_doc_orientation_classify (bool | None): Enable document
+            orientation classification preprocessing.
+        use_doc_unwarping (bool | None): Enable document unwarping
+            preprocessing.
+        use_layout_detection (bool | None): Enable layout detection.
+        use_chart_recognition (bool | None): Enable chart parsing.
+        use_seal_recognition (bool | None): Enable seal text recognition.
+        use_ocr_for_image_block (bool | None): Run OCR on image blocks.
+        **kwargs: Additional arguments forwarded to the base class (e.g.
+            ``device``, ``use_hpip``).
+
+    Example:
+        >>> from paddleocr import PaddleOCRVL
+        >>> pipeline = PaddleOCRVL()
+        >>> results = pipeline.predict("document.png")
+        >>> for res in results:
+        ...     print(res.markdown)
+    """
+
     def __init__(
         self,
         pipeline_version=_DEFAULT_PIPELINE_VERSION,

diff --git a/paddleocr/_pipelines/pp_structurev3.py b/paddleocr/_pipelines/pp_structurev3.py
@@ -29,6 +29,44 @@
 
 
 class PPStructureV3(PaddleXPipelineWrapper):
+    """Document structure analysis pipeline (PP-StructureV3).
+
+    Parses complex documents into structured Markdown or JSON by combining
+    layout detection, OCR, table recognition, formula recognition, chart
+    recognition, and seal recognition sub-pipelines.
+
+    Args:
+        lang (str | None): Language code for OCR sub-pipelines (e.g.
+            ``"ch"``, ``"en"``).  Defaults to ``"ch"`` when ``None``.
+        ocr_version (str | None): PP-OCR version of the text detection and
+            recognition models.  One of ``"PP-OCRv3"``, ``"PP-OCRv4"``,
+            ``"PP-OCRv5"``.
+        layout_detection_model_name (str | None): Name of the layout
+            detection model.
+        layout_detection_model_dir (str | None): Local directory for the
+            layout detection model.
+        use_doc_orientation_classify (bool | None): Enable document
+            orientation classification.
+        use_doc_unwarping (bool | None): Enable document unwarping.
+        use_textline_orientation (bool | None): Enable text-line orientation
+            classification.
+        use_seal_recognition (bool | None): Enable seal text recognition.
+        use_table_recognition (bool | None): Enable table structure
+            recognition.
+        use_formula_recognition (bool | None): Enable formula recognition.
+        use_chart_recognition (bool | None): Enable chart parsing.
+        use_region_detection (bool | None): Enable region detection.
+        **kwargs: Additional arguments forwarded to the base class (e.g.
+            ``device``, ``use_hpip``).
+
+    Example:
+        >>> from paddleocr import PPStructureV3
+        >>> pipeline = PPStructureV3()
+        >>> results = pipeline.predict("document.pdf")
+        >>> for res in results:
+        ...     print(res.markdown)
+    """
+
     def __init__(
         self,
         layout_detection_model_name=None,