HumanSignal
diff --git a/‎.github/docker-build-config.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/docker-build-config.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 1 deletion b/‎README.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎label_studio_ml/examples/ppocr/.dockerignore‎
Lines changed: 52 additions & 0 deletions b/‎label_studio_ml/examples/ppocr/.dockerignore‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎label_studio_ml/examples/ppocr/Dockerfile‎
Lines changed: 51 additions & 0 deletions b/‎label_studio_ml/examples/ppocr/Dockerfile‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎label_studio_ml/examples/ppocr/Dockerfile.gpu‎
Lines changed: 59 additions & 0 deletions b/‎label_studio_ml/examples/ppocr/Dockerfile.gpu‎
Lines changed: 59 additions & 0 deletions
@@ -23,6 +23,8 @@
 - backend_dir_name: mmdetection-3
   backend_tag_prefix: mmdetection3-
   runs_on: ubuntu-latest-4c-16gb
+- backend_dir_name: ppocr
+  backend_tag_prefix: ppocr-
 - backend_dir_name: nemo_asr
   backend_tag_prefix: nemoasr-
   runs_on: ubuntu-latest-4c-16gb
 
@@ -56,7 +56,8 @@ Check the **Required parameters** column to see if you need to set any additiona
 | [langchain_search_agent](/label_studio_ml/examples/langchain_search_agent)                 | RAG pipeline with Google Search and [Langchain](https://langchain.com/)                                                                              | ✅              | ✅                | ✅        | OPENAI_API_KEY, GOOGLE_CSE_ID, GOOGLE_API_KEY | Arbitrary | 
 | [llm_interactive](/label_studio_ml/examples/llm_interactive)                               | Prompt engineering with [OpenAI](https://platform.openai.com/), Azure LLMs.                                                                          | ✅              | ✅                | ✅        | OPENAI_API_KEY             | Arbitrary                                                                  | 
 | [mmdetection](/label_studio_ml/examples/mmdetection-3)                                     | Object Detection with [OpenMMLab](https://github.com/open-mmlab/mmdetection)                                                                         | ✅              | ❌                | ❌        | None                       | Arbitrary | 
-| [nemo_asr](/label_studio_ml/examples/nemo_asr)                                             | Speech ASR by [NVIDIA NeMo](https://github.com/NVIDIA/NeMo)                                                                                          | ✅              | ❌                | ❌        | None                       | Set (vocabulary and characters) | 
+| [nemo_asr](/label_studio_ml/examples/nemo_asr)                                             | Speech ASR by [NVIDIA NeMo](https://github.com/NVIDIA/NeMo)                                                                                          | ✅              | ❌                | ❌        | None                       | Set (vocabulary and characters) |
+| [paddleocr](/label_studio_ml/examples/ppocr)                                               | OCR with [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) (PP-OCRv5)                                                                           | ✅              | ❌                | ❌        | None                       | Set (characters)                                                           |
 | [segment_anything_2_image](/label_studio_ml/examples/segment_anything_2_image)             | Image segmentation with [SAM 2](https://github.com/facebookresearch/segment-anything-2)                                                              | ❌              | ✅ | ❌ | None| Arbitrary|
 | [segment_anything_model](/label_studio_ml/examples/segment_anything_model)                 | Image segmentation by [Meta](https://segment-anything.com/)                                                                                          | ❌              | ✅                |   ❌       | None                       | Arbitrary                                                                  |
 | [sklearn_text_classifier](/label_studio_ml/examples/sklearn_text_classifier)               | Text classification with [scikit-learn](https://scikit-learn.org/stable/)                                                                            | ✅              | ❌                | ✅        | None                        | Arbitrary | 
 
@@ -0,0 +1,52 @@
+# Git metadata (not needed inside the image)
+.git
+.gitignore
+
+# Python bytecode, native extensions, and packaging/build artifacts
+__pycache__
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments (dependencies are installed during the image build)
+venv/
+ENV/
+env/
+.venv/
+
+# IDE and editor files
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Test and coverage artifacts
+.pytest_cache/
+.coverage
+htmlcov/
+
+# Data directories (should be mounted as volumes)
+data/
+
+# Logs
+*.log
+
+# Local config and environment files (may hold secrets; keep out of the build context)
+config.json
+.env
@@ -0,0 +1,51 @@
+# syntax=docker/dockerfile:1
+# PP-OCR ML Backend for Label Studio (CPU version)
+#
+# Uses the official PaddleX Docker image which includes
+# PaddlePaddle and PaddleX pre-installed.
+#
+# Build arguments:
+#   TEST_ENV: Set to "true" to install test dependencies
+
+FROM ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-cpu
+
+WORKDIR /app
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PORT=9090 \
+    PIP_CACHE_DIR=/.cache \
+    WORKERS=1 \
+    THREADS=8 \
+    DEVICE=cpu
+
+# Install base requirements (label-studio-ml and gunicorn)
+COPY requirements-base.txt .
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip install -r requirements-base.txt
+
+# Install PaddleX OCR extra dependencies
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip install "paddlex[ocr]"
+
+# Install custom requirements (boto3, opencv, etc.)
+COPY requirements.txt .
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip install -r requirements.txt
+
+# Install test requirements if needed (ARG declared late so toggling it keeps earlier layers cached)
+ARG TEST_ENV
+COPY requirements-test.txt .
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    if [ "$TEST_ENV" = "true" ]; then \
+        pip install -r requirements-test.txt; \
+    fi
+
+# Copy application code
+COPY . .
+
+EXPOSE 9090
+
+# Exec form with an explicit `sh -c` keeps $PORT/$WORKERS/$THREADS expansion, while
+# `exec` replaces the shell so gunicorn runs as PID 1 and receives SIGTERM on `docker stop`.
+CMD ["/bin/sh", "-c", "exec gunicorn --preload --bind :$PORT --workers $WORKERS --threads $THREADS --timeout 0 _wsgi:app"]
@@ -0,0 +1,59 @@
+# syntax=docker/dockerfile:1
+# PP-OCR ML Backend for Label Studio (GPU version)
+#
+# Uses the official PaddleX Docker image which includes
+# PaddlePaddle and PaddleX pre-installed.
+#
+# Build arguments:
+#   CUDA_VERSION: cuda11.8 (default) or cuda12.6
+#   TEST_ENV: Set to "true" to install test dependencies
+
+ARG CUDA_VERSION=cuda11.8
+
+# GPU with CUDA 11.8 (default)
+FROM ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-gpu-cuda11.8-cudnn8.9-trt8.6 AS gpu-cuda11.8
+
+# GPU with CUDA 12.6
+FROM ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/paddlex:paddlex3.1.2-paddlepaddle3.0.0-gpu-cuda12.6-cudnn9.5-trt10.5 AS gpu-cuda12.6
+
+# Select the appropriate base image
+FROM gpu-${CUDA_VERSION} AS base
+
+WORKDIR /app
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PORT=9090 \
+    PIP_CACHE_DIR=/.cache \
+    WORKERS=1 \
+    THREADS=8
+
+# Install base requirements (label-studio-ml and gunicorn)
+COPY requirements-base.txt .
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip install -r requirements-base.txt
+
+# Install PaddleX OCR extra dependencies
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip install "paddlex[ocr]"
+
+# Install custom requirements (boto3, opencv, etc.)
+COPY requirements.txt .
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip install -r requirements.txt
+
+# Install test requirements if needed (ARG declared late so toggling it keeps earlier layers cached)
+ARG TEST_ENV
+COPY requirements-test.txt .
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    if [ "$TEST_ENV" = "true" ]; then \
+        pip install -r requirements-test.txt; \
+    fi
+
+# Copy application code
+COPY . .
+
+EXPOSE 9090
+
+# `sh -c` keeps $VAR expansion; `exec` makes gunicorn PID 1 so `docker stop` signals reach it.
+CMD ["/bin/sh", "-c", "exec gunicorn --preload --bind :$PORT --workers $WORKERS --threads $THREADS --timeout 0 _wsgi:app"]