HumanSignal
diff --git a/‎.github/workflows/tests.yml‎
Lines changed: 3 additions & 0 deletions b/‎.github/workflows/tests.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎label_studio_ml/examples/docling/.dockerignore‎
Lines changed: 15 additions & 0 deletions b/‎label_studio_ml/examples/docling/.dockerignore‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎label_studio_ml/examples/docling/.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎label_studio_ml/examples/docling/.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎label_studio_ml/examples/docling/Dockerfile‎
Lines changed: 41 additions & 0 deletions b/‎label_studio_ml/examples/docling/Dockerfile‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎label_studio_ml/examples/docling/README.md‎
Lines changed: 132 additions & 0 deletions b/‎label_studio_ml/examples/docling/README.md‎
Lines changed: 132 additions & 0 deletions
diff --git a/‎label_studio_ml/examples/docling/_wsgi.py‎
Lines changed: 124 additions & 0 deletions b/‎label_studio_ml/examples/docling/_wsgi.py‎
Lines changed: 124 additions & 0 deletions
@@ -44,6 +44,9 @@ jobs:
       LOG_DIR: pytest_logs
       collect_analytics: false
       TEST_WITH_CPU: ${{ matrix.backend_dir_name == 'segment_anything_model' }}
+      # Keep the repo on pytest 6.x while preventing newer transitive anyio from auto-loading
+      # a pytest plugin that imports _pytest.scope (pytest 8+ only).
+      PYTEST_ADDOPTS: "-p no:anyio"
     steps:
       - uses: hmarr/debug-action@v3.0.0
 
 
@@ -43,6 +43,7 @@ Check the **Required parameters** column to see if you need to set any additiona
 | MODEL_NAME                                                                                 | Description                                                                                                                                          | Pre-annotation | Interactive mode | Training |  Required parameters  | Arbitrary or Set Labels?                                                   | 
 |--------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|----------------|------------------|----------|----------------------|----------------------------------------------------------------------------|
 | [bert_classifier](/label_studio_ml/examples/bert_classifier)                               | Text classification with [Huggingface](https://huggingface.co/transformers/v3.0.2/model_doc/auto.html#automodelforsequenceclassification)            | ✅              | ❌                | ✅        | None                       | Arbitrary|
+| [docling](/label_studio_ml/examples/docling)                                               | Layout via [IBM Docling SaaS](https://www.ibm.com/products/docling) (`DoclingServiceClient`) → ReactCode regions                                      | ✅              | ❌                | ❌        | DOCLING_SERVICE_URL, DOCLING_SERVE_API_KEY, LABEL_STUDIO_URL (uploads) | Set (layout categories) |
 | [easyocr](/label_studio_ml/examples/easyocr)                                               | Automated OCR. [EasyOCR](https://github.com/JaidedAI/EasyOCR)                                                                                        | ✅              | ❌                | ❌        | None                       | Set (characters)                                                           | 
 | [flair](/label_studio_ml/examples/flair)                                                   | NER by [flair](https://flairnlp.github.io/)                                                                                                          | ✅              | ❌                | ❌        | None                       | Arbitrary|
 | [gliner](/label_studio_ml/examples/gliner)                                                 | NER by [GLiNER](https://huggingface.co/spaces/tomaarsen/gliner_medium-v2.1)                                                                          | ❌              |  ✅  |  ✅  | None | Arbitrary|
 
@@ -0,0 +1,15 @@
+# Exclude everything
+**
+
+# Build / compose
+!Dockerfile
+!docker-compose.yml
+
+# Application
+!*.py
+
+# Requirements
+!requirements*.txt
+
+# Tests (optional in image when TEST_ENV=true)
+!test_api.py
@@ -0,0 +1,2 @@
+# Docker Compose bind mounts (created on first `docker compose up`)
+data/
@@ -0,0 +1,41 @@
+# BuildKit required for RUN --mount below. Omitting `# syntax=docker/dockerfile:1` avoids extra frontend pulls that can trigger grpc errors on some hosts.
+ARG PYTHON_VERSION=3.11
+
+FROM python:${PYTHON_VERSION}-slim-bookworm AS python-base
+ARG TEST_ENV
+
+WORKDIR /app
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PORT=${PORT:-9090} \
+    PIP_CACHE_DIR=/.cache \
+    WORKERS=1 \
+    THREADS=8 \
+    PIP_ROOT_USER_ACTION=ignore \
+    DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        libgl1 libglib2.0-0 curl wget git procps \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements-base.txt .
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip install -r requirements-base.txt
+
+COPY requirements.txt .
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    pip install -r requirements.txt
+
+COPY requirements-test.txt .
+RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
+    if [ "$TEST_ENV" = "true" ]; then \
+      pip install -r requirements-test.txt; \
+    fi
+
+COPY . .
+
+EXPOSE 9090
+
+# Shell form is kept so $PORT/$WORKERS/$THREADS are expanded; `exec` replaces the
+# `/bin/sh -c` wrapper so gunicorn runs as PID 1 and receives SIGTERM from `docker stop`.
+CMD exec gunicorn --preload --bind :$PORT --workers $WORKERS --threads $THREADS --timeout 0 _wsgi:app
@@ -0,0 +1,132 @@
+# Docling Serve backend (IBM Docling Workbench)
+
+## What this is for
+
+This backend connects Label Studio to **IBM Docling SaaS** using the Python **`DoclingServiceClient`** from the **`docling`** package (`from docling.service_client import DoclingServiceClient`). **Conversion runs on Docling’s servers**, not inside this container. For each task it resolves the file (usually via Label Studio–hosted storage), calls **`client.convert(source=…)`** with a local **`Path`** or an **`https://` URL string**, then maps **`result.document`** into **reactcode** predictions for the annotator.
+
+Use the **exact service URL** your tenant gives you (Integrate / Python snippet), including the path segment ending in **`/v1`**—for example  
+`https://api.aws-c1.dcls.saas.ibm.com/<instance>/v1`.
+
+The **`docling`** `DoclingServiceClient` builds paths like **`/v1/convert/...`** on top of its `url=` argument. IBM’s URL already ends with **`/v1`**, which would otherwise produce **`…/v1/v1/…`** requests (404/400). This example **strips one trailing `/v1`** from **`DOCLING_SERVICE_URL`** before creating the client—keep pasting the Workbench value unchanged.
+
+Typical workflow:
+
+1. Tasks include a **file URL** (PDF, image, etc.)—often an upload or storage URL managed by Label Studio.
+2. Annotators run predictions (or batch predict); this ML backend fetches the file (unless you use remote-URL-only mode), calls **`DoclingServiceClient.convert`**, and returns layout as reactcode regions.
+3. Reviewers adjust regions or labels on top of Docling’s structure.
+
+You need the **full SaaS service URL** and API key from Workbench. Separately, the backend must often **download task files** through Label Studio when URLs point at your instance—see **Label Studio URL and API key** below.
+
+## Label Studio URL and API key
+
+Set **`LABEL_STUDIO_URL`** and **`LABEL_STUDIO_API_KEY`** in `docker-compose.yml` (or your shell) whenever tasks reference **files hosted by Label Studio**—uploads, cloud storage integrations, or other URLs that Label Studio resolves for the ML backend.
+
+By default, the backend downloads the task file to a local cache path and passes that **`Path`** to **`convert`**. Set **`DOCLING_CONVERT_REMOTE_URL_ONLY=true`** to pass the task’s **`https://` URL** directly to the SaaS instead (this works only for URLs the Docling service can fetch without Label Studio authentication).
+
+Practical notes:
+
+- **`LABEL_STUDIO_URL`** must be reachable **from where the ML backend runs**. From Docker on your laptop, **`http://localhost:8080`** usually does **not** work inside the container; use your machine’s hostname/IP, **`http://host.docker.internal:8080`** (Docker Desktop), or another URL the container can route to. This compose file includes `extra_hosts` for `host.docker.internal` on macOS/Linux-friendly setups.
+- **`LABEL_STUDIO_API_KEY`** should be a **Personal Access Token** (or equivalent) for a user that can read the project’s tasks and attachments.
+
+Always include **`http://` or `https://`** in `LABEL_STUDIO_URL`. More background is in the repository [README](../../../README.md) under allowing the ML backend to access Label Studio data.
+
+## Prerequisites
+
+1. **`DOCLING_SERVICE_URL`** — full URL ending in **`/v1`** from IBM Docling Workbench (same as `DoclingServiceClient(url=…)`).
+2. **`DOCLING_SERVE_API_KEY`** — API key sent in the `X-Api-Key` header (the variable name is kept for backward compatibility).
+3. **`LABEL_STUDIO_URL`** / **`LABEL_STUDIO_API_KEY`** when tasks use Label Studio–hosted files (typical for uploads).
+
+## Quick start (Docker)
+
+```bash
+cd label_studio_ml/examples/docling
+# Set DOCLING_SERVICE_URL, DOCLING_SERVE_API_KEY, LABEL_STUDIO_URL, LABEL_STUDIO_API_KEY in docker-compose.yml
+docker compose up --build
+```
+
+The ML backend listens on **`http://localhost:9090`**. Register that URL in your Label Studio project’s machine learning settings.
+
+## Docling SaaS configuration
+
+| Variable | Required | Description |
+|----------|----------|-------------|
+| `DOCLING_SERVICE_URL` | Yes | Full **`DoclingServiceClient`** URL including path to **`/v1`** (fallback env name: `DOCLING_SERVE_URL`). |
+| `DOCLING_SERVE_API_KEY` | Often | API key (`X-Api-Key`). Alias: `DOCLING_API_KEY`. |
+| `DOCLING_CONVERT_REMOTE_URL_ONLY` | No | If `true`, pass the task **`https://` URL** as `convert(source=url)` instead of downloading via Label Studio first. |
+| `DOCLING_CONVERT_SOURCE_HEADERS_JSON` | No | Extra HTTP headers (JSON object) merged into **`convert`** when using remote URLs / headers the client supports. |
+| `DOCLING_SERVE_TIMEOUT` | No | Job / read timeout in seconds (default `600`). |
+| `DOCLING_HTTP_CONNECT_TIMEOUT` | No | Connect timeout (default `30`). |
+
+Optional tuning: `DOCLING_PAGE_NO`, `DOCLING_PREDICT_READING_ORDER`, `DOCLING_READING_ORDER_LEVEL`, `DOCLING_CONTENT_LAYERS`, `DOCLING_REACTCODE_FROM_NAME`, `DOCLING_REACTCODE_TO_NAME`, `DOCLING_TASK_DATA_KEY`.
+
+The **`docling`** PyPI package (**≥2.90**) provides **`DoclingServiceClient`**; conversion behavior is determined by **your SaaS tenant** and may differ from what the open-source Docling documentation describes.
+
+## Label Studio configuration
+
+| Variable | Description |
+|----------|-------------|
+| `LABEL_STUDIO_URL` | Base URL of Label Studio, reachable from this backend (see above). |
+| `LABEL_STUDIO_API_KEY` | Token so the backend can download task attachments when needed. |
+
+Predictions are **`reactcode`** regions (rectangle / polyline payloads with percent coordinates), aligned with the Label Studio Enterprise ReactCode UI—see **`docling_labeling_config.xml`** in this folder.
+
+## Running locally (without Docker)
+
+```bash
+pip install -r requirements-base.txt -r requirements.txt
+export DOCLING_SERVICE_URL=https://api.aws-c1.dcls.saas.ibm.com/your-instance/v1
+export DOCLING_SERVE_API_KEY=your-api-key
+export LABEL_STUDIO_URL=http://host.docker.internal:8080
+export LABEL_STUDIO_API_KEY=your-label-studio-token
+python _wsgi.py -p 9090
+```
+
+Adjust `LABEL_STUDIO_URL` if Label Studio runs on the same machine without Docker (for example `http://127.0.0.1:8080`).
+
+## Validate
+
+```bash
+curl http://localhost:9090/
+```
+
+Expected: `{"status":"UP"}`.
+
+## Troubleshooting
+
+### Wrong SaaS URL
+
+**`DOCLING_SERVICE_URL`** must match the URL Workbench gives you (through **`/v1`**). The backend normalizes it so routes are not doubled—see the note above if you see **`/v1/v1/`** in logs.
+
+### No predictions / “nothing happens” (no errors in the UI)
+
+Label Studio often shows **no message** when the ML backend returns **empty `results`** (HTTP 200 with an empty list). Check **Docker logs** for this container:
+
+```bash
+docker compose logs -f docling
+```
+
+You should see a line like **`Docling predict: N task(s)`** whenever you run predictions. If you see **`Docling produced zero predictions`**, scroll up in the same log for **`No file URL found`** or Docling **`API error`** lines.
+
+Common fixes:
+
+1. **Placeholder URL** — Replace **`YOUR_INSTANCE_SEGMENT`** in **`DOCLING_SERVICE_URL`** with the real path from Workbench.
+2. **Wrong task field** — Tasks must expose a **file URL** under the key your labeling config expects (often **`undefined`**). Override with **`DOCLING_TASK_DATA_KEY`** if needed.
+3. **`LOG_LEVEL`** — Defaults to **`INFO`** in `_wsgi.py` when unset.
+4. **Upload / `/storage-data/` URLs** — `model.py` downloads via **`label_studio_sdk`** using **`LABEL_STUDIO_URL`** (same **scheme + host + port** as in your browser; a wrong host breaks auth headers), **`LABEL_STUDIO_API_KEY`**, and network reachability from this container (`host.docker.internal` instead of `localhost` on Docker Desktop). For self-signed HTTPS, set **`VERIFY_SSL=false`** on the ML backend. When the download fails, the logs include the **HTTP status and a response snippet**.
+
+Sanity checks:
+
+```bash
+curl -s http://localhost:9090/health
+curl -s http://localhost:9090/
+```
+
+Both should return JSON including **`"status":"UP"`**.
+
+### Empty or tiny downloaded files
+
+Check **`LABEL_STUDIO_URL`** / **`LABEL_STUDIO_API_KEY`** and logs for `Docling task … local_path=… size=…`. A size of **0** or failed stat (`-1` in logs) usually means the file did not download correctly before conversion.
+
+## Layout of this example
+
+Like other backends under `label_studio_ml/examples/` (for example `easyocr/`), this directory includes `_wsgi.py`, `model.py`, `requirements-base.txt`, `requirements.txt`, `Dockerfile`, `docker-compose.yml`, and tests. **`docker-compose.yml`** bind-mounts `./data/server` and `./data/.file-cache` for runtime caches; Docker creates those paths on the host when you first run Compose—they are not checked into git (see `.gitignore`).
@@ -0,0 +1,124 @@
+import os
+import argparse
+import json
+import logging
+import logging.config
+
+_LOG_LEVEL = (os.getenv("LOG_LEVEL") or "INFO").upper()  # LOG_LEVEL env var, upper-cased; unset or empty falls back to INFO
+
+logging.config.dictConfig({  # configure logging at import time, before the project imports that follow
+  "version": 1,
+  "disable_existing_loggers": False,  # loggers created before this call stay enabled
+  "formatters": {
+    "standard": {
+      "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
+    }
+  },
+  "handlers": {
+    "console": {
+      "class": "logging.StreamHandler",
+      "level": _LOG_LEVEL,  # the handler filters by its own level, independently of the root logger level
+      "stream": "ext://sys.stdout",  # write records to stdout
+      "formatter": "standard"
+    }
+  },
+  "root": {
+    "level": _LOG_LEVEL,
+    "handlers": [
+      "console"
+    ],
+    "propagate": True
+  }
+})
+
+from label_studio_ml.api import init_app
+from model import Docling
+
+
+_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json')
+
+
+def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH):
+    if not os.path.exists(config_path):
+        return dict()
+    with open(config_path) as f:
+        config = json.load(f)
+    assert isinstance(config, dict)
+    return config
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Label studio')
+    parser.add_argument(
+        '-p', '--port', dest='port', type=int, default=9090,
+        help='Server port')
+    parser.add_argument(
+        '--host', dest='host', type=str, default='0.0.0.0',
+        help='Server host')
+    parser.add_argument(
+        '--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('='),
+        help='Additional LabelStudioMLBase model initialization kwargs')
+    parser.add_argument(
+        '-d', '--debug', dest='debug', action='store_true',
+        help='Switch debug mode')
+    parser.add_argument(
+        '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None,
+        help='Logging level')
+    parser.add_argument(
+        '--model-dir', dest='model_dir', default=os.path.dirname(__file__),
+        help='Directory where models are stored (relative to the project directory)')
+    parser.add_argument(
+        '--check', dest='check', action='store_true',
+        help='Validate model instance before launching server')
+    parser.add_argument('--basic-auth-user',
+                        default=os.environ.get('ML_SERVER_BASIC_AUTH_USER', None),
+                        help='Basic auth user')
+
+    parser.add_argument('--basic-auth-pass',
+                        default=os.environ.get('ML_SERVER_BASIC_AUTH_PASS', None),
+                        help='Basic auth pass')
+
+    args = parser.parse_args()
+
+    # setup logging level
+    if args.log_level:
+        logging.root.setLevel(args.log_level)
+
+    def isfloat(value):
+        try:
+            float(value)
+            return True
+        except ValueError:
+            return False
+
+    def parse_kwargs():
+        param = dict()
+        for k, v in args.kwargs:
+            if v.isdigit():
+                param[k] = int(v)
+            elif v == 'True' or v == 'true':
+                param[k] = True
+            elif v == 'False' or v == 'false':
+                param[k] = False
+            elif isfloat(v):
+                param[k] = float(v)
+            else:
+                param[k] = v
+        return param
+
+    kwargs = get_kwargs_from_config()
+
+    if args.kwargs:
+        kwargs.update(parse_kwargs())
+
+    if args.check:
+        print('Check "' + Docling.__name__ + '" instance creation..')
+        model = Docling(**kwargs)
+
+    app = init_app(model_class=Docling, basic_auth_user=args.basic_auth_user, basic_auth_pass=args.basic_auth_pass)
+
+    app.run(host=args.host, port=args.port, debug=args.debug)
+
+else:
+    # for uWSGI use
+    app = init_app(model_class=Docling)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	+# Docker Compose bind mounts (created on first `docker compose up`)
	`2`	`+data/`