Skip to content

Commit 7e78b98

Browse files
SyedShahmeerAli12, julian-risch, claude
authored
feat: add Docling Serve integration with DoclingServeConverter component (#3173)
Co-authored-by: Julian Risch <julian.risch@deepset.ai>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 98df196 commit 7e78b98

11 files changed

Lines changed: 977 additions & 0 deletions

File tree

.github/labeler.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ integration:docling:
7979
- any-glob-to-any-file: "integrations/docling/**/*"
8080
- any-glob-to-any-file: ".github/workflows/docling.yml"
8181

82+
integration:docling-serve:
83+
- changed-files:
84+
- any-glob-to-any-file: "integrations/docling_serve/**/*"
85+
- any-glob-to-any-file: ".github/workflows/docling_serve.yml"
86+
8287
integration:dspy:
8388
- changed-files:
8489
- any-glob-to-any-file: "integrations/dspy/**/*"

.github/workflows/CI_coverage_comment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ on:
1717
- "Test / cohere"
1818
- "Test / cometapi"
1919
- "Test / deepeval"
20+
- "Test / docling_serve"
2021
- "Test / dspy"
2122
- "Test / e2b"
2223
- "Test / elasticsearch"
.github/workflows/docling_serve.yml

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
name: Test / docling_serve
2+
3+
on:
4+
schedule:
5+
- cron: "0 0 * * *"
6+
pull_request:
7+
paths:
8+
- "integrations/docling_serve/**"
9+
- "!integrations/docling_serve/*.md"
10+
- ".github/workflows/docling_serve.yml"
11+
push:
12+
branches:
13+
- main
14+
paths:
15+
- "integrations/docling_serve/**"
16+
- "!integrations/docling_serve/*.md"
17+
- ".github/workflows/docling_serve.yml"
18+
19+
defaults:
20+
run:
21+
working-directory: integrations/docling_serve
22+
23+
concurrency:
24+
group: docling_serve-${{ github.head_ref }}
25+
cancel-in-progress: true
26+
27+
env:
28+
PYTHONUNBUFFERED: "1"
29+
FORCE_COLOR: "1"
30+
31+
jobs:
32+
run:
33+
name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
34+
runs-on: ${{ matrix.os }}
35+
strategy:
36+
fail-fast: false
37+
matrix:
38+
os: [ubuntu-latest]
39+
python-version: ["3.10", "3.14"]
40+
41+
services:
42+
docling-serve:
43+
image: ghcr.io/docling-project/docling-serve:latest
44+
ports:
45+
- 5001:5001
46+
options: >-
47+
--health-cmd "curl -f http://localhost:5001/health || exit 1"
48+
--health-interval 10s
49+
--health-timeout 5s
50+
--health-retries 10
51+
52+
steps:
53+
- uses: actions/checkout@v4
54+
55+
- name: Set up Python ${{ matrix.python-version }}
56+
uses: actions/setup-python@v5
57+
with:
58+
python-version: ${{ matrix.python-version }}
59+
60+
- name: Install Hatch
61+
run: pip install hatch
62+
63+
- name: Lint
64+
if: matrix.python-version == '3.10' && runner.os == 'Linux'
65+
run: hatch run fmt-check && hatch run test:types
66+
67+
- name: Run unit tests
68+
run: hatch run test:unit-cov-retry
69+
70+
- name: Store unit tests coverage
71+
id: coverage_comment
72+
if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name != 'schedule'
73+
uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
74+
with:
75+
GITHUB_TOKEN: ${{ github.token }}
76+
COVERAGE_PATH: integrations/docling_serve
77+
SUBPROJECT_ID: docling_serve
78+
MINIMUM_GREEN: 90
79+
MINIMUM_ORANGE: 60
80+
81+
- name: Upload coverage comment to be posted
82+
if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name == 'pull_request' && steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true'
83+
uses: actions/upload-artifact@v4
84+
with:
85+
name: coverage-comment-docling_serve
86+
path: python-coverage-comment-action-docling_serve.txt
87+
88+
- name: Run integration tests
89+
run: hatch run test:integration-cov-append-retry
90+
91+
- name: Store combined coverage
92+
if: github.event_name == 'push'
93+
uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
94+
with:
95+
GITHUB_TOKEN: ${{ github.token }}
96+
COVERAGE_PATH: integrations/docling_serve
97+
SUBPROJECT_ID: docling_serve-combined
98+
MINIMUM_GREEN: 90
99+
MINIMUM_ORANGE: 60
100+
101+
- name: Run unit tests with lowest direct dependencies
102+
if: github.event_name != 'push'
103+
run: |
104+
hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
105+
hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
106+
hatch run test:unit
107+
108+
- name: Nightly - run unit tests with Haystack main branch
109+
if: github.event_name == 'schedule'
110+
run: |
111+
hatch env prune
112+
hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
113+
hatch run test:unit
114+
115+
notify-slack-on-failure:
116+
needs: run
117+
if: failure() && github.event_name == 'schedule'
118+
runs-on: ubuntu-latest
119+
steps:
120+
- uses: deepset-ai/notify-slack-action@3cda73b77a148f16f703274198e7771340cf862b # v1
121+
with:
122+
slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
4141
| [cometapi-haystack](integrations/cometapi/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/cometapi-haystack.svg)](https://pypi.org/project/cometapi-haystack) | [![Test / cometapi](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cometapi.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cometapi.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-cometapi/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-cometapi/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-cometapi-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-cometapi-combined/htmlcov/index.html) |
4242
| [deepeval-haystack](integrations/deepeval/) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/deepeval-haystack.svg)](https://pypi.org/project/deepeval-haystack) | [![Test / deepeval](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-deepeval/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-deepeval/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-deepeval-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-deepeval-combined/htmlcov/index.html) |
4343
| [docling-haystack](integrations/docling/) | Converter | [![PyPI - Version](https://img.shields.io/pypi/v/docling-haystack.svg)](https://pypi.org/project/docling-haystack) | [![Test / docling](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/docling.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/docling.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-docling/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-docling/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-docling-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-docling-combined/htmlcov/index.html) |
44+
| [docling-serve-haystack](integrations/docling_serve/) | Converter | [![PyPI - Version](https://img.shields.io/pypi/v/docling-serve-haystack.svg)](https://pypi.org/project/docling-serve-haystack) | [![Test / docling_serve](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/docling_serve.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/docling_serve.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-docling_serve/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-docling_serve/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-docling_serve-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-docling_serve-combined/htmlcov/index.html) |
4445
| [dspy-haystack](integrations/dspy/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/dspy-haystack.svg)](https://pypi.org/project/dspy-haystack) | [![Test / dspy](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/dspy.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/dspy.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-dspy/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-dspy/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-dspy-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-dspy-combined/htmlcov/index.html) |
4546
| [e2b-haystack](integrations/e2b/) | Tool | [![PyPI - Version](https://img.shields.io/pypi/v/e2b-haystack.svg)](https://pypi.org/project/e2b-haystack) | [![Test / e2b](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/e2b.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/e2b.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-e2b/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-e2b/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-e2b-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-e2b-combined/htmlcov/index.html) |
4647
| [elasticsearch-haystack](integrations/elasticsearch/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/elasticsearch-haystack.svg)](https://pypi.org/project/elasticsearch-haystack) | [![Test / elasticsearch](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-elasticsearch/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-elasticsearch/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-elasticsearch-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-elasticsearch-combined/htmlcov/index.html) |
integrations/docling_serve/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# docling-serve-haystack
2+
3+
[![PyPI - Version](https://img.shields.io/pypi/v/docling-serve-haystack.svg)](https://pypi.org/project/docling-serve-haystack)
4+
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling-serve-haystack.svg)](https://pypi.org/project/docling-serve-haystack)
5+
6+
- [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/docling_serve/CHANGELOG.md)
7+
8+
---
9+
10+
Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).
integrations/docling_serve/pydoc/config_docusaurus.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
loaders:
2+
- modules:
3+
- haystack_integrations.components.converters.docling_serve.converter
4+
search_path: [../src]
5+
processors:
6+
- type: filter
7+
documented_only: true
8+
skip_empty_modules: true
9+
renderer:
10+
description: DoclingServe integration for Haystack
11+
id: integrations-docling-serve
12+
filename: docling_serve.md
13+
title: DoclingServe
integrations/docling_serve/pyproject.toml

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
[build-system]
2+
requires = ["hatchling", "hatch-vcs"]
3+
build-backend = "hatchling.build"
4+
5+
[project]
6+
name = "docling-serve-haystack"
7+
dynamic = ["version"]
8+
description = "Haystack converter component for DoclingServe — document conversion via HTTP"
9+
readme = "README.md"
10+
requires-python = ">=3.10"
11+
license = "Apache-2.0"
12+
keywords = ["Haystack", "Docling", "DoclingServe", "document conversion", "PDF", "OCR"]
13+
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
14+
classifiers = [
15+
"License :: OSI Approved :: Apache Software License",
16+
"Development Status :: 4 - Beta",
17+
"Programming Language :: Python",
18+
"Programming Language :: Python :: 3.10",
19+
"Programming Language :: Python :: 3.11",
20+
"Programming Language :: Python :: 3.12",
21+
"Programming Language :: Python :: 3.13",
22+
"Programming Language :: Python :: 3.14",
23+
"Programming Language :: Python :: Implementation :: CPython",
24+
"Programming Language :: Python :: Implementation :: PyPy",
25+
]
26+
dependencies = [
27+
"haystack-ai>=2.9.0",
28+
"httpx>=0.27.0",
29+
]
30+
31+
[project.urls]
32+
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/docling_serve#readme"
33+
Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues"
34+
Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/docling_serve"
35+
36+
[tool.hatch.build.targets.wheel]
37+
packages = ["src/haystack_integrations"]
38+
39+
[tool.hatch.version]
40+
source = "vcs"
41+
tag-pattern = 'integrations\/docling_serve-v(?P<version>.*)'
42+
43+
[tool.hatch.version.raw-options]
44+
root = "../.."
45+
git_describe_command = 'git describe --tags --match="integrations/docling_serve-v[0-9]*"'
46+
47+
[tool.hatch.envs.default]
48+
installer = "uv"
49+
dependencies = ["haystack-pydoc-tools", "ruff"]
50+
51+
[tool.hatch.envs.default.scripts]
52+
docs = ["haystack-pydoc pydoc/config_docusaurus.yml"]
53+
fmt = "ruff check --fix {args}; ruff format {args}"
54+
fmt-check = "ruff check {args} && ruff format --check {args}"
55+
56+
[tool.hatch.envs.test]
57+
dependencies = [
58+
"pytest",
59+
"pytest-asyncio",
60+
"pytest-cov",
61+
"pytest-rerunfailures",
62+
"mypy",
63+
"pip",
64+
]
65+
66+
[tool.hatch.envs.test.scripts]
67+
unit = 'pytest -m "not integration" {args:tests}'
68+
integration = 'pytest -m "integration" {args:tests}'
69+
all = 'pytest {args:tests}'
70+
unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}'
71+
integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}'
72+
types = "mypy -p haystack_integrations.components.converters.docling_serve {args}"
73+
74+
[tool.mypy]
75+
install_types = true
76+
non_interactive = true
77+
check_untyped_defs = true
78+
disallow_incomplete_defs = true
79+
80+
[[tool.mypy.overrides]]
81+
module = ["httpx"]
82+
ignore_missing_imports = true
83+
84+
[tool.ruff]
85+
line-length = 120
86+
87+
[tool.ruff.lint]
88+
select = [
89+
"A",
90+
"ANN",
91+
"ARG",
92+
"B",
93+
"C",
94+
"D102",
95+
"D103",
96+
"D205",
97+
"D209",
98+
"D213",
99+
"D417",
100+
"D419",
101+
"DTZ",
102+
"E",
103+
"EM",
104+
"F",
105+
"I",
106+
"ICN",
107+
"ISC",
108+
"N",
109+
"PLC",
110+
"PLE",
111+
"PLR",
112+
"PLW",
113+
"Q",
114+
"RUF",
115+
"S",
116+
"T",
117+
"TID",
118+
"UP",
119+
"W",
120+
"YTT",
121+
]
122+
ignore = [
123+
"B027",
124+
"B008",
125+
"S105",
126+
"S106",
127+
"S107",
128+
"C901",
129+
"PLR0911",
130+
"PLR0912",
131+
"PLR0913",
132+
"PLR0915",
133+
"ANN401",
134+
]
135+
136+
[tool.ruff.lint.isort]
137+
known-first-party = ["haystack_integrations"]
138+
139+
[tool.ruff.lint.flake8-tidy-imports]
140+
ban-relative-imports = "parents"
141+
142+
[tool.ruff.lint.per-file-ignores]
143+
"tests/**/*" = ["PLR2004", "S101", "TID252", "D", "ANN"]
144+
145+
[tool.coverage.run]
146+
source = ["haystack_integrations"]
147+
branch = true
148+
relative_files = true
149+
parallel = false
150+
151+
[tool.coverage.report]
152+
omit = ["*/tests/*", "*/__init__.py"]
153+
show_missing = true
154+
exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
155+
156+
[tool.pytest.ini_options]
157+
addopts = "--strict-markers"
158+
markers = [
159+
"integration: integration tests",
160+
]
161+
log_cli = true
162+
asyncio_mode = "auto"
163+
asyncio_default_fixture_loop_scope = "function"
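integrations/docling_serve/src/haystack_integrations/components/converters/docling_serve/__init__.py

Lines changed: 7 additions & 0 deletions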
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from haystack_integrations.components.converters.docling_serve.converter import DoclingServeConverter, ExportType
6+
7+
__all__ = ["DoclingServeConverter", "ExportType"]

0 commit comments

Comments
 (0)