Skip to content

Commit b3d3f95

Browse files
feat(docling-serve): add DoclingServeConverter integration
Adds a new DoclingServeConverter component that converts documents by sending them to a running docling-serve HTTP server. Supports local files, URLs, and ByteStreams; markdown, text, and JSON export formats; optional API key authentication; and both sync (run) and async (arun) execution. Closes #2960
1 parent 3599976 commit b3d3f95

4 files changed

Lines changed: 22 additions & 8 deletions

File tree

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
loaders:
2+
- modules:
3+
- haystack_integrations.components.converters.docling_serve.converter
4+
search_path: [../src]
5+
processors:
6+
- type: filter
7+
documented_only: true
8+
skip_empty_modules: true
9+
renderer:
10+
description: DoclingServe integration for Haystack
11+
id: integrations-docling-serve
12+
filename: docling_serve.md
13+
title: DoclingServe

integrations/docling_serve/src/haystack_integrations/components/converters/docling_serve/converter.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,9 @@ class ExportType(str, Enum):
3434
@component
3535
class DoclingServeConverter:
3636
"""
37-
Converts documents to Haystack Documents using a [DoclingServe](https://github.com/docling-project/docling-serve)
38-
server.
37+
Converts documents to Haystack Documents using a DoclingServe server.
38+
39+
See [DoclingServe](https://github.com/docling-project/docling-serve).
3940
4041
DoclingServe hosts Docling in a scalable HTTP server, supporting PDFs, Office documents, HTML, and many other
4142
formats. Unlike the local `DoclingConverter`, this component has no heavy ML dependencies — all processing

integrations/docling_serve/src/haystack_integrations/components/converters/py.typed

Whitespace-only changes.

integrations/docling_serve/tests/test_converter.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,9 @@
44

55
import json
66
import logging
7-
from pathlib import Path
87
from unittest.mock import AsyncMock, MagicMock, patch
98

109
import pytest
11-
from haystack import Document
1210
from haystack.dataclasses import ByteStream
1311
from haystack.utils import Secret
1412

@@ -82,10 +80,12 @@ def test_from_dict(self):
8280
assert converter.export_type == ExportType.JSON
8381
assert converter.timeout == 60.0
8482

85-
def test_to_dict_with_api_key(self):
86-
converter = DoclingServeConverter(api_key=Secret.from_token("test-key"))
83+
def test_to_dict_with_api_key(self, monkeypatch):
84+
monkeypatch.setenv("DOCLING_API_KEY", "test-key")
85+
converter = DoclingServeConverter(api_key=Secret.from_env_var("DOCLING_API_KEY"))
8786
data = converter.to_dict()
8887
assert data["init_parameters"]["api_key"] is not None
88+
assert data["init_parameters"]["api_key"]["type"] == "env_var"
8989

9090
def test_roundtrip(self):
9191
converter = DoclingServeConverter(
@@ -207,7 +207,7 @@ def test_run_with_meta(self):
207207
def test_run_bytestream_meta_merged(self):
208208
converter = DoclingServeConverter()
209209
mock_resp = _mock_response("text")
210-
bs = ByteStream(data=b"bytes", meta={"file_path": "/tmp/doc.pdf"})
210+
bs = ByteStream(data=b"bytes", meta={"file_path": "doc.pdf"})
211211

212212
with patch("httpx.Client") as mock_client_cls:
213213
mock_client = MagicMock()
@@ -219,7 +219,7 @@ def test_run_bytestream_meta_merged(self):
219219
result = converter.run(sources=[bs], meta={"page": 1})
220220

221221
doc = result["documents"][0]
222-
assert doc.meta["file_path"] == "/tmp/doc.pdf"
222+
assert doc.meta["file_path"] == "doc.pdf"
223223
assert doc.meta["page"] == 1
224224

225225
def test_run_skips_on_http_error(self, caplog):

0 commit comments

Comments
 (0)