Skip to content

Commit bae9255

Browse files
committed
Align to docling datamodel changes
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
1 parent 852592f commit bae9255

8 files changed

Lines changed: 56 additions & 31 deletions

File tree

docling_jobkit/convert/chunking.py

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -56,11 +56,9 @@
5656
DoclingTaskResult,
5757
DocumentResultItem,
5858
ExportDocumentResponse,
59-
ExportResult,
6059
RemoteTargetResult,
6160
ResultType,
6261
ZipArchiveResult,
63-
_to_export_result,
6462
)
6563
from docling_jobkit.datamodel.task import Task
6664
from docling_jobkit.public_errors import render_public_error_list
@@ -355,7 +353,7 @@ def process_chunkable_results(
355353
# We have some results, let's prepare the response
356354
task_result: ResultType
357355
chunks: list[ChunkedDocumentResultItem] = []
358-
documents: list[ExportResult] = []
356+
documents: list[DocumentResultItem] = []
359357
num_succeeded = 0
360358
num_failed = 0
361359
docs_succeeded: list[SucceededDocsItem] = []
@@ -477,17 +475,16 @@ def process_chunkable_results(
477475
else:
478476
doc_content = ExportDocumentResponse(filename=filename)
479477

480-
doc_result = _to_export_result(
481-
DocumentResultItem(
482-
document=doc_content,
483-
status=exportable_document.status,
484-
timings=exportable_document.timings,
485-
errors=errors,
486-
)
478+
doc_result = DocumentResultItem(
479+
document=doc_content,
480+
status=exportable_document.status,
481+
timings=exportable_document.timings,
482+
errors=errors,
487483
)
488484

489485
documents.append(doc_result)
490486
num_total = num_succeeded + num_failed
487+
# Task-level wall clock elapsed time across the whole request.
491488
processing_time = time.monotonic() - start_time
492489
_log.info(
493490
f"Processed {num_total} docs generating {len(chunks)} chunks in {processing_time:.2f} seconds."

docling_jobkit/convert/results.py

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,6 @@
4242
RemoteTargetResult,
4343
ResultType,
4444
ZipArchiveResult,
45-
_to_export_result,
4645
)
4746
from docling_jobkit.datamodel.task import Task
4847
from docling_jobkit.public_errors import render_public_error_list
@@ -484,6 +483,7 @@ def process_exportable_results(
484483
)
485484

486485
exportable_documents = documents_list
486+
# Task-level wall clock elapsed time across the whole request.
487487
processing_time = time.monotonic() - start_time
488488

489489
_log.info(
@@ -533,13 +533,11 @@ def process_exportable_results(
533533
image_mode=conversion_options.image_export_mode,
534534
md_page_break_placeholder=conversion_options.md_page_break_placeholder,
535535
)
536-
task_result = _to_export_result(
537-
DocumentResultItem(
538-
document=content,
539-
status=exportable_document.status,
540-
errors=exportable_document.errors,
541-
timings=exportable_document.timings,
542-
)
536+
task_result = DocumentResultItem(
537+
document=content,
538+
status=exportable_document.status,
539+
errors=exportable_document.errors,
540+
timings=exportable_document.timings,
543541
)
544542

545543
num_succeeded = 1 if _is_exportable_status(exportable_document.status) else 0

docling_jobkit/datamodel/result.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
ExportDocumentResponse,
99
ExportResult,
1010
PresignedArtifactResult,
11+
PresignedUrlConvertDocumentResponse,
1112
PresignedUrlConvertResponse,
1213
RemoteTargetResult,
1314
ResultType,
@@ -25,6 +26,7 @@
2526
"ExportDocumentResponse",
2627
"ExportResult",
2728
"PresignedArtifactResult",
29+
"PresignedUrlConvertDocumentResponse",
2830
"PresignedUrlConvertResponse",
2931
"RemoteTargetResult",
3032
"ResultType",

tests/test_chunking.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,8 @@
2222
from docling_jobkit.datamodel.result import (
2323
ChunkedDocumentResult,
2424
ChunkedDocumentResultItem,
25+
DocumentResultItem,
2526
ExportDocumentResponse,
26-
ExportResult,
2727
ZipArchiveResult,
2828
)
2929
from docling_jobkit.datamodel.task import Task
@@ -110,8 +110,8 @@ def test_chunked_response_creation(self):
110110
response = ChunkedDocumentResult(
111111
chunks=[],
112112
documents=[
113-
ExportResult(
114-
content=ExportDocumentResponse(filename="file.pdf"),
113+
DocumentResultItem(
114+
document=ExportDocumentResponse(filename="file.pdf"),
115115
status=ConversionStatus.SUCCESS,
116116
)
117117
],
@@ -138,8 +138,8 @@ def test_chunked_response_with_chunks(self):
138138
response = ChunkedDocumentResult(
139139
chunks=[chunk],
140140
documents=[
141-
ExportResult(
142-
content=ExportDocumentResponse(filename="file.pdf"),
141+
DocumentResultItem(
142+
document=ExportDocumentResponse(filename="file.pdf"),
143143
status=ConversionStatus.SUCCESS,
144144
)
145145
],
@@ -206,8 +206,8 @@ def test_export_chunking_result(self):
206206
),
207207
]
208208
documents = [
209-
ExportResult(
210-
content=ExportDocumentResponse(filename="doc1.pdf"),
209+
DocumentResultItem(
210+
document=ExportDocumentResponse(filename="doc1.pdf"),
211211
status=ConversionStatus.SUCCESS,
212212
),
213213
]

tests/test_local_orchestrator.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -418,7 +418,7 @@ async def test_chunk_file(
418418

419419
assert len(task_result.result.documents) == 1
420420
assert (
421-
task_result.result.documents[0].content.json_content is None
421+
task_result.result.documents[0].document.json_content is None
422422
) # by default no document
423423
assert len(task_result.result.chunks) > 1
424424

tests/test_rq_orchestrator.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -171,10 +171,10 @@ async def test_chunk_file(orchestrator: RQOrchestrator, include_converted_doc: b
171171

172172
if include_converted_doc:
173173
DoclingDocument.model_validate(
174-
task_result.result.documents[0].content.json_content
174+
task_result.result.documents[0].document.json_content
175175
)
176176
else:
177-
task_result.result.documents[0].content.json_content is None
177+
task_result.result.documents[0].document.json_content is None
178178

179179

180180
@pytest.mark.asyncio
@@ -208,7 +208,7 @@ async def test_delete_task_cleans_up_job(orchestrator: RQOrchestrator):
208208
# (normally this would be done by the worker)
209209
result_key = f"{orchestrator.config.results_prefix}:{task.task_id}"
210210
mock_result = ExportResult(
211-
content=ExportDocumentResponse(filename="test.pdf"),
211+
document=ExportDocumentResponse(filename="test.pdf"),
212212
status=ConversionStatus.SUCCESS,
213213
)
214214
packed = msgpack.packb(mock_result.model_dump(), use_bin_type=True)

tests/test_service_shims.py

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from docling.datamodel.base_models import ConversionStatus
12
from docling.datamodel.service.chunking import (
23
BaseChunkerOptions as SharedBaseChunkerOptions,
34
)
@@ -11,7 +12,10 @@
1112
DoclingTaskResult as SharedDoclingTaskResult,
1213
DocumentArtifactItem as SharedDocumentArtifactItem,
1314
DocumentResultItem as SharedDocumentResultItem,
15+
ExportDocumentResponse as SharedExportDocumentResponse,
16+
ExportResult as SharedExportResult,
1417
PresignedArtifactResult as SharedPresignedArtifactResult,
18+
PresignedUrlConvertDocumentResponse as SharedPresignedUrlConvertDocumentResponse,
1519
RemoteTargetResult as SharedRemoteTargetResult,
1620
ResultType as SharedResultType,
1721
ZipArchiveResult as SharedZipArchiveResult,
@@ -31,8 +35,10 @@
3135
DoclingTaskResult,
3236
DocumentArtifactItem,
3337
DocumentResultItem,
38+
ExportDocumentResponse,
3439
ExportResult,
3540
PresignedArtifactResult,
41+
PresignedUrlConvertDocumentResponse,
3642
RemoteTargetResult,
3743
ResultType,
3844
ZipArchiveResult,
@@ -69,11 +75,33 @@ def test_jobkit_result_models_are_shared_types():
6975
assert ArtifactRef is SharedArtifactRef
7076
assert DocumentArtifactItem is SharedDocumentArtifactItem
7177
assert DocumentResultItem is SharedDocumentResultItem
78+
assert ExportResult is SharedExportResult
7279
assert PresignedArtifactResult is SharedPresignedArtifactResult
80+
assert (
81+
PresignedUrlConvertDocumentResponse is SharedPresignedUrlConvertDocumentResponse
82+
)
7383

7484

7585
def test_shared_service_response_still_constructs_from_jobkit_result():
7686
assert (
77-
ExportResult.model_fields["content"].annotation
87+
DocumentResultItem.model_fields["document"].annotation
7888
is SharedConvertDocumentResponse.model_fields["document"].annotation
7989
)
90+
assert (
91+
DocumentResultItem.model_fields["document"].annotation
92+
is SharedExportDocumentResponse
93+
)
94+
assert DocumentResultItem.model_fields["document"].serialization_alias == "content"
95+
assert ExportResult is DocumentResultItem
96+
97+
98+
def test_document_result_item_serializes_document_to_legacy_content_field():
99+
item = DocumentResultItem(
100+
document=ExportDocumentResponse(filename="file.pdf"),
101+
status=ConversionStatus.SUCCESS,
102+
)
103+
104+
payload = item.model_dump(mode="json")
105+
106+
assert "document" not in payload
107+
assert payload["content"]["filename"] == "file.pdf"

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)