77from unittest .mock import MagicMock
88
99import pytest
10+ from docling .chunking import HybridChunker
11+ from docling .document_converter import DocumentConverter
1012from docling_core .types .io import DocumentStream
11- from haystack .core .serialization import component_from_dict , component_to_dict
1213from haystack .dataclasses import ByteStream
1314
1415from haystack_integrations .components .converters .docling import (
@@ -134,8 +135,6 @@ def test_run_json_minimal() -> None:
134135
135136
136137def test_legacy_import_path () -> None :
137- import warnings
138-
139138 with warnings .catch_warnings (record = True ) as caught :
140139 warnings .simplefilter ("always" )
141140 from docling_haystack .converter import DoclingConverter as LegacyDoclingConverter
@@ -146,63 +145,59 @@ def test_legacy_import_path() -> None:
146145 )
147146
148147
149- def test_component_from_dict_legacy_nulls () -> None :
150- # Before the public-attribute refactor, default serialization couldn't find
151- # the _-prefixed attributes and fell back to the init defaults, so
152- # convert_kwargs and md_export_kwargs were always serialized as null.
153- # Verify that such a serialized dict still deserializes correctly.
154- legacy_data = {
148+ def test_component_to_dict_defaults () -> None :
149+ converter = DoclingConverter ()
150+ assert converter .to_dict () == {
155151 "type" : "haystack_integrations.components.converters.docling.converter.DoclingConverter" ,
156152 "init_parameters" : {
157153 "converter" : None ,
158- "convert_kwargs" : None ,
154+ "convert_kwargs" : {} ,
159155 "export_type" : "doc_chunks" ,
160- "md_export_kwargs" : None ,
156+ "md_export_kwargs" : { "image_placeholder" : "" } ,
161157 "chunker" : None ,
162158 "meta_extractor" : None ,
163159 },
164160 }
165- restored = component_from_dict (DoclingConverter , legacy_data , "docling_converter" )
166-
167- assert restored .convert_kwargs == {}
168- assert restored .md_export_kwargs == {"image_placeholder" : "" }
169- assert restored .export_type == ExportType .DOC_CHUNKS
170- assert restored .converter is None
171- assert restored .chunker is None
172- assert restored .meta_extractor is None
173-
174-
175- def test_component_to_dict_defaults () -> None :
176- converter = DoclingConverter ()
177- data = component_to_dict (converter , "docling_converter" )
178-
179- init_params = data ["init_parameters" ]
180- assert init_params ["converter" ] is None
181- assert init_params ["convert_kwargs" ] == {}
182- assert init_params ["export_type" ] == ExportType .DOC_CHUNKS
183- assert init_params ["md_export_kwargs" ] == {"image_placeholder" : "" }
184- assert init_params ["chunker" ] is None
185- assert init_params ["meta_extractor" ] is None
186161
187162
188163def test_component_to_dict_custom_params () -> None :
189164 converter = DoclingConverter (
165+ converter = DocumentConverter (),
190166 convert_kwargs = {"raises_on_error" : False },
191167 export_type = ExportType .MARKDOWN ,
192168 md_export_kwargs = {"image_placeholder" : "[img]" },
169+ meta_extractor = MetaExtractor (),
193170 )
194- data = component_to_dict (converter , "docling_converter" )
195-
196- init_params = data ["init_parameters" ]
197- assert init_params ["convert_kwargs" ] == {"raises_on_error" : False }
198- assert init_params ["export_type" ] == ExportType .MARKDOWN
199- assert init_params ["md_export_kwargs" ] == {"image_placeholder" : "[img]" }
171+ assert converter .to_dict () == {
172+ "type" : "haystack_integrations.components.converters.docling.converter.DoclingConverter" ,
173+ "init_parameters" : {
174+ "converter" : None ,
175+ "convert_kwargs" : {"raises_on_error" : False },
176+ "export_type" : "markdown" ,
177+ "md_export_kwargs" : {"image_placeholder" : "[img]" },
178+ "chunker" : None ,
179+ "meta_extractor" : {
180+ "type" : "haystack_integrations.components.converters.docling.converter.MetaExtractor" ,
181+ "data" : {},
182+ },
183+ },
184+ }
200185
201186
202187def test_component_from_dict_defaults () -> None :
203- converter = DoclingConverter ()
204- data = component_to_dict (converter , "docling_converter" )
205- restored = component_from_dict (DoclingConverter , data , "docling_converter" )
188+ # null kwargs mirror the pre-refactor serialization format and must still deserialize correctly
189+ data = {
190+ "type" : "haystack_integrations.components.converters.docling.converter.DoclingConverter" ,
191+ "init_parameters" : {
192+ "converter" : None ,
193+ "convert_kwargs" : None ,
194+ "export_type" : "doc_chunks" ,
195+ "md_export_kwargs" : None ,
196+ "chunker" : None ,
197+ "meta_extractor" : None ,
198+ },
199+ }
200+ restored = DoclingConverter .from_dict (data )
206201
207202 assert restored .converter is None
208203 assert restored .convert_kwargs == {}
@@ -213,17 +208,44 @@ def test_component_from_dict_defaults() -> None:
213208
214209
215210def test_component_from_dict_custom_params () -> None :
216- converter = DoclingConverter (
217- convert_kwargs = {"raises_on_error" : False },
218- export_type = ExportType .JSON ,
219- md_export_kwargs = {"image_placeholder" : "[img]" },
220- )
221- data = component_to_dict (converter , "docling_converter" )
222- restored = component_from_dict (DoclingConverter , data , "docling_converter" )
211+ data = {
212+ "type" : "haystack_integrations.components.converters.docling.converter.DoclingConverter" ,
213+ "init_parameters" : {
214+ "converter" : None ,
215+ "convert_kwargs" : {"raises_on_error" : False },
216+ "export_type" : "json" ,
217+ "md_export_kwargs" : {"image_placeholder" : "[img]" },
218+ "chunker" : None ,
219+ "meta_extractor" : {
220+ "type" : "haystack_integrations.components.converters.docling.converter.MetaExtractor" ,
221+ "data" : {},
222+ },
223+ },
224+ }
225+ restored = DoclingConverter .from_dict (data )
223226
227+ assert restored .converter is None
224228 assert restored .convert_kwargs == {"raises_on_error" : False }
225229 assert restored .export_type == ExportType .JSON
226230 assert restored .md_export_kwargs == {"image_placeholder" : "[img]" }
231+ assert restored .chunker is None
232+ assert isinstance (restored .meta_extractor , MetaExtractor )
233+
234+
235+ def test_component_to_dict_chunker_warns_and_is_dropped () -> None :
236+ converter = DoclingConverter (chunker = HybridChunker (merge_peers = False ))
237+
238+ assert converter .to_dict () == {
239+ "type" : "haystack_integrations.components.converters.docling.converter.DoclingConverter" ,
240+ "init_parameters" : {
241+ "converter" : None ,
242+ "convert_kwargs" : {},
243+ "export_type" : "doc_chunks" ,
244+ "md_export_kwargs" : {"image_placeholder" : "" },
245+ "chunker" : None ,
246+ "meta_extractor" : None ,
247+ },
248+ }
227249
228250
229251def test_run_with_sources_parameter () -> None :
0 commit comments