33# SPDX-License-Identifier: Apache-2.0
44
55import warnings
6+ from copy import deepcopy
67from dataclasses import replace
78
89import pytest
@@ -213,7 +214,7 @@ def test_from_dict():
213214 assert Document .from_dict ({}) == Document ()
214215
215216
216- def from_from_dict_with_parameters ():
217+ def test_from_dict_with_parameters ():
217218 blob_data = b"some bytes"
218219 assert Document .from_dict (
219220 {
@@ -234,6 +235,40 @@ def from_from_dict_with_parameters():
234235 )
235236
236237
def test_from_dict_does_not_mutate_input():
    """
    Ensure ``Document.from_dict`` leaves the caller's dictionary unchanged.

    The input carries "date" and "type" as top-level keys, which the expected
    ``Document`` holds under ``meta``.
    """
    blob_data = b"some bytes"
    data = {
        "content": "test text",
        "blob": {"data": list(blob_data), "mime_type": "text/markdown"},
        "score": 0.812,
        "embedding": [0.1, 0.2, 0.3],
        "sparse_embedding": {"indices": [0, 2, 4], "values": [0.1, 0.2, 0.3]},
        "date": "10-10-2023",
        "type": "article",
    }
    # Deep copy so that changes to nested containers (blob, embeddings) are detected too.
    snapshot = deepcopy(data)

    parsed = Document.from_dict(data)
    expected = Document(
        content="test text",
        blob=ByteStream(blob_data, mime_type="text/markdown"),
        score=0.812,
        embedding=[0.1, 0.2, 0.3],
        sparse_embedding=SparseEmbedding(indices=[0, 2, 4], values=[0.1, 0.2, 0.3]),
        meta={"date": "10-10-2023", "type": "article"},
    )

    assert parsed == expected
    assert data == snapshot
260+
261+
def test_from_dict_does_not_mutate_input_with_explicit_meta():
    """
    Ensure ``Document.from_dict`` leaves the caller's dictionary unchanged when ``meta`` is a nested dict.
    """
    data = {
        "content": "test text",
        "meta": {"date": "10-10-2023", "type": "article"},
        "score": 0.812,
    }
    # Deep copy so that a change inside the nested "meta" dict is detected as well.
    snapshot = deepcopy(data)

    parsed = Document.from_dict(data)
    expected = Document(content="test text", meta={"date": "10-10-2023", "type": "article"}, score=0.812)

    assert parsed == expected
    assert data == snapshot
270+
271+
237272def test_from_dict_with_legacy_fields ():
238273 assert Document .from_dict (
239274 {
0 commit comments