@@ -257,6 +257,74 @@ def mock_invoke_model(*args, **kwargs):
257257 assert doc .content == docs [i ].content
258258 assert doc .embedding == [0.1 , 0.2 , 0.3 ]
259259
def test_run_cohere_does_not_modify_original_documents(self, mock_boto3_session):
    """
    Check that `run` with a Cohere model leaves the caller's documents untouched.

    The embedder's `_client` is patched, and the mocked `invoke_model` response carries two
    embeddings for the two input documents. After `run`, the input documents must still have
    no embedding, and every returned document must be a different object that carries the
    expected embedding and the original content.

    NOTE(review): `mock_boto3_session` is only requested as a fixture here; presumably it keeps
    the embedder from opening a real boto3 session - confirm against the fixture definition.
    """
    embedder = AmazonBedrockDocumentEmbedder(model="cohere.embed-english-v3")

    original_docs = [
        Document(content="test 1", id="doc1"),
        Document(content="test 2", id="doc2"),
    ]

    # Precondition: the inputs start without embeddings, so a `None` embedding after `run`
    # really means "not modified".
    assert [doc.embedding for doc in original_docs] == [None, None]

    with patch.object(embedder, "_client") as mock_client:
        mock_client.invoke_model.return_value = {
            "body": io.StringIO('{"embeddings": [[0.1, 0.2], [0.3, 0.4]]}'),
        }

        result = embedder.run(documents=original_docs)

    returned_docs = result["documents"]
    assert len(returned_docs) == len(original_docs)

    # Verify originals are unchanged
    assert all(doc.embedding is None for doc in original_docs)

    # Verify that EVERY returned document is a new instance. Comparing two lists of `id()`
    # values with `!=` would already pass if only one of the documents had been copied.
    assert all(returned is not original for returned in returned_docs for original in original_docs)

    # Verify returned documents have embeddings and keep their content
    assert returned_docs[0].embedding == [0.1, 0.2]
    assert returned_docs[1].embedding == [0.3, 0.4]
    assert returned_docs[0].content == "test 1"
    assert returned_docs[1].content == "test 2"
292+
def test_run_titan_does_not_modify_original_documents(self, mock_boto3_session):
    """
    Check that `run` with a Titan model leaves the caller's documents untouched.

    The embedder's `_client` is patched, and `invoke_model` is mocked with two consecutive
    responses, each carrying a single embedding. After `run`, the input documents must still
    have no embedding, and every returned document must be a different object that carries the
    expected embedding and the original content.

    NOTE(review): `mock_boto3_session` is only requested as a fixture here; presumably it keeps
    the embedder from opening a real boto3 session - confirm against the fixture definition.
    """
    embedder = AmazonBedrockDocumentEmbedder(model="amazon.titan-embed-text-v1")

    original_docs = [
        Document(content="test 1", id="doc1"),
        Document(content="test 2", id="doc2"),
    ]

    # Precondition: the inputs start without embeddings, so a `None` embedding after `run`
    # really means "not modified".
    assert [doc.embedding for doc in original_docs] == [None, None]

    with patch.object(embedder, "_client") as mock_client:
        # Titan returns one embedding at a time, so each call gets its own response
        mock_client.invoke_model.side_effect = [
            {"body": io.StringIO('{"embedding": [0.1, 0.2]}')},
            {"body": io.StringIO('{"embedding": [0.3, 0.4]}')},
        ]

        result = embedder.run(documents=original_docs)

    returned_docs = result["documents"]
    assert len(returned_docs) == len(original_docs)

    # Verify originals are unchanged
    assert all(doc.embedding is None for doc in original_docs)

    # Verify that EVERY returned document is a new instance. Comparing two lists of `id()`
    # values with `!=` would already pass if only one of the documents had been copied.
    assert all(returned is not original for returned in returned_docs for original in original_docs)

    # Verify returned documents have embeddings and keep their content
    assert returned_docs[0].embedding == [0.1, 0.2]
    assert returned_docs[1].embedding == [0.3, 0.4]
    assert returned_docs[0].content == "test 1"
    assert returned_docs[1].content == "test 2"
327+
260328 @pytest .mark .integration
261329 @pytest .mark .skipif (
262330 not os .getenv ("AWS_ACCESS_KEY_ID" )
0 commit comments