@@ -126,38 +126,6 @@ def __init__(
126126 self .created = 1234567890
127127
128128
129- class MockEmbeddingData :
130- def __init__ (self , embedding = None ):
131- self .embedding = embedding or [0.1 , 0.2 , 0.3 ]
132- self .index = 0
133- self .object = "embedding"
134-
135-
136- class MockEmbeddingResponse :
137- def __init__ (self , model = "text-embedding-ada-002" , data = None , usage = None ):
138- self .model = model
139- self .data = data or [MockEmbeddingData ()]
140- self .usage = usage or MockUsage (
141- prompt_tokens = 5 , completion_tokens = 0 , total_tokens = 5
142- )
143- self .object = "list"
144-
145- def model_dump (self ):
146- return {
147- "model" : self .model ,
148- "data" : [
149- {"embedding" : d .embedding , "index" : d .index , "object" : d .object }
150- for d in self .data
151- ],
152- "usage" : {
153- "prompt_tokens" : self .usage .prompt_tokens ,
154- "completion_tokens" : self .usage .completion_tokens ,
155- "total_tokens" : self .usage .total_tokens ,
156- },
157- "object" : self .object ,
158- }
159-
160-
161129@pytest .mark .parametrize (
162130 "send_default_pii, include_prompts" ,
163131 [
@@ -311,7 +279,13 @@ def test_streaming_chat_completion(
311279 assert span ["data" ][SPANDATA .GEN_AI_RESPONSE_STREAMING ] is True
312280
313281
314- def test_embeddings_create (sentry_init , capture_events , clear_litellm_cache ):
282+ def test_embeddings_create (
283+ sentry_init ,
284+ capture_events ,
285+ get_model_response ,
286+ openai_embedding_model_response ,
287+ clear_litellm_cache ,
288+ ):
315289 """
316290 Test that litellm.embedding() calls are properly instrumented.
317291
@@ -325,20 +299,24 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
325299 )
326300 events = capture_events ()
327301
328- mock_response = MockEmbeddingResponse ( )
302+ client = OpenAI ( api_key = "z" )
329303
330- # Mock within the test to ensure proper ordering with cache clearing
331- with mock .patch (
332- "litellm.openai_chat_completions.make_sync_openai_embedding_request"
333- ) as mock_http :
334- # The function returns (headers, response)
335- mock_http .return_value = ({}, mock_response )
304+ model_response = get_model_response (
305+ openai_embedding_model_response ,
306+ serialize_pydantic = True ,
307+ request_headers = {"X-Stainless-Raw-Response" : "True" },
308+ )
336309
310+ with mock .patch .object (
311+ client .embeddings ._client ._client ,
312+ "send" ,
313+ return_value = model_response ,
314+ ):
337315 with start_transaction (name = "litellm test" ):
338316 response = litellm .embedding (
339317 model = "text-embedding-ada-002" ,
340318 input = "Hello, world!" ,
341- api_key = "test-key" , # Provide a fake API key to avoid authentication errors
319+ client = client ,
342320 )
343321 # Allow time for callbacks to complete (they may run in separate threads)
344322 time .sleep (0.1 )
@@ -349,8 +327,13 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
349327 (event ,) = events
350328
351329 assert event ["type" ] == "transaction"
352- assert len (event ["spans" ]) == 1
353- (span ,) = event ["spans" ]
330+ spans = list (
331+ x
332+ for x in event ["spans" ]
333+ if x ["op" ] == OP .GEN_AI_EMBEDDINGS and x ["origin" ] == "auto.ai.litellm"
334+ )
335+ assert len (spans ) == 1
336+ span = spans [0 ]
354337
355338 assert span ["op" ] == OP .GEN_AI_EMBEDDINGS
356339 assert span ["description" ] == "embeddings text-embedding-ada-002"
@@ -363,7 +346,11 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
363346
364347
365348def test_embeddings_create_with_list_input (
366- sentry_init , capture_events , clear_litellm_cache
349+ sentry_init ,
350+ capture_events ,
351+ get_model_response ,
352+ openai_embedding_model_response ,
353+ clear_litellm_cache ,
367354):
368355 """Test embedding with list input."""
369356 sentry_init (
@@ -373,20 +360,24 @@ def test_embeddings_create_with_list_input(
373360 )
374361 events = capture_events ()
375362
376- mock_response = MockEmbeddingResponse ( )
363+ client = OpenAI ( api_key = "z" )
377364
378- # Mock within the test to ensure proper ordering with cache clearing
379- with mock .patch (
380- "litellm.openai_chat_completions.make_sync_openai_embedding_request"
381- ) as mock_http :
382- # The function returns (headers, response)
383- mock_http .return_value = ({}, mock_response )
365+ model_response = get_model_response (
366+ openai_embedding_model_response ,
367+ serialize_pydantic = True ,
368+ request_headers = {"X-Stainless-Raw-Response" : "True" },
369+ )
384370
371+ with mock .patch .object (
372+ client .embeddings ._client ._client ,
373+ "send" ,
374+ return_value = model_response ,
375+ ):
385376 with start_transaction (name = "litellm test" ):
386377 response = litellm .embedding (
387378 model = "text-embedding-ada-002" ,
388379 input = ["First text" , "Second text" , "Third text" ],
389- api_key = "test-key" , # Provide a fake API key to avoid authentication errors
380+ client = client ,
390381 )
391382 # Allow time for callbacks to complete (they may run in separate threads)
392383 time .sleep (0.1 )
@@ -397,8 +388,13 @@ def test_embeddings_create_with_list_input(
397388 (event ,) = events
398389
399390 assert event ["type" ] == "transaction"
400- assert len (event ["spans" ]) == 1
401- (span ,) = event ["spans" ]
391+ spans = list (
392+ x
393+ for x in event ["spans" ]
394+ if x ["op" ] == OP .GEN_AI_EMBEDDINGS and x ["origin" ] == "auto.ai.litellm"
395+ )
396+ assert len (spans ) == 1
397+ span = spans [0 ]
402398
403399 assert span ["op" ] == OP .GEN_AI_EMBEDDINGS
404400 assert span ["data" ][SPANDATA .GEN_AI_OPERATION_NAME ] == "embeddings"
@@ -411,7 +407,13 @@ def test_embeddings_create_with_list_input(
411407 ]
412408
413409
414- def test_embeddings_no_pii (sentry_init , capture_events , clear_litellm_cache ):
410+ def test_embeddings_no_pii (
411+ sentry_init ,
412+ capture_events ,
413+ get_model_response ,
414+ openai_embedding_model_response ,
415+ clear_litellm_cache ,
416+ ):
415417 """Test that PII is not captured when disabled."""
416418 sentry_init (
417419 integrations = [LiteLLMIntegration (include_prompts = True )],
@@ -420,20 +422,24 @@ def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
420422 )
421423 events = capture_events ()
422424
423- mock_response = MockEmbeddingResponse ( )
425+ client = OpenAI ( api_key = "z" )
424426
425- # Mock within the test to ensure proper ordering with cache clearing
426- with mock .patch (
427- "litellm.openai_chat_completions.make_sync_openai_embedding_request"
428- ) as mock_http :
429- # The function returns (headers, response)
430- mock_http .return_value = ({}, mock_response )
427+ model_response = get_model_response (
428+ openai_embedding_model_response ,
429+ serialize_pydantic = True ,
430+ request_headers = {"X-Stainless-Raw-Response" : "True" },
431+ )
431432
433+ with mock .patch .object (
434+ client .embeddings ._client ._client ,
435+ "send" ,
436+ return_value = model_response ,
437+ ):
432438 with start_transaction (name = "litellm test" ):
433439 response = litellm .embedding (
434440 model = "text-embedding-ada-002" ,
435441 input = "Hello, world!" ,
436- api_key = "test-key" , # Provide a fake API key to avoid authentication errors
442+ client = client ,
437443 )
438444 # Allow time for callbacks to complete (they may run in separate threads)
439445 time .sleep (0.1 )
@@ -444,8 +450,13 @@ def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
444450 (event ,) = events
445451
446452 assert event ["type" ] == "transaction"
447- assert len (event ["spans" ]) == 1
448- (span ,) = event ["spans" ]
453+ spans = list (
454+ x
455+ for x in event ["spans" ]
456+ if x ["op" ] == OP .GEN_AI_EMBEDDINGS and x ["origin" ] == "auto.ai.litellm"
457+ )
458+ assert len (spans ) == 1
459+ span = spans [0 ]
449460
450461 assert span ["op" ] == OP .GEN_AI_EMBEDDINGS
451462 # Check that embeddings input is NOT captured when PII is disabled
0 commit comments